Merge branches 'acpi-battery', 'acpi-video' and 'acpi-misc'
author Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Mon, 30 May 2022 16:07:05 +0000 (18:07 +0200)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Mon, 30 May 2022 16:07:05 +0000 (18:07 +0200)
Merge ACPI battery and backlight driver updates and a miscellaneous
cleanup for 5.19-rc1:

 - Make the ACPI battery driver show the "not-charging" status by
   default unless "charging" or "full" is directly indicated (Werner
   Sembach).

 - Improve the PM notifier in the ACPI backlight driver (Zhang Rui).

 - Clean up some white space in the ACPI code (Ian Cowan).

* acpi-battery:
  ACPI: battery: Make "not-charging" the default on no charging or full info

* acpi-video:
  ACPI: video: improve PM notifier callback

* acpi-misc:
  ACPI: clean up white space in a few places for consistency

2196 files changed:
.mailmap
Documentation/ABI/stable/sysfs-driver-mlxreg-io
Documentation/ABI/testing/ima_policy
Documentation/ABI/testing/securityfs-secrets-coco [new file with mode: 0644]
Documentation/ABI/testing/sysfs-class-regulator
Documentation/ABI/testing/sysfs-driver-xen-blkback
Documentation/ABI/testing/sysfs-driver-xen-blkfront
Documentation/ABI/testing/sysfs-platform-intel-ifs [new file with mode: 0644]
Documentation/RCU/Design/Data-Structures/Data-Structures.rst
Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
Documentation/RCU/Design/Requirements/Requirements.rst
Documentation/RCU/arrayRCU.rst
Documentation/RCU/checklist.rst
Documentation/RCU/rcu.rst
Documentation/RCU/rculist_nulls.rst
Documentation/RCU/stallwarn.rst
Documentation/RCU/whatisRCU.rst
Documentation/accounting/psi.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/sysctl/kernel.rst
Documentation/arm64/booting.rst
Documentation/arm64/elf_hwcaps.rst
Documentation/arm64/index.rst
Documentation/arm64/silicon-errata.rst
Documentation/arm64/sme.rst [new file with mode: 0644]
Documentation/arm64/sve.rst
Documentation/cdrom/cdrom-standard.rst
Documentation/core-api/timekeeping.rst
Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt [deleted file]
Documentation/devicetree/bindings/hwmon/adt7475.yaml
Documentation/devicetree/bindings/hwmon/lm75.yaml
Documentation/devicetree/bindings/hwmon/microchip,lan966x.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/hwmon/national,lm90.yaml
Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml
Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml
Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml
Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
Documentation/devicetree/bindings/mmc/marvell,dove-sdhci.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/mmc/marvell,orion-sdio.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt [deleted file]
Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/mmc/mmc-controller.yaml
Documentation/devicetree/bindings/mmc/mtk-sd.yaml
Documentation/devicetree/bindings/mmc/orion-sdio.txt [deleted file]
Documentation/devicetree/bindings/mmc/sdhci-am654.yaml
Documentation/devicetree/bindings/mmc/sdhci-dove.txt [deleted file]
Documentation/devicetree/bindings/mmc/sdhci-msm.txt [deleted file]
Documentation/devicetree/bindings/mmc/sdhci-msm.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
Documentation/devicetree/bindings/mtd/aspeed-smc.txt [deleted file]
Documentation/devicetree/bindings/mtd/elm.txt [deleted file]
Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml
Documentation/devicetree/bindings/mtd/renesas-nandc.yaml
Documentation/devicetree/bindings/mtd/ti,elm.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/perf/arm,cmn.yaml
Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml
Documentation/devicetree/bindings/regulator/mt6358-regulator.txt
Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml
Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
Documentation/devicetree/bindings/regulator/richtek,rt4801-regulator.yaml
Documentation/devicetree/bindings/regulator/richtek,rt5759-regulator.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/regulator/siliconmitus,sm5703-regulator.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml
Documentation/devicetree/bindings/reserved-memory/phram.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/spi/ingenic,spi.yaml
Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
Documentation/devicetree/bindings/spi/mediatek,spi-mtk-snfi.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
Documentation/devicetree/bindings/spi/renesas,rspi.yaml
Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml
Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml
Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/trivial-devices.yaml
Documentation/devicetree/bindings/vendor-prefixes.yaml
Documentation/dontdiff
Documentation/driver-api/gpio/driver.rst
Documentation/driver-api/libata.rst
Documentation/features/debug/debug-vm-pgtable/arch-support.txt
Documentation/features/time/context-tracking/arch-support.txt
Documentation/features/time/virt-cpuacct/arch-support.txt
Documentation/filesystems/fsverity.rst
Documentation/filesystems/proc.rst
Documentation/filesystems/zonefs.rst
Documentation/firmware-guide/acpi/enumeration.rst
Documentation/hwmon/aquacomputer_d5next.rst
Documentation/hwmon/asus_ec_sensors.rst
Documentation/hwmon/dell-smm-hwmon.rst
Documentation/hwmon/hwmon-kernel-api.rst
Documentation/hwmon/index.rst
Documentation/hwmon/lan966x.rst [new file with mode: 0644]
Documentation/hwmon/max16601.rst
Documentation/hwmon/xdpe152c4.rst [new file with mode: 0644]
Documentation/kbuild/reproducible-builds.rst
Documentation/power/energy-model.rst
Documentation/process/embargoed-hardware-issues.rst
Documentation/process/maintainer-tip.rst
Documentation/security/IMA-templates.rst
Documentation/security/index.rst
Documentation/security/keys/trusted-encrypted.rst
Documentation/security/landlock.rst
Documentation/security/secrets/coco.rst [new file with mode: 0644]
Documentation/security/secrets/index.rst [new file with mode: 0644]
Documentation/userspace-api/ioctl/cdrom.rst
Documentation/userspace-api/landlock.rst
Documentation/userspace-api/seccomp_filter.rst
Documentation/virt/coco/sev-guest.rst [new file with mode: 0644]
Documentation/virt/index.rst
Documentation/virt/kvm/api.rst
Documentation/x86/cpuinfo.rst
Documentation/x86/ifs.rst [new file with mode: 0644]
Documentation/x86/index.rst
Documentation/x86/tdx.rst [new file with mode: 0644]
Documentation/x86/x86_64/boot-options.rst
Documentation/x86/zero-page.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/timex.h
arch/arm/Kconfig
arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
arch/arm/boot/dts/aspeed-g6.dtsi
arch/arm/configs/lpc18xx_defconfig
arch/arm/configs/mps2_defconfig
arch/arm/configs/stm32_defconfig
arch/arm/configs/vf610m4_defconfig
arch/arm/include/asm/arch_gicv3.h
arch/arm/include/asm/assembler.h
arch/arm/include/asm/io.h
arch/arm/include/asm/module.h
arch/arm/include/asm/timex.h
arch/arm/include/asm/unwind.h
arch/arm/kernel/entry-armv.S
arch/arm/kernel/entry-common.S
arch/arm/kernel/entry-header.S
arch/arm/kernel/hw_breakpoint.c
arch/arm/kernel/module.c
arch/arm/kernel/signal.c
arch/arm/mach-sunxi/Kconfig
arch/arm/mm/ioremap.c
arch/arm/mm/proc-v7-bugs.c
arch/arm/vdso/Makefile
arch/arm64/Kconfig
arch/arm64/Kconfig.platforms
arch/arm64/boot/dts/qcom/sm8250-mtp.dts
arch/arm64/boot/dts/qcom/sm8250.dtsi
arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
arch/arm64/include/asm/Kbuild
arch/arm64/include/asm/arch_gicv3.h
arch/arm64/include/asm/archrandom.h
arch/arm64/include/asm/asm-bug.h
arch/arm64/include/asm/compiler.h
arch/arm64/include/asm/cpu.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/debug-monitors.h
arch/arm64/include/asm/el2_setup.h
arch/arm64/include/asm/esr.h
arch/arm64/include/asm/exception.h
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/fpsimdmacros.h
arch/arm64/include/asm/ftrace.h
arch/arm64/include/asm/hugetlb.h
arch/arm64/include/asm/hwcap.h
arch/arm64/include/asm/io.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_ras.h
arch/arm64/include/asm/mte.h
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/stacktrace.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/system_misc.h
arch/arm64/include/asm/thread_info.h
arch/arm64/include/asm/traps.h
arch/arm64/include/asm/uaccess.h
arch/arm64/include/uapi/asm/hwcap.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/include/uapi/asm/ptrace.h
arch/arm64/include/uapi/asm/sigcontext.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/debug-monitors.c
arch/arm64/kernel/entry-common.c
arch/arm64/kernel/entry-fpsimd.S
arch/arm64/kernel/entry-ftrace.S
arch/arm64/kernel/entry.S
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/ftrace.c
arch/arm64/kernel/hw_breakpoint.c
arch/arm64/kernel/kgdb.c
arch/arm64/kernel/machine_kexec.c
arch/arm64/kernel/machine_kexec_file.c
arch/arm64/kernel/mte.c
arch/arm64/kernel/paravirt.c
arch/arm64/kernel/probes/kprobes.c
arch/arm64/kernel/probes/uprobes.c
arch/arm64/kernel/process.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/relocate_kernel.S
arch/arm64/kernel/setup.c
arch/arm64/kernel/signal.c
arch/arm64/kernel/signal32.c
arch/arm64/kernel/smp.c
arch/arm64/kernel/stacktrace.c
arch/arm64/kernel/sys_compat.c
arch/arm64/kernel/syscall.c
arch/arm64/kernel/traps.c
arch/arm64/kernel/vdso/Makefile
arch/arm64/kernel/vdso32/Makefile
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/kvm/arm.c
arch/arm64/kvm/fpsimd.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
arch/arm64/kvm/hyp/nvhe/switch.c
arch/arm64/kvm/hyp/nvhe/sys_regs.c
arch/arm64/kvm/hyp/vgic-v3-sr.c
arch/arm64/kvm/hyp/vhe/switch.c
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/sys_regs.c
arch/arm64/lib/mte.S
arch/arm64/mm/copypage.c
arch/arm64/mm/fault.c
arch/arm64/mm/hugetlbpage.c
arch/arm64/mm/init.c
arch/arm64/mm/ioremap.c
arch/arm64/mm/trans_pgd.c
arch/arm64/tools/Makefile
arch/arm64/tools/cpucaps
arch/arm64/tools/gen-sysreg.awk [new file with mode: 0755]
arch/arm64/tools/sysreg [new file with mode: 0644]
arch/csky/Kbuild
arch/csky/Kconfig
arch/csky/Makefile
arch/csky/abiv1/Makefile
arch/csky/abiv1/memcpy.S [deleted file]
arch/csky/abiv1/strksyms.c [deleted file]
arch/csky/abiv2/Makefile
arch/csky/abiv2/strksyms.c
arch/csky/boot/Makefile
arch/csky/include/asm/atomic.h [new file with mode: 0644]
arch/csky/include/asm/barrier.h
arch/csky/include/asm/cmpxchg.h
arch/csky/include/asm/io.h
arch/csky/kernel/Makefile
arch/csky/kernel/io.c [new file with mode: 0644]
arch/csky/kernel/module.c
arch/csky/kernel/probes/kprobes.c
arch/csky/kernel/probes/uprobes.c
arch/csky/kernel/process.c
arch/csky/lib/Makefile
arch/csky/lib/string.c [new file with mode: 0644]
arch/csky/mm/dma-mapping.c
arch/ia64/include/asm/timex.h
arch/m68k/Kbuild
arch/m68k/Kconfig.cpu
arch/m68k/Kconfig.machine
arch/m68k/configs/amiga_defconfig
arch/m68k/configs/apollo_defconfig
arch/m68k/configs/atari_defconfig
arch/m68k/configs/bvme6000_defconfig
arch/m68k/configs/hp300_defconfig
arch/m68k/configs/mac_defconfig
arch/m68k/configs/multi_defconfig
arch/m68k/configs/mvme147_defconfig
arch/m68k/configs/mvme16x_defconfig
arch/m68k/configs/q40_defconfig
arch/m68k/configs/sun3_defconfig
arch/m68k/configs/sun3x_defconfig
arch/m68k/configs/virt_defconfig [new file with mode: 0644]
arch/m68k/include/asm/config.h
arch/m68k/include/asm/io.h
arch/m68k/include/asm/irq.h
arch/m68k/include/asm/pgtable_mm.h
arch/m68k/include/asm/raw_io.h
arch/m68k/include/asm/setup.h
arch/m68k/include/asm/timex.h
arch/m68k/include/asm/virt.h [new file with mode: 0644]
arch/m68k/include/uapi/asm/bootinfo-virt.h [new file with mode: 0644]
arch/m68k/include/uapi/asm/bootinfo.h
arch/m68k/kernel/Makefile
arch/m68k/kernel/entry.S
arch/m68k/kernel/head.S
arch/m68k/kernel/ptrace.c
arch/m68k/kernel/setup_mm.c
arch/m68k/kernel/signal.c
arch/m68k/math-emu/fp_arith.c
arch/m68k/mm/kmap.c
arch/m68k/virt/Makefile [new file with mode: 0644]
arch/m68k/virt/config.c [new file with mode: 0644]
arch/m68k/virt/ints.c [new file with mode: 0644]
arch/m68k/virt/platform.c [new file with mode: 0644]
arch/mips/include/asm/timex.h
arch/nios2/include/asm/timex.h
arch/openrisc/include/asm/timex.h
arch/openrisc/kernel/head.S
arch/parisc/include/asm/cacheflush.h
arch/parisc/include/asm/page.h
arch/parisc/include/asm/timex.h
arch/parisc/kernel/cache.c
arch/parisc/kernel/patch.c
arch/parisc/mm/fault.c
arch/powerpc/include/asm/bug.h
arch/powerpc/include/asm/timex.h
arch/powerpc/kernel/fadump.c
arch/powerpc/kvm/book3s_32_sr.S
arch/powerpc/platforms/powernv/opal-core.c
arch/riscv/Kconfig
arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
arch/riscv/boot/dts/sifive/fu540-c000.dtsi
arch/riscv/include/asm/bug.h
arch/riscv/include/asm/timex.h
arch/s390/Makefile
arch/s390/boot/.gitignore
arch/s390/boot/Makefile
arch/s390/boot/boot.h
arch/s390/boot/clz_ctz.c [moved from arch/s390/boot/compressed/clz_ctz.c with 100% similarity]
arch/s390/boot/compressed/Makefile [deleted file]
arch/s390/boot/decompressor.c [moved from arch/s390/boot/compressed/decompressor.c with 100% similarity]
arch/s390/boot/decompressor.h [moved from arch/s390/boot/compressed/decompressor.h with 100% similarity]
arch/s390/boot/head.S
arch/s390/boot/ipl_data.c [new file with mode: 0644]
arch/s390/boot/ipl_parm.c
arch/s390/boot/kaslr.c
arch/s390/boot/mem_detect.c
arch/s390/boot/startup.c
arch/s390/boot/vmlinux.lds.S [moved from arch/s390/boot/compressed/vmlinux.lds.S with 94% similarity]
arch/s390/crypto/des_s390.c
arch/s390/crypto/prng.c
arch/s390/hypfs/hypfs_vm.c
arch/s390/include/asm/alternative-asm.h
arch/s390/include/asm/alternative.h
arch/s390/include/asm/asm-extable.h
arch/s390/include/asm/barrier.h
arch/s390/include/asm/bug.h
arch/s390/include/asm/cio.h
arch/s390/include/asm/compat.h
arch/s390/include/asm/ctl_reg.h
arch/s390/include/asm/entry-common.h
arch/s390/include/asm/ipl.h
arch/s390/include/asm/lowcore.h
arch/s390/include/asm/nmi.h
arch/s390/include/asm/nospec-insn.h
arch/s390/include/asm/pai.h [new file with mode: 0644]
arch/s390/include/asm/pci_debug.h
arch/s390/include/asm/preempt.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/ptrace.h
arch/s390/include/asm/sclp.h
arch/s390/include/asm/scsw.h
arch/s390/include/asm/spinlock.h
arch/s390/include/asm/stp.h
arch/s390/include/asm/timex.h
arch/s390/include/asm/vx-insn.h
arch/s390/include/uapi/asm/pkey.h
arch/s390/include/uapi/asm/zcrypt.h
arch/s390/kernel/Makefile
arch/s390/kernel/alternative.c
arch/s390/kernel/compat_linux.h
arch/s390/kernel/entry.S
arch/s390/kernel/irq.c
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/nmi.c
arch/s390/kernel/perf_cpum_cf_events.c
arch/s390/kernel/perf_pai_crypto.c [new file with mode: 0644]
arch/s390/kernel/relocate_kernel.S
arch/s390/kernel/setup.c
arch/s390/kernel/time.c
arch/s390/kernel/vdso.c
arch/s390/kvm/priv.c
arch/s390/lib/spinlock.c
arch/s390/mm/mmap.c
arch/s390/pci/pci.c
arch/s390/pci/pci_clp.c
arch/s390/pci/pci_debug.c
arch/s390/pci/pci_event.c
arch/s390/pci/pci_insn.c
arch/s390/purgatory/head.S
arch/sh/boards/board-sh7757lcr.c
arch/sh/boards/mach-ecovec24/setup.c
arch/sh/boot/romimage/mmcif-sh7724.c
arch/sh/configs/rsk7201_defconfig
arch/sh/configs/rsk7203_defconfig
arch/sh/configs/se7206_defconfig
arch/sparc/include/asm/timex_32.h
arch/sparc/kernel/signal32.c
arch/sparc/kernel/signal_64.c
arch/sparc/vdso/Makefile
arch/um/drivers/ubd_kern.c
arch/um/include/asm/timex.h
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/Makefile
arch/x86/boot/boot.h
arch/x86/boot/compressed/Makefile
arch/x86/boot/compressed/acpi.c
arch/x86/boot/compressed/early_serial_console.c
arch/x86/boot/compressed/efi.c [new file with mode: 0644]
arch/x86/boot/compressed/efi.h [new file with mode: 0644]
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/ident_map_64.c
arch/x86/boot/compressed/idt_64.c
arch/x86/boot/compressed/kaslr.c
arch/x86/boot/compressed/mem_encrypt.S
arch/x86/boot/compressed/misc.c
arch/x86/boot/compressed/misc.h
arch/x86/boot/compressed/pgtable.h
arch/x86/boot/compressed/pgtable_64.c
arch/x86/boot/compressed/sev.c
arch/x86/boot/compressed/tdcall.S [new file with mode: 0644]
arch/x86/boot/compressed/tdx.c [new file with mode: 0644]
arch/x86/boot/compressed/tdx.h [new file with mode: 0644]
arch/x86/boot/cpucheck.c
arch/x86/boot/cpuflags.c
arch/x86/boot/cpuflags.h
arch/x86/boot/header.S
arch/x86/boot/io.h [new file with mode: 0644]
arch/x86/boot/main.c
arch/x86/boot/msr.h [new file with mode: 0644]
arch/x86/coco/Makefile
arch/x86/coco/core.c
arch/x86/coco/tdx/Makefile [new file with mode: 0644]
arch/x86/coco/tdx/tdcall.S [new file with mode: 0644]
arch/x86/coco/tdx/tdx.c [new file with mode: 0644]
arch/x86/entry/calling.h
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/vdso/Makefile
arch/x86/entry/vdso/vma.c
arch/x86/entry/vsyscall/vsyscall_64.c
arch/x86/events/Kconfig
arch/x86/events/amd/Makefile
arch/x86/events/amd/brs.c [new file with mode: 0644]
arch/x86/events/amd/core.c
arch/x86/events/amd/ibs.c
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/lbr.c
arch/x86/events/intel/uncore.c
arch/x86/events/intel/uncore_snb.c
arch/x86/events/msr.c
arch/x86/events/perf_event.h
arch/x86/ia32/Makefile
arch/x86/ia32/ia32_aout.c [deleted file]
arch/x86/include/asm/acenv.h
arch/x86/include/asm/amd-ibs.h
arch/x86/include/asm/amd_nb.h
arch/x86/include/asm/apic.h
arch/x86/include/asm/apicdef.h
arch/x86/include/asm/bootparam_utils.h
arch/x86/include/asm/bug.h
arch/x86/include/asm/cmpxchg_32.h
arch/x86/include/asm/cmpxchg_64.h
arch/x86/include/asm/cpu.h
arch/x86/include/asm/cpu_entry_area.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/cpuid.h [new file with mode: 0644]
arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/efi.h
arch/x86/include/asm/elf.h
arch/x86/include/asm/entry-common.h
arch/x86/include/asm/fpu/api.h
arch/x86/include/asm/fpu/internal.h [deleted file]
arch/x86/include/asm/highmem.h
arch/x86/include/asm/idtentry.h
arch/x86/include/asm/io.h
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/jump_label.h
arch/x86/include/asm/kvm_para.h
arch/x86/include/asm/mem_encrypt.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/mmx.h [deleted file]
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/msr.h
arch/x86/include/asm/nmi.h
arch/x86/include/asm/page_64.h
arch/x86/include/asm/pci_x86.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/pkeys.h
arch/x86/include/asm/proto.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/realmode.h
arch/x86/include/asm/segment.h
arch/x86/include/asm/setup.h
arch/x86/include/asm/sev-common.h
arch/x86/include/asm/sev.h
arch/x86/include/asm/shared/io.h [new file with mode: 0644]
arch/x86/include/asm/shared/msr.h [new file with mode: 0644]
arch/x86/include/asm/shared/tdx.h [new file with mode: 0644]
arch/x86/include/asm/smap.h
arch/x86/include/asm/special_insns.h
arch/x86/include/asm/suspend_32.h
arch/x86/include/asm/suspend_64.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/tdx.h [new file with mode: 0644]
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/timex.h
arch/x86/include/asm/topology.h
arch/x86/include/asm/traps.h
arch/x86/include/asm/tsc.h
arch/x86/include/uapi/asm/amd_hsmp.h
arch/x86/include/uapi/asm/bootparam.h
arch/x86/include/uapi/asm/svm.h
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/acpi/cppc.c
arch/x86/kernel/alternative.c
arch/x86/kernel/amd_nb.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/asm-offsets.c
arch/x86/kernel/cpu/aperfmperf.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/mce/amd.c
arch/x86/kernel/cpu/mce/apei.c
arch/x86/kernel/cpu/mce/core.c
arch/x86/kernel/cpu/mce/severity.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/proc.c
arch/x86/kernel/cpu/resctrl/rdtgroup.c
arch/x86/kernel/cpu/scattered.c
arch/x86/kernel/cpu/sgx/encl.c
arch/x86/kernel/cpu/sgx/encl.h
arch/x86/kernel/cpu/sgx/main.c
arch/x86/kernel/crash.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/fpu/xstate.h
arch/x86/kernel/ftrace.c
arch/x86/kernel/head64.c
arch/x86/kernel/head_64.S
arch/x86/kernel/idt.c
arch/x86/kernel/nmi.c
arch/x86/kernel/probe_roms.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/setup.c
arch/x86/kernel/sev-shared.c
arch/x86/kernel/sev.c
arch/x86/kernel/signal.c
arch/x86/kernel/signal_compat.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/sys_x86_64.c
arch/x86/kernel/traps.c
arch/x86/kernel/vm86_32.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/pmu.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/lib/delay.c
arch/x86/lib/insn-eval.c
arch/x86/lib/kaslr.c
arch/x86/lib/mmx_32.c [deleted file]
arch/x86/math-emu/get_address.c
arch/x86/mm/Makefile
arch/x86/mm/amdtopology.c
arch/x86/mm/fault.c
arch/x86/mm/init_64.c
arch/x86/mm/ioremap.c
arch/x86/mm/mem_encrypt.c
arch/x86/mm/mem_encrypt_amd.c
arch/x86/mm/mem_encrypt_identity.c
arch/x86/mm/mmio-mod.c
arch/x86/mm/numa_emulation.c
arch/x86/mm/pat/memtype.c
arch/x86/mm/pti.c
arch/x86/mm/setup_nx.c [deleted file]
arch/x86/pci/irq.c
arch/x86/platform/efi/efi.c
arch/x86/platform/uv/uv_nmi.c
arch/x86/realmode/init.c
arch/x86/realmode/rm/header.S
arch/x86/realmode/rm/trampoline_64.S
arch/x86/realmode/rm/trampoline_common.S
arch/x86/realmode/rm/wakemain.c
arch/x86/virt/vmx/tdx/tdxcall.S [new file with mode: 0644]
arch/x86/xen/enlighten_pv.c
arch/x86/xen/smp_pv.c
arch/xtensa/Kconfig
arch/xtensa/boot/lib/Makefile
arch/xtensa/include/asm/barrier.h
arch/xtensa/include/asm/bitops.h
arch/xtensa/include/asm/coprocessor.h
arch/xtensa/include/asm/processor.h
arch/xtensa/include/asm/sections.h
arch/xtensa/include/asm/thread_info.h
arch/xtensa/include/asm/timex.h
arch/xtensa/include/asm/traps.h
arch/xtensa/kernel/Makefile
arch/xtensa/kernel/asm-offsets.c
arch/xtensa/kernel/coprocessor.S
arch/xtensa/kernel/entry.S
arch/xtensa/kernel/hibernate.c [new file with mode: 0644]
arch/xtensa/kernel/process.c
arch/xtensa/kernel/ptrace.c
arch/xtensa/kernel/s32c1i_selftest.c
arch/xtensa/kernel/signal.c
arch/xtensa/kernel/smp.c
arch/xtensa/kernel/traps.c
arch/xtensa/lib/Makefile
arch/xtensa/lib/kcsan-stubs.c [new file with mode: 0644]
arch/xtensa/lib/memcopy.S
arch/xtensa/mm/Makefile
arch/xtensa/mm/fault.c
arch/xtensa/mm/mmu.c
arch/xtensa/platforms/iss/network.c
arch/xtensa/platforms/iss/simdisk.c
arch/xtensa/platforms/xt2000/setup.c
block/Makefile
block/badblocks.c
block/bdev.c
block/bfq-cgroup.c
block/bfq-iosched.c
block/bfq-iosched.h
block/bio.c
block/blk-cgroup-fc-appid.c [new file with mode: 0644]
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-core.c
block/blk-crypto-fallback.c
block/blk-iocost.c
block/blk-iolatency.c
block/blk-lib.c
block/blk-map.c
block/blk-mq-debugfs.c
block/blk-mq.c
block/blk-settings.c
block/blk-throttle.c
block/blk.h
block/bounce.c
block/fops.c
block/genhd.c
block/ioctl.c
block/mq-deadline.c
block/partitions/acorn.c
block/partitions/atari.c
block/partitions/core.c
block/partitions/ldm.c
certs/.gitignore
certs/Kconfig
certs/Makefile
certs/blacklist.c
crypto/asymmetric_keys/x509_public_key.c
drivers/acpi/acpi_pad.c
drivers/acpi/acpi_video.c
drivers/acpi/acpica/acapps.h
drivers/acpi/acpica/accommon.h
drivers/acpi/acpica/acconvert.h
drivers/acpi/acpica/acdebug.h
drivers/acpi/acpica/acdispat.h
drivers/acpi/acpica/acevents.h
drivers/acpi/acpica/acglobal.h
drivers/acpi/acpica/achware.h
drivers/acpi/acpica/acinterp.h
drivers/acpi/acpica/aclocal.h
drivers/acpi/acpica/acmacros.h
drivers/acpi/acpica/acnamesp.h
drivers/acpi/acpica/acobject.h
drivers/acpi/acpica/acopcode.h
drivers/acpi/acpica/acparser.h
drivers/acpi/acpica/acpredef.h
drivers/acpi/acpica/acresrc.h
drivers/acpi/acpica/acstruct.h
drivers/acpi/acpica/actables.h
drivers/acpi/acpica/acutils.h
drivers/acpi/acpica/amlcode.h
drivers/acpi/acpica/amlresrc.h
drivers/acpi/acpica/dbhistry.c
drivers/acpi/acpica/dsargs.c
drivers/acpi/acpica/dscontrol.c
drivers/acpi/acpica/dsdebug.c
drivers/acpi/acpica/dsfield.c
drivers/acpi/acpica/dsinit.c
drivers/acpi/acpica/dsmethod.c
drivers/acpi/acpica/dsobject.c
drivers/acpi/acpica/dsopcode.c
drivers/acpi/acpica/dspkginit.c
drivers/acpi/acpica/dswexec.c
drivers/acpi/acpica/dswload.c
drivers/acpi/acpica/dswload2.c
drivers/acpi/acpica/dswscope.c
drivers/acpi/acpica/dswstate.c
drivers/acpi/acpica/evevent.c
drivers/acpi/acpica/evglock.c
drivers/acpi/acpica/evgpe.c
drivers/acpi/acpica/evgpeblk.c
drivers/acpi/acpica/evgpeinit.c
drivers/acpi/acpica/evgpeutil.c
drivers/acpi/acpica/evhandler.c
drivers/acpi/acpica/evmisc.c
drivers/acpi/acpica/evregion.c
drivers/acpi/acpica/evrgnini.c
drivers/acpi/acpica/evxface.c
drivers/acpi/acpica/evxfevnt.c
drivers/acpi/acpica/evxfgpe.c
drivers/acpi/acpica/evxfregn.c
drivers/acpi/acpica/exconcat.c
drivers/acpi/acpica/exconfig.c
drivers/acpi/acpica/exconvrt.c
drivers/acpi/acpica/excreate.c
drivers/acpi/acpica/exdebug.c
drivers/acpi/acpica/exdump.c
drivers/acpi/acpica/exfield.c
drivers/acpi/acpica/exfldio.c
drivers/acpi/acpica/exmisc.c
drivers/acpi/acpica/exmutex.c
drivers/acpi/acpica/exnames.c
drivers/acpi/acpica/exoparg1.c
drivers/acpi/acpica/exoparg2.c
drivers/acpi/acpica/exoparg3.c
drivers/acpi/acpica/exoparg6.c
drivers/acpi/acpica/exprep.c
drivers/acpi/acpica/exregion.c
drivers/acpi/acpica/exresnte.c
drivers/acpi/acpica/exresolv.c
drivers/acpi/acpica/exresop.c
drivers/acpi/acpica/exserial.c
drivers/acpi/acpica/exstore.c
drivers/acpi/acpica/exstoren.c
drivers/acpi/acpica/exstorob.c
drivers/acpi/acpica/exsystem.c
drivers/acpi/acpica/extrace.c
drivers/acpi/acpica/exutils.c
drivers/acpi/acpica/hwacpi.c
drivers/acpi/acpica/hwesleep.c
drivers/acpi/acpica/hwgpe.c
drivers/acpi/acpica/hwregs.c
drivers/acpi/acpica/hwsleep.c
drivers/acpi/acpica/hwtimer.c
drivers/acpi/acpica/hwvalid.c
drivers/acpi/acpica/hwxface.c
drivers/acpi/acpica/hwxfsleep.c
drivers/acpi/acpica/nsarguments.c
drivers/acpi/acpica/nsconvert.c
drivers/acpi/acpica/nsdump.c
drivers/acpi/acpica/nsdumpdv.c
drivers/acpi/acpica/nsinit.c
drivers/acpi/acpica/nsload.c
drivers/acpi/acpica/nsparse.c
drivers/acpi/acpica/nspredef.c
drivers/acpi/acpica/nsprepkg.c
drivers/acpi/acpica/nsrepair.c
drivers/acpi/acpica/nsrepair2.c
drivers/acpi/acpica/nsutils.c
drivers/acpi/acpica/nswalk.c
drivers/acpi/acpica/nsxfname.c
drivers/acpi/acpica/psargs.c
drivers/acpi/acpica/psloop.c
drivers/acpi/acpica/psobject.c
drivers/acpi/acpica/psopcode.c
drivers/acpi/acpica/psopinfo.c
drivers/acpi/acpica/psparse.c
drivers/acpi/acpica/psscope.c
drivers/acpi/acpica/pstree.c
drivers/acpi/acpica/psutils.c
drivers/acpi/acpica/pswalk.c
drivers/acpi/acpica/psxface.c
drivers/acpi/acpica/tbdata.c
drivers/acpi/acpica/tbfadt.c
drivers/acpi/acpica/tbfind.c
drivers/acpi/acpica/tbinstal.c
drivers/acpi/acpica/tbprint.c
drivers/acpi/acpica/tbutils.c
drivers/acpi/acpica/tbxface.c
drivers/acpi/acpica/tbxfload.c
drivers/acpi/acpica/tbxfroot.c
drivers/acpi/acpica/utaddress.c
drivers/acpi/acpica/utalloc.c
drivers/acpi/acpica/utascii.c
drivers/acpi/acpica/utbuffer.c
drivers/acpi/acpica/utcache.c
drivers/acpi/acpica/utcopy.c
drivers/acpi/acpica/utdebug.c
drivers/acpi/acpica/utdecode.c
drivers/acpi/acpica/uteval.c
drivers/acpi/acpica/utglobal.c
drivers/acpi/acpica/uthex.c
drivers/acpi/acpica/utids.c
drivers/acpi/acpica/utinit.c
drivers/acpi/acpica/utlock.c
drivers/acpi/acpica/utobject.c
drivers/acpi/acpica/utosi.c
drivers/acpi/acpica/utpredef.c
drivers/acpi/acpica/utprint.c
drivers/acpi/acpica/uttrack.c
drivers/acpi/acpica/utuuid.c
drivers/acpi/acpica/utxface.c
drivers/acpi/acpica/utxfinit.c
drivers/acpi/apei/einj.c
drivers/acpi/apei/erst-dbg.c
drivers/acpi/apei/erst.c
drivers/acpi/arm64/agdi.c
drivers/acpi/battery.c
drivers/acpi/bgrt.c
drivers/acpi/bus.c
drivers/acpi/cppc_acpi.c
drivers/acpi/device_pm.c
drivers/acpi/dptf/dptf_pch_fivr.c
drivers/acpi/dptf/dptf_power.c
drivers/acpi/dptf/int340x_thermal.c
drivers/acpi/fan.h
drivers/acpi/glue.c
drivers/acpi/osl.c
drivers/acpi/pci_root.c
drivers/acpi/processor_idle.c
drivers/acpi/sleep.c
drivers/acpi/spcr.c
drivers/acpi/sysfs.c
drivers/acpi/utils.c
drivers/amba/bus.c
drivers/ata/ahci.c
drivers/ata/ahci_brcm.c
drivers/ata/libata-core.c
drivers/ata/libata-sata.c
drivers/ata/libata-scsi.c
drivers/ata/libata.h
drivers/ata/pata_ftide010.c
drivers/ata/pata_mpc52xx.c
drivers/ata/pata_sil680.c
drivers/ata/pata_via.c
drivers/ata/sata_gemini.c
drivers/base/firmware_loader/main.c
drivers/base/power/common.c
drivers/base/power/domain.c
drivers/base/power/domain_governor.c
drivers/base/power/runtime.c
drivers/base/power/wakeup.c
drivers/base/regmap/internal.h
drivers/base/regmap/regcache.c
drivers/base/regmap/regmap-i3c.c
drivers/base/regmap/regmap-sccb.c
drivers/base/regmap/regmap-sdw-mbq.c
drivers/base/regmap/regmap-sdw.c
drivers/base/regmap/regmap-slimbus.c
drivers/base/regmap/regmap-w1.c
drivers/base/regmap/regmap.c
drivers/block/aoe/aoe.h
drivers/block/aoe/aoeblk.c
drivers/block/aoe/aoecmd.c
drivers/block/aoe/aoedev.c
drivers/block/aoe/aoemain.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_state.c
drivers/block/drbd/drbd_worker.c
drivers/block/loop.c
drivers/block/loop.h [deleted file]
drivers/block/mtip32xx/mtip32xx.c
drivers/block/nbd.c
drivers/block/null_blk/main.c
drivers/block/null_blk/null_blk.h
drivers/block/null_blk/zoned.c
drivers/block/pktcdvd.c
drivers/block/rbd.c
drivers/block/rnbd/rnbd-clt.c
drivers/block/rnbd/rnbd-srv-dev.h
drivers/block/rnbd/rnbd-srv.c
drivers/block/virtio_blk.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/block/zram/zram_drv.c
drivers/cdrom/cdrom.c
drivers/char/agp/amd64-agp.c
drivers/char/ipmi/ipmb_dev_int.c
drivers/char/ipmi/ipmi_ipmb.c
drivers/char/ipmi/ipmi_msghandler.c
drivers/char/ipmi/ipmi_poweroff.c
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/ipmi/ipmi_ssif.c
drivers/char/ipmi/ipmi_watchdog.c
drivers/char/random.c
drivers/char/tpm/tpm2-cmd.c
drivers/char/tpm/tpm_ftpm_tee.c
drivers/char/tpm/tpm_ibmvtpm.c
drivers/char/tpm/tpm_tis.c
drivers/char/tpm/tpm_tis_core.h
drivers/char/tpm/tpm_tis_i2c_cr50.c
drivers/char/tpm/tpm_tis_spi.h
drivers/char/tpm/tpm_tis_spi_cr50.c
drivers/char/tpm/tpm_tis_spi_main.c
drivers/char/tpm/tpm_tis_synquacer.c
drivers/char/tpm/xen-tpmfront.c
drivers/clk/at91/clk-generated.c
drivers/clk/bcm/clk-bcm2835.c
drivers/clk/sunxi-ng/ccu-sun6i-rtc.c
drivers/clocksource/Kconfig
drivers/clocksource/Makefile
drivers/clocksource/timer-goldfish.c [new file with mode: 0644]
drivers/cpufreq/cppc_cpufreq.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/cpufreq_governor.h
drivers/cpufreq/intel_pstate.c
drivers/cpufreq/mediatek-cpufreq-hw.c
drivers/cpufreq/pasemi-cpufreq.c
drivers/cpufreq/pmac32-cpufreq.c
drivers/cpufreq/pmac64-cpufreq.c
drivers/cpufreq/ppc_cbe_cpufreq.c
drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
drivers/cpufreq/scmi-cpufreq.c
drivers/cpuidle/cpuidle-psci-domain.c
drivers/cpuidle/cpuidle-psci.c
drivers/cpuidle/cpuidle-riscv-sbi.c
drivers/crypto/caam/Kconfig
drivers/crypto/caam/Makefile
drivers/crypto/caam/blob_gen.c [new file with mode: 0644]
drivers/crypto/caam/ctrl.c
drivers/crypto/caam/intern.h
drivers/crypto/caam/regs.h
drivers/crypto/qcom-rng.c
drivers/devfreq/devfreq.c
drivers/devfreq/governor.h
drivers/devfreq/governor_passive.c
drivers/devfreq/rk3399_dmc.c
drivers/dma-buf/dma-buf.c
drivers/edac/Kconfig
drivers/edac/amd64_edac.c
drivers/edac/armada_xp_edac.c
drivers/edac/dmc520_edac.c
drivers/edac/edac_device.c
drivers/edac/edac_device.h
drivers/edac/edac_device_sysfs.c
drivers/edac/edac_mc.c
drivers/edac/edac_module.h
drivers/edac/edac_pci.c
drivers/edac/ghes_edac.c
drivers/edac/i5100_edac.c
drivers/edac/mpc85xx_edac.c
drivers/edac/synopsys_edac.c
drivers/edac/xgene_edac.c
drivers/firmware/efi/Kconfig
drivers/firmware/efi/cper.c
drivers/firmware/efi/efi.c
drivers/firmware/efi/libstub/arm32-stub.c
drivers/firmware/efi/libstub/arm64-stub.c
drivers/firmware/efi/libstub/efi-stub.c
drivers/firmware/efi/libstub/efistub.h
drivers/firmware/efi/libstub/randomalloc.c
drivers/firmware/efi/libstub/riscv-stub.c
drivers/firmware/efi/libstub/x86-stub.c
drivers/gpio/TODO
drivers/gpio/gpio-mvebu.c
drivers/gpio/gpio-pl061.c
drivers/gpio/gpio-tegra186.c
drivers/gpio/gpio-vf610.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/dp/drm_dp_mst_topology.c
drivers/gpu/drm/i915/display/intel_dmc.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/uc/intel_guc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gt/uc/intel_uc.c
drivers/gpu/drm/i915/gt/uc/intel_uc.h
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/nouveau/nouveau_backlight.c
drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
drivers/gpu/drm/vc4/vc4_hdmi.c
drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/xen/xen_drm_front.h
drivers/gpu/drm/xen/xen_drm_front_evtchnl.c
drivers/hid/Kconfig
drivers/hid/Makefile
drivers/hid/amd-sfh-hid/amd_sfh_client.c
drivers/hid/amd-sfh-hid/amd_sfh_hid.c
drivers/hid/amd-sfh-hid/amd_sfh_hid.h
drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h
drivers/hid/hid-apple.c
drivers/hid/hid-bigbenff.c
drivers/hid/hid-core.c
drivers/hid/hid-elan.c
drivers/hid/hid-ids.h
drivers/hid/hid-kye.c
drivers/hid/hid-led.c
drivers/hid/hid-lenovo.c
drivers/hid/hid-megaworld.c [new file with mode: 0644]
drivers/hid/hid-multitouch.c
drivers/hid/hid-uclogic-core.c
drivers/hid/hid-uclogic-params.c
drivers/hid/hid-uclogic-params.h
drivers/hid/hid-uclogic-rdesc.c
drivers/hid/hid-uclogic-rdesc.h
drivers/hid/hid-viewsonic.c
drivers/hid/intel-ish-hid/ipc/hw-ish.h
drivers/hid/intel-ish-hid/ipc/pci-ish.c
drivers/hid/wacom_sys.c
drivers/hid/wacom_wac.c
drivers/hid/wacom_wac.h
drivers/hwmon/Kconfig
drivers/hwmon/Makefile
drivers/hwmon/acpi_power_meter.c
drivers/hwmon/adt7475.c
drivers/hwmon/aquacomputer_d5next.c
drivers/hwmon/as370-hwmon.c
drivers/hwmon/asus-ec-sensors.c
drivers/hwmon/bt1-pvt.c
drivers/hwmon/dell-smm-hwmon.c
drivers/hwmon/hwmon.c
drivers/hwmon/ibmaem.c
drivers/hwmon/intel-m10-bmc-hwmon.c
drivers/hwmon/jc42.c
drivers/hwmon/lan966x-hwmon.c [new file with mode: 0644]
drivers/hwmon/lm75.c
drivers/hwmon/lm83.c
drivers/hwmon/lm90.c
drivers/hwmon/ltc2992.c
drivers/hwmon/mr75203.c
drivers/hwmon/nct6775-core.c [moved from drivers/hwmon/nct6775.c with 66% similarity]
drivers/hwmon/nct6775-i2c.c [new file with mode: 0644]
drivers/hwmon/nct6775-platform.c [new file with mode: 0644]
drivers/hwmon/nct6775.h [new file with mode: 0644]
drivers/hwmon/occ/common.c
drivers/hwmon/occ/common.h
drivers/hwmon/occ/p8_i2c.c
drivers/hwmon/occ/p9_sbe.c
drivers/hwmon/occ/sysfs.c
drivers/hwmon/peci/cputemp.c
drivers/hwmon/peci/dimmtemp.c
drivers/hwmon/pmbus/Kconfig
drivers/hwmon/pmbus/Makefile
drivers/hwmon/pmbus/ltc2978.c
drivers/hwmon/pmbus/max16601.c
drivers/hwmon/pmbus/pmbus.h
drivers/hwmon/pmbus/pmbus_core.c
drivers/hwmon/pmbus/xdpe152c4.c [new file with mode: 0644]
drivers/hwmon/pwm-fan.c
drivers/hwmon/sl28cpld-hwmon.c
drivers/hwmon/tmp401.c
drivers/i2c/busses/i2c-ismt.c
drivers/i2c/busses/i2c-mt7621.c
drivers/i2c/busses/i2c-thunderx-pcidrv.c
drivers/idle/intel_idle.c
drivers/iio/adc/qcom-vadc-common.c
drivers/iio/chemical/scd30.h
drivers/iio/chemical/scd30_core.c
drivers/iio/chemical/scd30_i2c.c
drivers/iio/chemical/scd30_serial.c
drivers/input/touchscreen/ili210x.c
drivers/interconnect/core.c
drivers/irqchip/Kconfig
drivers/irqchip/Makefile
drivers/irqchip/irq-armada-370-xp.c
drivers/irqchip/irq-aspeed-i2c-ic.c
drivers/irqchip/irq-aspeed-scu-ic.c
drivers/irqchip/irq-bcm6345-l1.c
drivers/irqchip/irq-csky-apb-intc.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
drivers/irqchip/irq-imx-irqsteer.c
drivers/irqchip/irq-sni-exiu.c
drivers/irqchip/irq-sun6i-r.c
drivers/irqchip/irq-xtensa-mx.c
drivers/md/bcache/alloc.c
drivers/md/bcache/debug.c
drivers/md/bcache/request.c
drivers/md/bcache/super.c
drivers/md/bcache/sysfs.c
drivers/md/dm-bufio.c
drivers/md/dm-cache-target.c
drivers/md/dm-clone-target.c
drivers/md/dm-io.c
drivers/md/dm-log-writes.c
drivers/md/dm-raid.c
drivers/md/dm-table.c
drivers/md/dm-thin.c
drivers/md/dm-zoned-target.c
drivers/md/dm.c
drivers/md/md-bitmap.c
drivers/md/md-cluster.c
drivers/md/md-linear.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5-cache.c
drivers/md/raid5-ppl.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/misc/lkdtm/stackleak.c
drivers/mmc/core/block.c
drivers/mmc/core/core.c
drivers/mmc/core/mmc.c
drivers/mmc/core/mmc_ops.c
drivers/mmc/core/queue.c
drivers/mmc/core/sd.c
drivers/mmc/host/atmel-mci.c
drivers/mmc/host/bcm2835.c
drivers/mmc/host/dw_mmc.c
drivers/mmc/host/jz4740_mmc.c
drivers/mmc/host/meson-gx-mmc.c
drivers/mmc/host/meson-mx-sdhc-mmc.c
drivers/mmc/host/mmci.c
drivers/mmc/host/mmci_stm32_sdmmc.c
drivers/mmc/host/mtk-sd.c
drivers/mmc/host/of_mmc_spi.c
drivers/mmc/host/omap.c
drivers/mmc/host/renesas_sdhi.h
drivers/mmc/host/renesas_sdhi_core.c
drivers/mmc/host/renesas_sdhi_internal_dmac.c
drivers/mmc/host/sdhci-brcmstb.c
drivers/mmc/host/sdhci-msm.c
drivers/mmc/host/sdhci-of-arasan.c
drivers/mmc/host/sdhci-omap.c
drivers/mmc/host/sdhci-pci-gli.c
drivers/mmc/host/sdhci.c
drivers/mmc/host/sdhci_am654.c
drivers/mmc/host/sh_mmcif.c
drivers/mmc/host/sunxi-mmc.c
drivers/mmc/host/uniphier-sd.c
drivers/mtd/chips/cfi_cmdset_0002.c
drivers/mtd/devices/phram.c
drivers/mtd/devices/st_spi_fsm.c
drivers/mtd/maps/Kconfig
drivers/mtd/maps/Makefile
drivers/mtd/maps/ixp4xx.c [deleted file]
drivers/mtd/mtd_blkdevs.c
drivers/mtd/mtdblock.c
drivers/mtd/mtdcore.c
drivers/mtd/mtdoops.c
drivers/mtd/mtdpart.c
drivers/mtd/nand/Kconfig
drivers/mtd/nand/Makefile
drivers/mtd/nand/ecc-mtk.c [moved from drivers/mtd/nand/raw/mtk_ecc.c with 98% similarity]
drivers/mtd/nand/raw/Kconfig
drivers/mtd/nand/raw/Makefile
drivers/mtd/nand/raw/cadence-nand-controller.c
drivers/mtd/nand/raw/cs553x_nand.c
drivers/mtd/nand/raw/davinci_nand.c
drivers/mtd/nand/raw/denali_pci.c
drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.h
drivers/mtd/nand/raw/intel-nand-controller.c
drivers/mtd/nand/raw/mpc5121_nfc.c
drivers/mtd/nand/raw/mtk_nand.c
drivers/mtd/nand/raw/nand_base.c
drivers/mtd/nand/raw/nand_ids.c
drivers/mtd/nand/raw/nand_toshiba.c
drivers/mtd/nand/raw/omap_elm.c
drivers/mtd/nand/raw/renesas-nand-controller.c
drivers/mtd/nand/raw/rockchip-nand-controller.c
drivers/mtd/nand/raw/tmio_nand.c
drivers/mtd/nand/spi/Makefile
drivers/mtd/nand/spi/core.c
drivers/mtd/nand/spi/gigadevice.c
drivers/mtd/nand/spi/xtx.c [new file with mode: 0644]
drivers/mtd/parsers/bcm47xxpart.c
drivers/mtd/spi-nor/Makefile
drivers/mtd/spi-nor/controllers/Kconfig
drivers/mtd/spi-nor/controllers/Makefile
drivers/mtd/spi-nor/controllers/aspeed-smc.c [deleted file]
drivers/mtd/spi-nor/core.c
drivers/mtd/spi-nor/core.h
drivers/mtd/spi-nor/debugfs.c [new file with mode: 0644]
drivers/mtd/spi-nor/eon.c
drivers/mtd/spi-nor/micron-st.c
drivers/mtd/spi-nor/spansion.c
drivers/mtd/spi-nor/winbond.c
drivers/mtd/spi-nor/xilinx.c
drivers/net/can/m_can/m_can.c
drivers/net/can/m_can/m_can.h
drivers/net/can/m_can/m_can_pci.c
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/ocelot/felix.c
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/dec/tulip/tulip_core.c
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/ice/ice.h
drivers/net/ethernet/intel/ice/ice_idc.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_ptp.c
drivers/net/ethernet/intel/ice/ice_txrx.h
drivers/net/ethernet/intel/ice/ice_virtchnl.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/mediatek/mtk_ppe.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
drivers/net/ethernet/microchip/lan966x/lan966x_main.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot_flower.c
drivers/net/ethernet/mscc/ocelot_vcap.c
drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
drivers/net/ethernet/qlogic/qla3xxx.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/efx_channels.c
drivers/net/ethernet/sfc/ptp.c
drivers/net/ethernet/sfc/ptp.h
drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
drivers/net/ethernet/sun/niu.c
drivers/net/ipa/gsi.c
drivers/net/ipa/ipa_endpoint.c
drivers/net/ipa/ipa_qmi.c
drivers/net/phy/micrel.c
drivers/net/phy/phy.c
drivers/net/ppp/pppoe.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/wireless/ath/ath11k/core.c
drivers/net/wireless/ath/ath11k/core.h
drivers/net/wireless/ath/ath11k/mac.c
drivers/net/wireless/ath/ath11k/mac.h
drivers/net/wireless/ath/ath11k/reg.c
drivers/net/wireless/ath/ath11k/reg.h
drivers/net/wireless/ath/ath11k/wmi.c
drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/xen-netfront.c
drivers/nfc/pn533/pn533.c
drivers/nvme/host/constants.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/ioctl.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/nvme/target/io-cmd-bdev.c
drivers/nvme/target/zns.c
drivers/of/fdt.c
drivers/of/kexec.c
drivers/of/platform.c
drivers/opp/of.c
drivers/pci/controller/dwc/pcie-qcom.c
drivers/pci/controller/pci-aardvark.c
drivers/pci/pci-acpi.c
drivers/pci/pci.c
drivers/pci/xen-pcifront.c
drivers/perf/arm-cmn.c
drivers/perf/arm_pmu_acpi.c
drivers/perf/arm_spe_pmu.c
drivers/perf/hisilicon/Makefile
drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c [new file with mode: 0644]
drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
drivers/perf/hisilicon/hisi_uncore_pmu.c
drivers/perf/hisilicon/hisi_uncore_pmu.h
drivers/perf/marvell_cn10k_tad_pmu.c
drivers/perf/riscv_pmu_sbi.c
drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
drivers/pinctrl/mediatek/pinctrl-mt8365.c
drivers/pinctrl/pinctrl-amd.c
drivers/pinctrl/pinctrl-apple-gpio.c
drivers/pinctrl/pinctrl-ocelot.c
drivers/pinctrl/qcom/pinctrl-msm.c
drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c
drivers/platform/mellanox/Kconfig
drivers/platform/mellanox/Makefile
drivers/platform/mellanox/nvsw-sn2201.c [new file with mode: 0644]
drivers/platform/surface/aggregator/core.c
drivers/platform/surface/surface_gpe.c
drivers/platform/x86/Kconfig
drivers/platform/x86/Makefile
drivers/platform/x86/amd-pmc.c
drivers/platform/x86/asus-nb-wmi.c
drivers/platform/x86/asus-wmi.c
drivers/platform/x86/dell/dcdbas.c
drivers/platform/x86/dell/dcdbas.h
drivers/platform/x86/dell/dell-smbios-smm.c
drivers/platform/x86/gigabyte-wmi.c
drivers/platform/x86/hp-wmi.c
drivers/platform/x86/intel/Kconfig
drivers/platform/x86/intel/Makefile
drivers/platform/x86/intel/chtwc_int33fe.c
drivers/platform/x86/intel/hid.c
drivers/platform/x86/intel/ifs/Kconfig [new file with mode: 0644]
drivers/platform/x86/intel/ifs/Makefile [new file with mode: 0644]
drivers/platform/x86/intel/ifs/core.c [new file with mode: 0644]
drivers/platform/x86/intel/ifs/ifs.h [new file with mode: 0644]
drivers/platform/x86/intel/ifs/load.c [new file with mode: 0644]
drivers/platform/x86/intel/ifs/runtest.c [new file with mode: 0644]
drivers/platform/x86/intel/ifs/sysfs.c [new file with mode: 0644]
drivers/platform/x86/intel/pmc/core.c
drivers/platform/x86/intel/pmt/telemetry.c
drivers/platform/x86/pmc_atom.c
drivers/platform/x86/samsung-laptop.c
drivers/platform/x86/thinkpad_acpi.c
drivers/platform/x86/toshiba_acpi.c
drivers/platform/x86/winmate-fm07-keys.c [new file with mode: 0644]
drivers/platform/x86/wmi.c
drivers/powercap/dtpm_cpu.c
drivers/powercap/intel_rapl_common.c
drivers/powercap/intel_rapl_msr.c
drivers/ptp/ptp_ocp.c
drivers/regulator/Kconfig
drivers/regulator/Makefile
drivers/regulator/core.c
drivers/regulator/da9121-regulator.c
drivers/regulator/fixed.c
drivers/regulator/mt6358-regulator.c
drivers/regulator/pca9450-regulator.c
drivers/regulator/pfuze100-regulator.c
drivers/regulator/qcom_smd-regulator.c
drivers/regulator/rpi-panel-attiny-regulator.c
drivers/regulator/rt4801-regulator.c
drivers/regulator/rt5759-regulator.c [new file with mode: 0644]
drivers/regulator/scmi-regulator.c
drivers/regulator/sm5703-regulator.c [new file with mode: 0644]
drivers/regulator/stm32-vrefbuf.c
drivers/rtc/rtc-goldfish.c
drivers/rtc/rtc-sun6i.c
drivers/s390/block/dasd_fba.c
drivers/s390/char/con3215.c
drivers/s390/char/con3270.c
drivers/s390/char/raw3270.c
drivers/s390/char/raw3270.h
drivers/s390/char/sclp_con.c
drivers/s390/char/sclp_early.c
drivers/s390/char/sclp_vt220.c
drivers/s390/cio/chsc.c
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_bus.h
drivers/s390/crypto/ap_queue.c
drivers/s390/crypto/pkey_api.c
drivers/s390/crypto/vfio_ap_drv.c
drivers/s390/crypto/zcrypt_api.c
drivers/s390/crypto/zcrypt_api.h
drivers/s390/crypto/zcrypt_card.c
drivers/s390/crypto/zcrypt_cca_key.h
drivers/s390/crypto/zcrypt_ccamisc.c
drivers/s390/crypto/zcrypt_ccamisc.h
drivers/s390/crypto/zcrypt_cex2a.c
drivers/s390/crypto/zcrypt_cex2c.c
drivers/s390/crypto/zcrypt_cex4.c
drivers/s390/crypto/zcrypt_ep11misc.c
drivers/s390/crypto/zcrypt_ep11misc.h
drivers/s390/crypto/zcrypt_error.h
drivers/s390/crypto/zcrypt_msgtype50.c
drivers/s390/crypto/zcrypt_msgtype6.c
drivers/s390/crypto/zcrypt_msgtype6.h
drivers/s390/crypto/zcrypt_queue.c
drivers/s390/net/ctcm_mpc.c
drivers/s390/net/ctcm_sysfs.c
drivers/s390/net/lcs.c
drivers/scsi/device_handler/scsi_dh_alua.c
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/sd.c
drivers/scsi/ufs/ufshpb.c
drivers/scsi/xen-scsifront.c
drivers/slimbus/qcom-ctrl.c
drivers/soc/rockchip/pm_domains.c
drivers/spi/Kconfig
drivers/spi/Makefile
drivers/spi/atmel-quadspi.c
drivers/spi/spi-aspeed-smc.c [new file with mode: 0644]
drivers/spi/spi-au1550.c
drivers/spi/spi-cadence-quadspi.c
drivers/spi/spi-cadence.c
drivers/spi/spi-clps711x.c
drivers/spi/spi-fsl-qspi.c
drivers/spi/spi-img-spfi.c
drivers/spi/spi-imx.c
drivers/spi/spi-ingenic.c
drivers/spi/spi-intel.c
drivers/spi/spi-mem.c
drivers/spi/spi-mpc52xx-psc.c
drivers/spi/spi-mpc52xx.c
drivers/spi/spi-mt65xx.c
drivers/spi/spi-mtk-snfi.c [new file with mode: 0644]
drivers/spi/spi-mxs.c
drivers/spi/spi-omap2-mcspi.c
drivers/spi/spi-rockchip.c
drivers/spi/spi-rspi.c
drivers/spi/spi-sprd.c
drivers/spi/spi-stm32-qspi.c
drivers/spi/spi-stm32.c
drivers/spi/spi-sunplus-sp7021.c
drivers/spi/spi-tegra114.c
drivers/spi/spi-tegra20-sflash.c
drivers/spi/spi-tegra20-slink.c
drivers/spi/spi-ti-qspi.c
drivers/spi/spi.c
drivers/spi/spidev.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_configfs.c
drivers/target/target_core_device.c
drivers/target/target_core_file.c
drivers/target/target_core_iblock.c
drivers/target/target_core_pscsi.c
drivers/thermal/Makefile
drivers/thermal/broadcom/bcm2711_thermal.c
drivers/thermal/broadcom/sr-thermal.c
drivers/thermal/cpufreq_cooling.c
drivers/thermal/devfreq_cooling.c
drivers/thermal/hisi_thermal.c
drivers/thermal/imx_sc_thermal.c
drivers/thermal/intel/int340x_thermal/int3400_thermal.c
drivers/thermal/intel/int340x_thermal/int3403_thermal.c
drivers/thermal/intel/intel_hfi.c
drivers/thermal/intel/intel_pch_thermal.c
drivers/thermal/k3_bandgap.c
drivers/thermal/k3_j72xx_bandgap.c [new file with mode: 0644]
drivers/thermal/qcom/lmh.c
drivers/thermal/qcom/qcom-spmi-adc-tm5.c
drivers/thermal/qcom/tsens.c
drivers/thermal/rcar_thermal.c
drivers/thermal/rzg2l_thermal.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_hwmon.c
drivers/thermal/thermal_of.c
drivers/tty/goldfish.c
drivers/tty/n_gsm.c
drivers/tty/serial/8250/8250_mtk.c
drivers/tty/serial/digicolor-usart.c
drivers/tty/serial/fsl_lpuart.c
drivers/usb/class/cdc-wdm.c
drivers/usb/gadget/function/f_uvc.c
drivers/usb/gadget/function/uvc.h
drivers/usb/gadget/function/uvc_v4l2.c
drivers/usb/gadget/legacy/raw_gadget.c
drivers/usb/host/xen-hcd.c
drivers/usb/host/xhci-mtk-sch.c
drivers/usb/host/xhci-mtk.h
drivers/usb/serial/option.c
drivers/usb/serial/pl2303.c
drivers/usb/serial/pl2303.h
drivers/usb/serial/qcserial.c
drivers/usb/typec/tcpm/tcpci.c
drivers/usb/typec/tcpm/tcpci_mt6360.c
drivers/usb/typec/ucsi/ucsi_acpi.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vhost/net.c
drivers/video/fbdev/core/fbmem.c
drivers/video/fbdev/core/fbsysfs.c
drivers/video/fbdev/efifb.c
drivers/video/fbdev/simplefb.c
drivers/video/fbdev/vesafb.c
drivers/virt/Kconfig
drivers/virt/Makefile
drivers/virt/coco/efi_secret/Kconfig [new file with mode: 0644]
drivers/virt/coco/efi_secret/Makefile [new file with mode: 0644]
drivers/virt/coco/efi_secret/efi_secret.c [new file with mode: 0644]
drivers/virt/coco/sev-guest/Kconfig [new file with mode: 0644]
drivers/virt/coco/sev-guest/Makefile [moved from arch/s390/boot/compressed/.gitignore with 50% similarity]
drivers/virt/coco/sev-guest/sev-guest.c [new file with mode: 0644]
drivers/virt/coco/sev-guest/sev-guest.h [new file with mode: 0644]
drivers/xen/gntdev-dmabuf.c
drivers/xen/grant-table.c
drivers/xen/xen-front-pgdir-shbuf.c
drivers/xen/xen-scsiback.c
drivers/xen/xenbus/xenbus_client.c
drivers/xen/xenbus/xenbus_probe.c
fs/Kconfig.binfmt
fs/afs/inode.c
fs/binfmt_flat.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/ioctl.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/btrfs/zoned.c
fs/ceph/addr.c
fs/ceph/file.c
fs/crypto/crypto.c
fs/crypto/fname.c
fs/crypto/fscrypt_private.h
fs/crypto/inline_crypt.c
fs/crypto/keyring.c
fs/crypto/keysetup.c
fs/crypto/policy.c
fs/direct-io.c
fs/exfat/file.c
fs/exfat/super.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/super.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/segment.c
fs/fat/file.c
fs/fat/inode.c
fs/fs-writeback.c
fs/gfs2/bmap.c
fs/gfs2/file.c
fs/gfs2/rgrp.c
fs/internal.h
fs/io-wq.c
fs/io-wq.h
fs/io_uring.c
fs/iomap/direct-io.c
fs/jbd2/journal.c
fs/jfs/ioctl.c
fs/jfs/super.c
fs/nfs/fs_context.c
fs/nilfs2/ioctl.c
fs/nilfs2/sufile.c
fs/nilfs2/the_nilfs.c
fs/notify/fanotify/fanotify_user.c
fs/ntfs3/file.c
fs/ntfs3/super.c
fs/ocfs2/ioctl.c
fs/proc/cpuinfo.c
fs/proc/fd.c
fs/squashfs/block.c
fs/super.c
fs/ubifs/ubifs.h
fs/udf/namei.c
fs/verity/Kconfig
fs/verity/enable.c
fs/verity/fsverity_private.h
fs/verity/measure.c
fs/verity/open.c
fs/verity/read_metadata.c
fs/xattr.c
fs/xfs/xfs_discard.c
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_super.c
fs/zonefs/Makefile
fs/zonefs/super.c
fs/zonefs/sysfs.c [new file with mode: 0644]
fs/zonefs/zonefs.h
include/acpi/acbuffer.h
include/acpi/acconfig.h
include/acpi/acexcep.h
include/acpi/acnames.h
include/acpi/acoutput.h
include/acpi/acpi.h
include/acpi/acpi_bus.h
include/acpi/acpiosxf.h
include/acpi/acpixf.h
include/acpi/acrestyp.h
include/acpi/actbl.h
include/acpi/actbl1.h
include/acpi/actbl2.h
include/acpi/actbl3.h
include/acpi/actypes.h
include/acpi/acuuid.h
include/acpi/apei.h
include/acpi/cppc_acpi.h
include/acpi/platform/acenv.h
include/acpi/platform/acenvex.h
include/acpi/platform/acgcc.h
include/acpi/platform/acgccex.h
include/acpi/platform/acintel.h
include/acpi/platform/aclinux.h
include/acpi/platform/aclinuxex.h
include/asm-generic/qrwlock.h
include/asm-generic/qrwlock_types.h
include/asm-generic/vmlinux.lds.h
include/clocksource/timer-goldfish.h [new file with mode: 0644]
include/keys/system_keyring.h
include/keys/trusted-type.h
include/keys/trusted_caam.h [new file with mode: 0644]
include/linux/acpi.h
include/linux/amba/mmci.h
include/linux/atomic/atomic-arch-fallback.h
include/linux/atomic/atomic-instrumented.h
include/linux/audit.h
include/linux/backing-dev.h
include/linux/bio.h
include/linux/blk-cgroup.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/blktrace_api.h
include/linux/bpf.h
include/linux/cc_platform.h
include/linux/cdrom.h
include/linux/ceph/osd_client.h
include/linux/compat.h
include/linux/compiler-clang.h
include/linux/compiler-gcc.h
include/linux/compiler.h
include/linux/compiler_types.h
include/linux/cper.h
include/linux/cpufreq.h
include/linux/cpuhotplug.h
include/linux/devfreq.h
include/linux/efi.h
include/linux/elfcore.h
include/linux/energy_model.h
include/linux/entry-common.h
include/linux/fs.h
include/linux/fscrypt.h
include/linux/fsverity.h
include/linux/goldfish.h
include/linux/gpio/driver.h
include/linux/highmem-internal.h
include/linux/hwmon.h
include/linux/iio/adc/qcom-vadc-common.h
include/linux/instrumentation.h
include/linux/interrupt.h
include/linux/io_uring.h
include/linux/ipmi.h
include/linux/ipmi_smi.h
include/linux/irq.h
include/linux/irqchip/arm-gic-v3.h
include/linux/irqflags.h
include/linux/kthread.h
include/linux/kvm_host.h
include/linux/libata.h
include/linux/linkage.h
include/linux/lsm_audit.h
include/linux/lsm_hook_defs.h
include/linux/lsm_hooks.h
include/linux/mm.h
include/linux/mmc/core.h
include/linux/mmc/host.h
include/linux/mtd/cfi.h
include/linux/mtd/nand-ecc-mtk.h [moved from drivers/mtd/nand/raw/mtk_ecc.h with 100% similarity]
include/linux/mtd/spi-nor.h
include/linux/mtd/spinand.h
include/linux/netdev_features.h
include/linux/netdevice.h
include/linux/netfs.h
include/linux/nvme.h
include/linux/objtool.h
include/linux/pagemap.h
include/linux/perf_event.h
include/linux/platform_data/mlxreg.h
include/linux/platform_data/sh_mmcif.h [moved from include/linux/mmc/sh_mmcif.h with 99% similarity]
include/linux/platform_data/x86/pmc_atom.h
include/linux/pm.h
include/linux/pm_domain.h
include/linux/pm_runtime.h
include/linux/polynomial.h [new file with mode: 0644]
include/linux/prandom.h
include/linux/random.h
include/linux/randomize_kstack.h
include/linux/rcupdate.h
include/linux/regmap.h
include/linux/regulator/mt6358-regulator.h
include/linux/regulator/pca9450.h
include/linux/sched.h
include/linux/sched/signal.h
include/linux/sched/task_stack.h
include/linux/seccomp.h
include/linux/security.h
include/linux/siphash.h
include/linux/socket.h
include/linux/spi/spi.h
include/linux/srcutree.h
include/linux/stackleak.h
include/linux/stop_machine.h
include/linux/sunrpc/clnt.h
include/linux/suspend.h
include/linux/task_work.h
include/linux/thermal.h
include/linux/timekeeping.h
include/linux/timer.h
include/linux/timex.h
include/linux/topology.h
include/linux/torture.h
include/linux/uaccess.h
include/linux/vermagic.h
include/net/bluetooth/hci_core.h
include/net/inet_timewait_sock.h
include/net/ip.h
include/net/tc_act/tc_pedit.h
include/net/xfrm.h
include/soc/fsl/caam-blob.h [new file with mode: 0644]
include/soc/mscc/ocelot_vcap.h
include/soc/rockchip/pm_domains.h [new file with mode: 0644]
include/target/target_core_backend.h
include/trace/events/intel_ifs.h [new file with mode: 0644]
include/trace/events/io_uring.h
include/trace/events/lock.h
include/trace/events/sched.h
include/trace/events/timer.h
include/uapi/asm-generic/siginfo.h
include/uapi/linux/cdrom.h
include/uapi/linux/dma-buf.h
include/uapi/linux/elf.h
include/uapi/linux/input.h
include/uapi/linux/io_uring.h
include/uapi/linux/landlock.h
include/uapi/linux/loop.h
include/uapi/linux/nvme_ioctl.h
include/uapi/linux/prctl.h
include/uapi/linux/rfkill.h
include/uapi/linux/seccomp.h
include/uapi/linux/sev-guest.h [new file with mode: 0644]
include/uapi/linux/spi/spi.h
include/uapi/linux/virtio_ids.h
include/xen/grant_table.h
include/xen/interface/grant_table.h
include/xen/interface/io/ring.h
include/xen/interface/io/vscsiif.h
include/xen/interface/io/xs_wire.h
include/xen/xenbus.h
init/main.c
kernel/auditsc.c
kernel/bpf/Kconfig
kernel/cgroup/cpuset.c
kernel/configs/x86_debug.config [new file with mode: 0644]
kernel/cpu.c
kernel/crash_core.c
kernel/debug/debug_core.c
kernel/debug/kdb/kdb_main.c
kernel/entry/common.c
kernel/events/core.c
kernel/fork.c
kernel/futex/pi.c
kernel/irq/affinity.c
kernel/irq/chip.c
kernel/irq/debugfs.c
kernel/irq/irq_sim.c
kernel/irq/irqdesc.c
kernel/irq/manage.c
kernel/irq/matrix.c
kernel/irq/msi.c
kernel/kcsan/kcsan_test.c
kernel/kexec_core.c
kernel/kthread.c
kernel/locking/lockdep.c
kernel/locking/mutex.c
kernel/locking/percpu-rwsem.c
kernel/locking/qrwlock.c
kernel/locking/qspinlock.c
kernel/locking/rtmutex.c
kernel/locking/rwbase_rt.c
kernel/locking/rwsem.c
kernel/locking/semaphore.c
kernel/panic.c
kernel/power/Makefile
kernel/power/energy_model.c
kernel/power/main.c
kernel/power/process.c
kernel/power/snapshot.c
kernel/rcu/Kconfig
kernel/rcu/Kconfig.debug
kernel/rcu/rcu.h
kernel/rcu/rcu_segcblist.c
kernel/rcu/rcuscale.c
kernel/rcu/rcutorture.c
kernel/rcu/refscale.c
kernel/rcu/srcutree.c
kernel/rcu/sync.c
kernel/rcu/tasks.h
kernel/rcu/tree.c
kernel/rcu/tree.h
kernel/rcu/tree_exp.h
kernel/rcu/tree_nocb.h
kernel/rcu/tree_plugin.h
kernel/rcu/tree_stall.h
kernel/rcu/update.c
kernel/scftorture.c
kernel/sched/build_policy.c
kernel/sched/build_utility.c
kernel/sched/clock.c
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/sched/pelt.h
kernel/sched/psi.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/smp.h
kernel/seccomp.c
kernel/signal.c
kernel/smp.c
kernel/smpboot.c
kernel/softirq.c
kernel/stackleak.c
kernel/stop_machine.c
kernel/sys.c
kernel/sysctl.c
kernel/task_work.c
kernel/time/clockevents.c
kernel/time/clocksource.c
kernel/time/sched_clock.c
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/time/timer.c
kernel/trace/Kconfig
kernel/trace/blktrace.c
kernel/trace/fgraph.c
kernel/trace/ftrace.c
kernel/trace/trace.c
kernel/trace/trace_events.c
kernel/trace/trace_osnoise.c
kernel/trace/trace_preemptirq.c
kernel/trace/trace_sched_switch.c
kernel/trace/trace_sched_wakeup.c
lib/Kconfig
lib/Kconfig.debug
lib/Kconfig.kcsan
lib/Kconfig.ubsan
lib/Makefile
lib/bug.c
lib/debugobjects.c
lib/dim/net_dim.c
lib/irq_poll.c
lib/percpu-refcount.c
lib/polynomial.c [new file with mode: 0644]
lib/random32.c
lib/siphash.c
lib/vsprintf.c
mm/backing-dev.c
mm/gup.c
mm/huge_memory.c
mm/kfence/core.c
mm/memory-failure.c
mm/mremap.c
mm/page_io.c
mm/readahead.c
mm/swapfile.c
mm/usercopy.c
mm/util.c
net/batman-adv/fragmentation.c
net/bluetooth/hci_core.c
net/bridge/br_input.c
net/ceph/osd_client.c
net/core/dev.c
net/core/skbuff.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/decnet/dn_dev.c
net/decnet/dn_neigh.c
net/decnet/dn_route.c
net/dsa/port.c
net/ipv4/devinet.c
net/ipv4/inet_timewait_sock.c
net/ipv4/ping.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_ipv4.c
net/ipv6/addrconf.c
net/ipv6/tcp_ipv6.c
net/key/af_key.c
net/mac80211/mlme.c
net/mac80211/rx.c
net/mptcp/options.c
net/mptcp/pm.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_flow_table_ip.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_flow_offload.c
net/netlink/af_netlink.c
net/nfc/nci/data.c
net/nfc/nci/hci.c
net/rds/tcp.c
net/rds/tcp.h
net/rds/tcp_connect.c
net/rds/tcp_listen.c
net/sched/act_pedit.c
net/smc/smc_rx.c
net/socket.c
net/sunrpc/auth_gss/gss_rpc_upcall.c
net/sunrpc/clnt.c
net/tls/tls_device.c
net/unix/af_unix.c
net/wireless/nl80211.c
net/wireless/scan.c
net/xfrm/xfrm_policy.c
samples/landlock/sandboxer.c
samples/trace_events/trace_custom_sched.h
scripts/Makefile.build
scripts/Makefile.gcc-plugins
scripts/Makefile.randstruct [new file with mode: 0644]
scripts/atomic/gen-atomic-fallback.sh
scripts/atomic/gen-atomic-instrumented.sh
scripts/basic/.gitignore
scripts/basic/Makefile
scripts/check-blacklist-hashes.awk [new file with mode: 0755]
scripts/faddr2line
scripts/gcc-plugins/Kconfig
scripts/gcc-plugins/Makefile
scripts/gcc-plugins/gen-random-seed.sh [deleted file]
scripts/gcc-plugins/latent_entropy_plugin.c
scripts/gcc-plugins/randomize_layout_plugin.c
scripts/gcc-plugins/sancov_plugin.c
scripts/gcc-plugins/stackleak_plugin.c
scripts/gcc-plugins/structleak_plugin.c
scripts/gen-randstruct-seed.sh [new file with mode: 0755]
scripts/link-vmlinux.sh
scripts/min-tool-version.sh
scripts/objdump-func [new file with mode: 0755]
scripts/package/builddeb
scripts/selinux/genheaders/genheaders.c
scripts/selinux/mdp/mdp.c
security/Kconfig
security/Kconfig.hardening
security/apparmor/lsm.c
security/integrity/digsig.c
security/integrity/evm/evm.h
security/integrity/evm/evm_crypto.c
security/integrity/evm/evm_main.c
security/integrity/ima/Kconfig
security/integrity/ima/ima_api.c
security/integrity/ima/ima_appraise.c
security/integrity/ima/ima_main.c
security/integrity/ima/ima_policy.c
security/integrity/ima/ima_template.c
security/integrity/ima/ima_template_lib.c
security/integrity/ima/ima_template_lib.h
security/integrity/integrity.h
security/integrity/platform_certs/keyring_handler.c
security/integrity/platform_certs/keyring_handler.h
security/integrity/platform_certs/load_uefi.c
security/keys/Kconfig
security/keys/big_key.c
security/keys/trusted-keys/Kconfig [new file with mode: 0644]
security/keys/trusted-keys/Makefile
security/keys/trusted-keys/trusted_caam.c [new file with mode: 0644]
security/keys/trusted-keys/trusted_core.c
security/landlock/cred.c
security/landlock/cred.h
security/landlock/fs.c
security/landlock/fs.h
security/landlock/limits.h
security/landlock/object.c
security/landlock/object.h
security/landlock/ptrace.c
security/landlock/ruleset.c
security/landlock/ruleset.h
security/landlock/syscalls.c
security/loadpin/loadpin.c
security/lsm_audit.c
security/security.c
security/selinux/avc.c
security/selinux/hooks.c
security/selinux/include/audit.h
security/selinux/include/avc.h
security/selinux/include/avc_ss.h
security/selinux/include/classmap.h
security/selinux/include/ibpkey.h
security/selinux/include/initial_sid_to_string.h
security/selinux/include/netnode.h
security/selinux/include/netport.h
security/selinux/include/policycap.h
security/selinux/include/policycap_names.h
security/selinux/include/security.h
security/selinux/include/xfrm.h
security/selinux/nlmsgtab.c
security/selinux/selinuxfs.c
security/selinux/ss/avtab.c
security/selinux/ss/hashtab.c
security/selinux/ss/policydb.c
security/selinux/ss/services.c
security/smack/smackfs.c
security/tomoyo/tomoyo.c
sound/isa/wavefront/wavefront_synth.c
sound/pci/hda/patch_realtek.c
sound/usb/quirks-table.h
sound/usb/quirks.c
sound/xen/xen_snd_front_evtchnl.c
sound/xen/xen_snd_front_evtchnl.h
tools/Makefile
tools/arch/x86/include/asm/amd-ibs.h
tools/arch/x86/include/asm/disabled-features.h
tools/build/Makefile.feature
tools/build/feature/Makefile
tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c [new file with mode: 0644]
tools/certs/print-cert-tbs-hash.sh [new file with mode: 0755]
tools/include/linux/objtool.h
tools/include/nolibc/Makefile [new file with mode: 0644]
tools/include/nolibc/arch-aarch64.h [new file with mode: 0644]
tools/include/nolibc/arch-arm.h [new file with mode: 0644]
tools/include/nolibc/arch-i386.h [new file with mode: 0644]
tools/include/nolibc/arch-mips.h [new file with mode: 0644]
tools/include/nolibc/arch-riscv.h [new file with mode: 0644]
tools/include/nolibc/arch-x86_64.h [new file with mode: 0644]
tools/include/nolibc/arch.h [new file with mode: 0644]
tools/include/nolibc/ctype.h [new file with mode: 0644]
tools/include/nolibc/errno.h [new file with mode: 0644]
tools/include/nolibc/nolibc.h
tools/include/nolibc/signal.h [new file with mode: 0644]
tools/include/nolibc/std.h [new file with mode: 0644]
tools/include/nolibc/stdio.h [new file with mode: 0644]
tools/include/nolibc/stdlib.h [new file with mode: 0644]
tools/include/nolibc/string.h [new file with mode: 0644]
tools/include/nolibc/sys.h [new file with mode: 0644]
tools/include/nolibc/time.h [new file with mode: 0644]
tools/include/nolibc/types.h [new file with mode: 0644]
tools/include/nolibc/unistd.h [new file with mode: 0644]
tools/include/uapi/linux/kvm.h
tools/lib/subcmd/parse-options.c
tools/lib/thermal/.gitignore [new file with mode: 0644]
tools/lib/thermal/Build [new file with mode: 0644]
tools/lib/thermal/Makefile [new file with mode: 0644]
tools/lib/thermal/commands.c [new file with mode: 0644]
tools/lib/thermal/events.c [new file with mode: 0644]
tools/lib/thermal/include/thermal.h [new file with mode: 0644]
tools/lib/thermal/libthermal.map [new file with mode: 0644]
tools/lib/thermal/libthermal.pc.template [new file with mode: 0644]
tools/lib/thermal/sampling.c [new file with mode: 0644]
tools/lib/thermal/thermal.c [new file with mode: 0644]
tools/lib/thermal/thermal_nl.c [new file with mode: 0644]
tools/lib/thermal/thermal_nl.h [new file with mode: 0644]
tools/memory-model/README
tools/objtool/Build
tools/objtool/Documentation/objtool.txt [moved from tools/objtool/Documentation/stack-validation.txt with 80% similarity]
tools/objtool/Makefile
tools/objtool/arch/x86/decode.c
tools/objtool/arch/x86/special.c
tools/objtool/builtin-check.c
tools/objtool/builtin-orc.c [deleted file]
tools/objtool/check.c
tools/objtool/elf.c
tools/objtool/include/objtool/builtin.h
tools/objtool/include/objtool/elf.h
tools/objtool/include/objtool/warn.h
tools/objtool/objtool.c
tools/objtool/weak.c
tools/perf/Makefile.config
tools/perf/arch/x86/util/perf_regs.c
tools/perf/bench/numa.c
tools/perf/tests/bpf.c
tools/perf/tests/builtin-test.c
tools/perf/tests/shell/stat_all_pmu.sh
tools/perf/tests/shell/test_arm_coresight.sh
tools/perf/tests/topology.c
tools/perf/util/bpf-event.c
tools/perf/util/session.c
tools/perf/util/stat.c
tools/power/acpi/common/cmfsize.c
tools/power/acpi/common/getopt.c
tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
tools/power/acpi/os_specific/service_layers/osunixdir.c
tools/power/acpi/os_specific/service_layers/osunixmap.c
tools/power/acpi/os_specific/service_layers/osunixxf.c
tools/power/acpi/tools/acpidump/acpidump.h
tools/power/acpi/tools/acpidump/apdump.c
tools/power/acpi/tools/acpidump/apfiles.c
tools/power/acpi/tools/acpidump/apmain.c
tools/power/x86/intel-speed-select/hfi-events.c
tools/power/x86/intel-speed-select/isst-config.c
tools/power/x86/turbostat/Makefile
tools/power/x86/turbostat/turbostat.8
tools/power/x86/turbostat/turbostat.c
tools/testing/selftests/arm64/Makefile
tools/testing/selftests/arm64/abi/.gitignore
tools/testing/selftests/arm64/abi/Makefile
tools/testing/selftests/arm64/abi/syscall-abi-asm.S
tools/testing/selftests/arm64/abi/syscall-abi.c
tools/testing/selftests/arm64/abi/syscall-abi.h [new file with mode: 0644]
tools/testing/selftests/arm64/abi/tpidr2.c [new file with mode: 0644]
tools/testing/selftests/arm64/bti/Makefile
tools/testing/selftests/arm64/fp/.gitignore
tools/testing/selftests/arm64/fp/Makefile
tools/testing/selftests/arm64/fp/rdvl-sme.c [new file with mode: 0644]
tools/testing/selftests/arm64/fp/rdvl.S
tools/testing/selftests/arm64/fp/rdvl.h
tools/testing/selftests/arm64/fp/sme-inst.h [new file with mode: 0644]
tools/testing/selftests/arm64/fp/ssve-stress [new file with mode: 0644]
tools/testing/selftests/arm64/fp/sve-ptrace.c
tools/testing/selftests/arm64/fp/sve-test.S
tools/testing/selftests/arm64/fp/vec-syscfg.c
tools/testing/selftests/arm64/fp/vlset.c
tools/testing/selftests/arm64/fp/za-fork-asm.S [new file with mode: 0644]
tools/testing/selftests/arm64/fp/za-fork.c [new file with mode: 0644]
tools/testing/selftests/arm64/fp/za-ptrace.c [new file with mode: 0644]
tools/testing/selftests/arm64/fp/za-stress [new file with mode: 0644]
tools/testing/selftests/arm64/fp/za-test.S [new file with mode: 0644]
tools/testing/selftests/arm64/mte/.gitignore
tools/testing/selftests/arm64/mte/check_child_memory.c
tools/testing/selftests/arm64/mte/check_prctl.c [new file with mode: 0644]
tools/testing/selftests/arm64/mte/check_tags_inclusion.c
tools/testing/selftests/arm64/mte/mte_common_util.c
tools/testing/selftests/arm64/mte/mte_common_util.h
tools/testing/selftests/arm64/signal/.gitignore
tools/testing/selftests/arm64/signal/test_signals.h
tools/testing/selftests/arm64/signal/test_signals_utils.c
tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c [new file with mode: 0644]
tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c [new file with mode: 0644]
tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c [new file with mode: 0644]
tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c [new file with mode: 0644]
tools/testing/selftests/arm64/signal/testcases/sme_vl.c [new file with mode: 0644]
tools/testing/selftests/arm64/signal/testcases/ssve_regs.c [new file with mode: 0644]
tools/testing/selftests/arm64/signal/testcases/testcases.c
tools/testing/selftests/arm64/signal/testcases/testcases.h
tools/testing/selftests/arm64/signal/testcases/za_regs.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
tools/testing/selftests/landlock/base_test.c
tools/testing/selftests/landlock/common.h
tools/testing/selftests/landlock/fs_test.c
tools/testing/selftests/landlock/ptrace_test.c
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/bpf/Makefile [new file with mode: 0644]
tools/testing/selftests/net/bpf/nat6to4.c [new file with mode: 0644]
tools/testing/selftests/net/fcnal-test.sh
tools/testing/selftests/net/forwarding/Makefile
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/udpgro_frglist.sh [new file with mode: 0755]
tools/testing/selftests/rcutorture/bin/functions.sh
tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
tools/testing/selftests/rcutorture/bin/kvm-remote.sh
tools/testing/selftests/rcutorture/bin/kvm.sh
tools/testing/selftests/rcutorture/bin/torture.sh
tools/testing/selftests/rcutorture/configs/rcu/RUDE01
tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
tools/testing/selftests/rcutorture/configs/rcu/TASKS01
tools/testing/selftests/rcutorture/configs/rcu/TASKS02
tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
tools/testing/selftests/rcutorture/configs/rcu/TASKS03
tools/testing/selftests/rcutorture/configs/rcu/TRACE01
tools/testing/selftests/rcutorture/configs/rcu/TRACE02
tools/testing/selftests/rcutorture/configs/rcu/TREE04
tools/testing/selftests/rcutorture/configs/rcu/TREE07
tools/testing/selftests/rcutorture/configs/rcu/TREE09
tools/testing/selftests/rcutorture/configs/rcu/TREE10
tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
tools/testing/selftests/rcutorture/configs/rcuscale/TREE
tools/testing/selftests/rcutorture/configs/refscale/CFcommon
tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
tools/testing/selftests/rcutorture/configs/scf/PREEMPT
tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
tools/testing/selftests/seccomp/Makefile
tools/testing/selftests/seccomp/seccomp_bpf.c
tools/testing/selftests/vm/Makefile
tools/thermal/lib/Build [new file with mode: 0644]
tools/thermal/lib/Makefile [new file with mode: 0644]
tools/thermal/lib/libthermal_tools.pc.template [new file with mode: 0644]
tools/thermal/lib/log.c [new file with mode: 0644]
tools/thermal/lib/log.h [new file with mode: 0644]
tools/thermal/lib/mainloop.c [new file with mode: 0644]
tools/thermal/lib/mainloop.h [new file with mode: 0644]
tools/thermal/lib/thermal-tools.h [new file with mode: 0644]
tools/thermal/lib/uptimeofday.c [new file with mode: 0644]
tools/thermal/lib/uptimeofday.h [new file with mode: 0644]
tools/thermal/thermal-engine/Build [new file with mode: 0644]
tools/thermal/thermal-engine/Makefile [new file with mode: 0644]
tools/thermal/thermal-engine/thermal-engine.c [new file with mode: 0644]
tools/thermal/thermometer/Build [new file with mode: 0644]
tools/thermal/thermometer/Makefile [new file with mode: 0644]
tools/thermal/thermometer/thermometer.8 [new file with mode: 0644]
tools/thermal/thermometer/thermometer.c [new file with mode: 0644]
tools/thermal/thermometer/thermometer.conf [new file with mode: 0644]
virt/kvm/eventfd.c
virt/kvm/kvm_main.c

index ea1ba4a9a77e082e9d440d4f6528c19b2f8ecb2f..6d484937f90147f913a62167f29018f808d4fec5 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -205,6 +205,7 @@ Juha Yrjola <at solidboot.com>
 Juha Yrjola <juha.yrjola@nokia.com>
 Juha Yrjola <juha.yrjola@solidboot.com>
 Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
+Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
 Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
 Kay Sievers <kay.sievers@vrfy.org>
 Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
@@ -250,6 +251,7 @@ Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
+Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
 Mathieu Othacehe <m.othacehe@gmail.com>
 Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
 Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
@@ -396,6 +398,7 @@ Vasily Averin <vasily.averin@linux.dev> <vvs@virtuozzo.com>
 Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
 Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
 Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
+Valentin Schneider <vschneid@redhat.com> <valentin.schneider@arm.com>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
 Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
index 12c3f895cd2fff37d729ff44583486eee3748167..b312242d4f408deb91b40441d87a0f440e93b759 100644 (file)
@@ -467,3 +467,39 @@ Description:       These files provide the maximum power required for line card
                feeding and line card configuration Id.
 
                The files are read only.
+
+What:          /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/phy_reset
+Date:          May 2022
+KernelVersion: 5.19
+Contact:       Vadim Pasternak <vadimp@mellanox.com>
+Description:   This file allows resetting the PHY 88E1548 by setting the
+               attribute to 0 in case of abnormal PHY behavior.
+               Expected behavior:
+               When phy_reset is written 1, all PHY 88E1548 devices are
+               released from the reset state; when it is written 0, they are
+               held in the reset state.
+
+               The files are read/write.
+
+What:          /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/mac_reset
+Date:          May 2022
+KernelVersion: 5.19
+Contact:       Vadim Pasternak <vadimp@mellanox.com>
+Description:   This file allows resetting the ASIC MT52132 by setting the
+               attribute to 0 in case of abnormal ASIC behavior.
+               Expected behavior:
+               When mac_reset is written 1, the ASIC MT52132 is released
+               from the reset state; when it is written 0, it is held in the
+               reset state.
+
+               The files are read/write.
+
+What:          /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/qsfp_pwr_good
+Date:          May 2022
+KernelVersion: 5.19
+Contact:       Vadim Pasternak <vadimp@mellanox.com>
+Description:   This file shows the QSFP ports power status. The value is set
+               to 0 when at least one QSFP port is plugged in. The value is
+               set to 1 when no QSFP ports are plugged in.
+               The possible values are:
+               0 - Power good, 1 - Not power good.
+
+               The files are read only.
index 839fab811b1864de229c4a913a94c83469bd30a6..db17fc8a0c9f6e3416631a9804efce98bc17c2c4 100644 (file)
@@ -27,8 +27,9 @@ Description:
                                [fowner=] [fgroup=]]
                        lsm:    [[subj_user=] [subj_role=] [subj_type=]
                                 [obj_user=] [obj_role=] [obj_type=]]
-                       option: [[appraise_type=]] [template=] [permit_directio]
-                               [appraise_flag=] [appraise_algos=] [keyrings=]
+                       option: [digest_type=] [template=] [permit_directio]
+                               [appraise_type=] [appraise_flag=]
+                               [appraise_algos=] [keyrings=]
                  base:
                        func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
                                [FIRMWARE_CHECK]
@@ -47,10 +48,21 @@ Description:
                        fgroup:= decimal value
                  lsm:  are LSM specific
                  option:
-                       appraise_type:= [imasig] [imasig|modsig]
+                       appraise_type:= [imasig] | [imasig|modsig] | [sigv3]
+                           where 'imasig' is the original or the v2 signature
+                               format,
+                           where 'modsig' is an appended signature,
+                           where 'sigv3' is the signature format v3. (Currently
+                               limited to fsverity digest based signatures
+                               stored in security.ima xattr. Requires
+                               specifying "digest_type=verity" first.)
+
                        appraise_flag:= [check_blacklist]
                        Currently, the blacklist check is only done for files
                        signed with an appended signature.
+                       digest_type:= verity
+                           Require fs-verity's file digest instead of the
+                           regular IMA file hash.
                        keyrings:= list of keyrings
                        (eg, .builtin_trusted_keys|.ima). Only valid
                        when action is "measure" and func is KEY_CHECK.
@@ -149,3 +161,30 @@ Description:
                security.ima xattr of a file:
 
                        appraise func=SETXATTR_CHECK appraise_algos=sha256,sha384,sha512
+
+               Example of a 'measure' rule requiring fs-verity's digests
+               with indication of type of digest in the measurement list.
+
+                       measure func=FILE_CHECK digest_type=verity \
+                               template=ima-ngv2
+
+               Example of 'measure' and 'appraise' rules requiring fs-verity
+               signatures (format version 3) stored in security.ima xattr.
+
+               The 'measure' rule specifies the 'ima-sigv3' template option,
+               which includes the indication of type of digest and the file
+               signature in the measurement list.
+
+                       measure func=BPRM_CHECK digest_type=verity \
+                               template=ima-sigv3
+
+
+               The 'appraise' rule specifies the type and signature format
+               version (sigv3) required.
+
+                       appraise func=BPRM_CHECK digest_type=verity \
+                               appraise_type=sigv3
+
+               All of these policy rules could, for example, be constrained
+               either based on a filesystem's UUID (fsuuid) or based on LSM
+               labels.
diff --git a/Documentation/ABI/testing/securityfs-secrets-coco b/Documentation/ABI/testing/securityfs-secrets-coco
new file mode 100644 (file)
index 0000000..f2b6909
--- /dev/null
@@ -0,0 +1,51 @@
+What:          security/secrets/coco
+Date:          February 2022
+Contact:       Dov Murik <dovmurik@linux.ibm.com>
+Description:
+               Exposes confidential computing (coco) EFI secrets to
+               userspace via securityfs.
+
+               EFI can declare a memory area to be used by confidential
+               computing platforms (such as AMD SEV and SEV-ES) for secret
+               injection by the Guest Owner during the VM's launch.  The
+               secrets are encrypted by the Guest Owner and decrypted inside
+               the trusted enclave, and therefore are not readable by the
+               untrusted host.
+
+               The efi_secret module exposes the secrets to userspace.  Each
+               secret appears as a file under <securityfs>/secrets/coco,
+               where the filename is the GUID of the entry in the secrets
+               table.  This module is loaded automatically by the EFI driver
+               if the EFI secret area is populated.
+
+               Two operations are supported for the files: read and unlink.
+               Reading the file returns the content of the secret entry.
+               Unlinking the file overwrites the secret data with zeroes and
+               removes the entry from the filesystem.  A secret cannot be read
+               after it has been unlinked.
+
+               For example, listing the available secrets::
+
+                 # modprobe efi_secret
+                 # ls -l /sys/kernel/security/secrets/coco
+                 -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+                 -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+                 -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+                 -r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+               Reading the secret data by reading a file::
+
+                 # cat /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+                 the-content-of-the-secret-data
+
+               Wiping a secret by unlinking a file::
+
+                 # rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+                 # ls -l /sys/kernel/security/secrets/coco
+                 -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+                 -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+                 -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+               Note: The binary format of the secrets table injected by the
+               Guest Owner is described in
+               drivers/virt/coco/efi_secret/efi_secret.c under "Structure of
+               the EFI secret area".
index 8516f08806dd992b04841d48d4ec55410d9189fb..475b9a372657be033fe97d97c8117d0228988b6b 100644 (file)
@@ -370,3 +370,84 @@ Description:
 
                'unknown' means software cannot determine the state, or
                the reported state is invalid.
+
+What:          /sys/class/regulator/.../under_voltage
+Date:          April 2022
+KernelVersion: 5.18
+Contact:       Zev Weiss <zev@bewilderbeest.net>
+Description:
+               Some regulator directories will contain a field called
+               under_voltage.  This indicates if the device reports an
+               under-voltage fault (1) or not (0).
+
+What:          /sys/class/regulator/.../over_current
+Date:          April 2022
+KernelVersion: 5.18
+Contact:       Zev Weiss <zev@bewilderbeest.net>
+Description:
+               Some regulator directories will contain a field called
+               over_current.  This indicates if the device reports an
+               over-current fault (1) or not (0).
+
+What:          /sys/class/regulator/.../regulation_out
+Date:          April 2022
+KernelVersion: 5.18
+Contact:       Zev Weiss <zev@bewilderbeest.net>
+Description:
+               Some regulator directories will contain a field called
+               regulation_out.  This indicates if the device reports an
+               out-of-regulation fault (1) or not (0).
+
+What:          /sys/class/regulator/.../fail
+Date:          April 2022
+KernelVersion: 5.18
+Contact:       Zev Weiss <zev@bewilderbeest.net>
+Description:
+               Some regulator directories will contain a field called
+               fail.  This indicates if the device reports an output failure
+               (1) or not (0).
+
+What:          /sys/class/regulator/.../over_temp
+Date:          April 2022
+KernelVersion: 5.18
+Contact:       Zev Weiss <zev@bewilderbeest.net>
+Description:
+               Some regulator directories will contain a field called
+               over_temp.  This indicates if the device reports an
+               over-temperature fault (1) or not (0).
+
+What:          /sys/class/regulator/.../under_voltage_warn
+Date:          April 2022
+KernelVersion: 5.18
+Contact:       Zev Weiss <zev@bewilderbeest.net>
+Description:
+               Some regulator directories will contain a field called
+               under_voltage_warn.  This indicates if the device reports an
+               under-voltage warning (1) or not (0).
+
+What:          /sys/class/regulator/.../over_current_warn
+Date:          April 2022
+KernelVersion: 5.18
+Contact:       Zev Weiss <zev@bewilderbeest.net>
+Description:
+               Some regulator directories will contain a field called
+               over_current_warn.  This indicates if the device reports an
+               over-current warning (1) or not (0).
+
+What:          /sys/class/regulator/.../over_voltage_warn
+Date:          April 2022
+KernelVersion: 5.18
+Contact:       Zev Weiss <zev@bewilderbeest.net>
+Description:
+               Some regulator directories will contain a field called
+               over_voltage_warn.  This indicates if the device reports an
+               over-voltage warning (1) or not (0).
+
+What:          /sys/class/regulator/.../over_temp_warn
+Date:          April 2022
+KernelVersion: 5.18
+Contact:       Zev Weiss <zev@bewilderbeest.net>
+Description:
+               Some regulator directories will contain a field called
+               over_temp_warn.  This indicates if the device reports an
+               over-temperature warning (1) or not (0).
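
As a quick illustration of the flag attributes documented above (the regulator
name "regulator.10" is hypothetical; actual directory names depend on the
platform), each file simply reports 1 or 0 on read::

	# cat /sys/class/regulator/regulator.10/under_voltage
	0
	# cat /sys/class/regulator/regulator.10/over_temp_warn
	1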
index a74dfe52dd76d4ecbe89464e6c0b1325cd6bb589..7faf719af16502c8fe01be06ebf13eabb2487c53 100644 (file)
@@ -29,7 +29,7 @@ Description:
 What:           /sys/module/xen_blkback/parameters/buffer_squeeze_duration_ms
 Date:           December 2019
 KernelVersion:  5.6
-Contact:        SeongJae Park <sj@kernel.org>
+Contact:        Maximilian Heyne <mheyne@amazon.de>
 Description:
                 When memory pressure is reported to blkback this option
                 controls the duration in milliseconds that blkback will not
@@ -39,7 +39,7 @@ Description:
 What:           /sys/module/xen_blkback/parameters/feature_persistent
 Date:           September 2020
 KernelVersion:  5.10
-Contact:        SeongJae Park <sj@kernel.org>
+Contact:        Maximilian Heyne <mheyne@amazon.de>
 Description:
                 Whether to enable the persistent grants feature or not.  Note
                 that this option only takes effect on newly created backends.
index 61fd173fabfe3b078fa59a6805d0ce6c17dee363..7f646c58832e6f0e1a40eef13b81043108929820 100644 (file)
@@ -12,7 +12,7 @@ Description:
 What:           /sys/module/xen_blkfront/parameters/feature_persistent
 Date:           September 2020
 KernelVersion:  5.10
-Contact:        SeongJae Park <sj@kernel.org>
+Contact:        Maximilian Heyne <mheyne@amazon.de>
 Description:
                 Whether to enable the persistent grants feature or not.  Note
                 that this option only takes effect on newly created frontends.
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-ifs b/Documentation/ABI/testing/sysfs-platform-intel-ifs
new file mode 100644 (file)
index 0000000..486d6d2
--- /dev/null
@@ -0,0 +1,39 @@
+What:          /sys/devices/virtual/misc/intel_ifs_<N>/run_test
+Date:          April 21 2022
+KernelVersion: 5.19
+Contact:       "Jithu Joseph" <jithu.joseph@intel.com>
+Description:   Write <cpu#> to trigger IFS test for one online core.
+               Note that the test is per core. The cpu# can be
+               for any thread on the core. Running on one thread
+               completes the test for the core containing that thread.
+               Example: to test the core containing cpu5: echo 5 >
+               /sys/devices/virtual/misc/intel_ifs_<N>/run_test
+
+What:          /sys/devices/virtual/misc/intel_ifs_<N>/status
+Date:          April 21 2022
+KernelVersion: 5.19
+Contact:       "Jithu Joseph" <jithu.joseph@intel.com>
+Description:   The status of the last test. It can be one of "pass", "fail"
+               or "untested".
+
+What:          /sys/devices/virtual/misc/intel_ifs_<N>/details
+Date:          April 21 2022
+KernelVersion: 5.19
+Contact:       "Jithu Joseph" <jithu.joseph@intel.com>
+Description:   Additional information regarding the last test. The details file reports
+               the hex value of the SCAN_STATUS MSR. Note that the error_code field
+               may contain driver defined software code not defined in the Intel SDM.
+
+What:          /sys/devices/virtual/misc/intel_ifs_<N>/image_version
+Date:          April 21 2022
+KernelVersion: 5.19
+Contact:       "Jithu Joseph" <jithu.joseph@intel.com>
+Description:   Version (hexadecimal) of the loaded IFS binary image. If no
+               scan image is loaded, reports "none".
+
+What:          /sys/devices/virtual/misc/intel_ifs_<N>/reload
+Date:          April 21 2022
+KernelVersion: 5.19
+Contact:       "Jithu Joseph" <jithu.joseph@intel.com>
+Description:   Write "1" (or "y" or "Y") to reload the IFS image from
+               /lib/firmware/intel/ifs/ff-mm-ss.scan.
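
Putting the attributes above together, one plausible test cycle for the core
containing cpu5 might look as follows (illustrative only; the instance number
0 and the cpu number are assumptions, not fixed values)::

	# echo 1 > /sys/devices/virtual/misc/intel_ifs_0/reload
	# echo 5 > /sys/devices/virtual/misc/intel_ifs_0/run_test
	# cat /sys/devices/virtual/misc/intel_ifs_0/status
	pass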
index f4efd6897b0914520a615f4f2779c850c35cc0da..b34990c7c3778d7c5658615e7a18d6ec88682e4e 100644 (file)
@@ -973,7 +973,7 @@ The ``->dynticks`` field counts the corresponding CPU's transitions to
 and from either dyntick-idle or user mode, so that this counter has an
 even value when the CPU is in dyntick-idle mode or user mode and an odd
 value otherwise. The transitions to/from user mode need to be counted
-for user mode adaptive-ticks support (see timers/NO_HZ.txt).
+for user mode adaptive-ticks support (see Documentation/timers/no_hz.rst).
 
 The ``->rcu_need_heavy_qs`` field is used to record the fact that the
 RCU core code would really like to see a quiescent state from the
index 6f89cf1e567d099aa3c4f963a7b8c628fe19a062..c9c957c85bac1a5bdd4a69af5136ea4600a19250 100644 (file)
@@ -406,7 +406,7 @@ In earlier implementations, the task requesting the expedited grace
 period also drove it to completion. This straightforward approach had
 the disadvantage of needing to account for POSIX signals sent to user
 tasks, so more recent implementations use the Linux kernel's
-`workqueues <https://www.kernel.org/doc/Documentation/core-api/workqueue.rst>`__.
+workqueues (see Documentation/core-api/workqueue.rst).
 
 The requesting task still does counter snapshotting and funnel-lock
 processing, but the task reaching the top of the funnel lock does a
index 45278e2974c04c13df06c63fe3f6deaaf7ac1ccf..04ed8bf27a0eae4086ad7219046074c8e6455e3b 100644 (file)
@@ -370,8 +370,8 @@ pointer fetched by rcu_dereference() may not be used outside of the
 outermost RCU read-side critical section containing that
 rcu_dereference(), unless protection of the corresponding data
 element has been passed from RCU to some other synchronization
-mechanism, most commonly locking or `reference
-counting <https://www.kernel.org/doc/Documentation/RCU/rcuref.txt>`__.
+mechanism, most commonly locking or reference counting
+(see ../../rcuref.rst).
 
 .. |high-quality implementation of C11 memory_order_consume [PDF]| replace:: high-quality implementation of C11 ``memory_order_consume`` [PDF]
 .. _high-quality implementation of C11 memory_order_consume [PDF]: http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf
@@ -2654,6 +2654,38 @@ synchronize_rcu(), and rcu_barrier(), respectively. In
 three APIs are therefore implemented by separate functions that check
 for voluntary context switches.
 
+Tasks Rude RCU
+~~~~~~~~~~~~~~
+
+Some forms of tracing need to wait for all preemption-disabled regions
+of code running on any online CPU, including those executed when RCU is
+not watching.  This means that synchronize_rcu() is insufficient, and
+Tasks Rude RCU must be used instead.  This flavor of RCU does its work by
+forcing a workqueue to be scheduled on each online CPU, hence the "Rude"
+moniker.  And this operation is considered to be quite rude by real-time
+workloads that don't want their ``nohz_full`` CPUs receiving IPIs and
+by battery-powered systems that don't want their idle CPUs to be awakened.
+
+The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
+consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
+and rcu_barrier_tasks_rude().
+
+Tasks Trace RCU
+~~~~~~~~~~~~~~~
+
+Some forms of tracing need to sleep in readers, but cannot tolerate
+SRCU's read-side overhead, which includes a full memory barrier in both
+srcu_read_lock() and srcu_read_unlock().  This need is handled by a
+Tasks Trace RCU that uses scheduler locking and IPIs to synchronize with
+readers.  Real-time systems that cannot tolerate IPIs may build their
+kernels with ``CONFIG_TASKS_TRACE_RCU_READ_MB=y``, which avoids the IPIs at
+the expense of adding full memory barriers to the read-side primitives.
+
+The tasks-trace-RCU API is also reasonably compact,
+consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(),
+rcu_read_lock_trace_held(), call_rcu_tasks_trace(),
+synchronize_rcu_tasks_trace(), and rcu_barrier_tasks_trace().
+
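
To make the Tasks Trace RCU API above concrete, here is a minimal
reader/updater sketch, assuming a kernel-module context.  The struct, global
pointer, and callback names are hypothetical; rcu_read_lock_trace(),
rcu_read_unlock_trace(), rcu_read_lock_trace_held(), and
call_rcu_tasks_trace() are the APIs listed above::

	#include <linux/rcupdate_trace.h>
	#include <linux/slab.h>

	struct foo {				/* hypothetical protected object */
		int data;
		struct rcu_head rh;
	};

	static struct foo __rcu *gp;		/* hypothetical global pointer */

	/* Reader: unlike vanilla RCU, this critical section may sleep. */
	static int foo_read(void)
	{
		struct foo *p;
		int val = -1;

		rcu_read_lock_trace();
		p = rcu_dereference_check(gp, rcu_read_lock_trace_held());
		if (p)
			val = p->data;
		rcu_read_unlock_trace();
		return val;
	}

	static void foo_free_cb(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct foo, rh));
	}

	/*
	 * Updater: publish the new version, then defer freeing the old
	 * one until all tasks-trace readers are done with it.
	 */
	static void foo_update(struct foo *newp)
	{
		struct foo *oldp = rcu_replace_pointer(gp, newp, true);

		if (oldp)
			call_rcu_tasks_trace(&oldp->rh, foo_free_cb);
	}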
 Possible Future Changes
 -----------------------
 
index 4051ea3871eff0075843d9eee1725b178e4090fd..a5f2ff8fc54c2ad70c603d250d7c23256331e920 100644 (file)
@@ -33,8 +33,8 @@ Situation 1: Hash Tables
 
 Hash tables are often implemented as an array, where each array entry
 has a linked-list hash chain.  Each hash chain can be protected by RCU
-as described in the listRCU.txt document.  This approach also applies
-to other array-of-list situations, such as radix trees.
+as described in listRCU.rst.  This approach also applies to other
+array-of-list situations, such as radix trees.
 
 .. _static_arrays:
 
index f4545b7c9a63d29c7b2353802e11d2e58659343f..42cc5d891bd26e4d3841c04e1bad52d01fcb62b7 100644 (file)
@@ -140,8 +140,7 @@ over a rather long period of time, but improvements are always welcome!
                prevents destructive compiler optimizations.  However,
                with a bit of devious creativity, it is possible to
                mishandle the return value from rcu_dereference().
-               Please see rcu_dereference.txt in this directory for
-               more information.
+               Please see rcu_dereference.rst for more information.
 
                The rcu_dereference() primitive is used by the
                various "_rcu()" list-traversal primitives, such
@@ -151,7 +150,7 @@ over a rather long period of time, but improvements are always welcome!
                primitives.  This is particularly useful in code that
                is common to readers and updaters.  However, lockdep
                will complain if you access rcu_dereference() outside
-               of an RCU read-side critical section.  See lockdep.txt
+               of an RCU read-side critical section.  See lockdep.rst
                to learn what to do about this.
 
                Of course, neither rcu_dereference() nor the "_rcu()"
@@ -323,7 +322,7 @@ over a rather long period of time, but improvements are always welcome!
        primitives when the update-side lock is held is that doing so
        can be quite helpful in reducing code bloat when common code is
        shared between readers and updaters.  Additional primitives
-       are provided for this case, as discussed in lockdep.txt.
+       are provided for this case, as discussed in lockdep.rst.
 
        One exception to this rule is when data is only ever added to
        the linked data structure, and is never removed during any
@@ -480,4 +479,4 @@ over a rather long period of time, but improvements are always welcome!
        both rcu_barrier() and synchronize_rcu(), if necessary, using
        something like workqueues to execute them concurrently.
 
-       See rcubarrier.txt for more information.
+       See rcubarrier.rst for more information.
index 0e03c6ef3147a3ea06cef16fd3fd931ce7c4081a..3cfe01ba9a4944b20b89399ffeee9cdfd7dfb4d9 100644 (file)
@@ -10,9 +10,8 @@ A "grace period" must elapse between the two parts, and this grace period
 must be long enough that any readers accessing the item being deleted have
 since dropped their references.  For example, an RCU-protected deletion
 from a linked list would first remove the item from the list, wait for
-a grace period to elapse, then free the element.  See the
-:ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` for more information on
-using RCU with linked lists.
+a grace period to elapse, then free the element.  See listRCU.rst for more
+information on using RCU with linked lists.
 
 Frequently Asked Questions
 --------------------------
@@ -50,7 +49,7 @@ Frequently Asked Questions
 - If I am running on a uniprocessor kernel, which can only do one
   thing at a time, why should I wait for a grace period?
 
-  See :ref:`Documentation/RCU/UP.rst <up_doc>` for more information.
+  See UP.rst for more information.
 
 - How can I see where RCU is currently used in the Linux kernel?
 
@@ -64,13 +63,13 @@ Frequently Asked Questions
 
 - What guidelines should I follow when writing code that uses RCU?
 
-  See the checklist.txt file in this directory.
+  See checklist.rst.
 
 - Why the name "RCU"?
 
   "RCU" stands for "read-copy update".
-  :ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` has more information on where
-  this name came from, search for "read-copy update" to find it.
+  listRCU.rst has more information on where this name came from, search
+  for "read-copy update" to find it.
 
 - I hear that RCU is patented?  What is with that?
 
index a9fc774bc400080ba78bd1fd334f8e6eff12b3f1..ca4692775ad41a68fb35175cc510a18ffcaaf492 100644 (file)
@@ -8,7 +8,7 @@ This section describes how to use hlist_nulls to
 protect read-mostly linked lists and
 objects using SLAB_TYPESAFE_BY_RCU allocations.
 
-Please read the basics in Documentation/RCU/listRCU.rst
+Please read the basics in listRCU.rst.
 
 Using 'nulls'
 =============
index 78404625bad26bbc7bfadf6374be7136bb5bbc25..794837eb519b94949dff617259a72ab2c1f2d2b6 100644 (file)
@@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
        Stall-warning messages may be enabled and disabled completely via
        /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
 
+CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
+--------------------------------
+
+       Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
+       the expedited grace period. This parameter defines the period
+       of time that RCU will wait from the beginning of an expedited
+       grace period until it issues an RCU CPU stall warning. This time
+       period is normally 20 milliseconds on Android devices.  A zero
+       value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
+       after conversion to milliseconds.
+
+       This configuration parameter may be changed at runtime via the
+       /sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout; however,
+       this parameter is checked only at the beginning of a cycle. If you
+       are in a current stall cycle, setting it to a new value will change
+       the timeout for the -next- stall.
+
+       Stall-warning messages may be enabled and disabled completely via
+       /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
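+       For instance, the runtime parameter described above could be lowered
+       on the fly (the value 20000 is only an example), taking effect for
+       the next stall cycle::
+
+               # echo 20000 > /sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout
+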
 RCU_STALL_DELAY_DELTA
 ---------------------
 
index c34d2212eaca23fca6bd8025be82a6534682d470..77ea260efd1207e797d84bf895c6c7d26063467a 100644 (file)
@@ -224,7 +224,7 @@ synchronize_rcu()
        be delayed.  This property results in system resilience in face
        of denial-of-service attacks.  Code using call_rcu() should limit
        update rate in order to gain this same sort of resilience.  See
-       checklist.txt for some approaches to limiting the update rate.
+       checklist.rst for some approaches to limiting the update rate.
 
 rcu_assign_pointer()
 ^^^^^^^^^^^^^^^^^^^^
@@ -318,7 +318,7 @@ rcu_dereference()
        must prohibit.  The rcu_dereference_protected() variant takes
        a lockdep expression to indicate which locks must be acquired
        by the caller. If the indicated protection is not provided,
-       a lockdep splat is emitted.  See Documentation/RCU/Design/Requirements/Requirements.rst
+       a lockdep splat is emitted.  See Design/Requirements/Requirements.rst
        and the API's code comments for more details and example usage.
 
 ..     [2] If the list_for_each_entry_rcu() instance might be used by
@@ -399,8 +399,7 @@ for specialized uses, but are relatively uncommon.
 
 This section shows a simple use of the core RCU API to protect a
 global pointer to a dynamically allocated structure.  More-typical
-uses of RCU may be found in :ref:`listRCU.rst <list_rcu_doc>`,
-:ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst <NMI_rcu_doc>`.
+uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst.
 ::
 
        struct foo {
@@ -482,10 +481,9 @@ So, to sum up:
        RCU read-side critical sections that might be referencing that
        data item.
 
-See checklist.txt for additional rules to follow when using RCU.
-And again, more-typical uses of RCU may be found in :ref:`listRCU.rst
-<list_rcu_doc>`, :ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst
-<NMI_rcu_doc>`.
+See checklist.rst for additional rules to follow when using RCU.
+And again, more-typical uses of RCU may be found in listRCU.rst,
+arrayRCU.rst, and NMI-RCU.rst.
 
 .. _4_whatisRCU:
 
@@ -579,7 +577,7 @@ to avoid having to write your own callback::
 
        kfree_rcu(old_fp, rcu);
 
-Again, see checklist.txt for additional rules governing the use of RCU.
+Again, see checklist.rst for additional rules governing the use of RCU.
 
 .. _5_whatisRCU:
 
@@ -663,7 +661,7 @@ been able to write-acquire the lock otherwise.  The smp_mb__after_spinlock()
 promotes synchronize_rcu() to a full memory barrier in compliance with
 the "Memory-Barrier Guarantees" listed in:
 
-       Documentation/RCU/Design/Requirements/Requirements.rst
+       Design/Requirements/Requirements.rst
 
 It is possible to nest rcu_read_lock(), since reader-writer locks may
 be recursively acquired.  Note also that rcu_read_lock() is immune
index 860fe651d6453eed1652f721cb048c8b3840be33..5e40b3f437f90c2d202198aa6bfac5707fa7ac63 100644 (file)
@@ -37,11 +37,7 @@ Pressure interface
 Pressure information for each resource is exported through the
 respective file in /proc/pressure/ -- cpu, memory, and io.
 
-The format for CPU is as such::
-
-       some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-
-and for memory and IO::
+The format is as such::
 
        some avg10=0.00 avg60=0.00 avg300=0.00 total=0
        full avg10=0.00 avg60=0.00 avg300=0.00 total=0
@@ -58,6 +54,9 @@ situation from a state where some tasks are stalled but the CPU is
 still doing productive work. As such, time spent in this subset of the
 stall state is tracked separately and exported in the "full" averages.
 
+CPU full is undefined at the system level, but has been reported
+since 5.13, so it is set to zero for backward compatibility.
+
 The ratios (in %) are tracked as recent trends over ten, sixty, and
 three hundred second windows, which gives insight into short term events
 as well as medium and long term trends. The total absolute stall time
index 3f1cc5e317ed4a5ad001082c9c589b6008f68db9..2d67cdfbf9c9ac7cc78b56ab182391ad66189ac8 100644 (file)
                        Defaults to zero when built as a module and to
                        10 seconds when built into the kernel.
 
-       clearcpuid=BITNUM[,BITNUM...] [X86]
+       clearcpuid=X[,X...] [X86]
                        Disable CPUID feature X for the kernel. See
                        arch/x86/include/asm/cpufeatures.h for the valid bit
-                       numbers. Note the Linux specific bits are not necessarily
-                       stable over kernel options, but the vendor specific
+                       numbers X. Note the Linux-specific bits are not necessarily
+                       stable over kernel options, but the vendor-specific
                        ones should be.
+                       X can also be a string as it appears in the flags: line
+                       of /proc/cpuinfo, which does not have the above
+                       instability issue. However, not all features have names
+                       in /proc/cpuinfo.
+                       Note that using this option will taint your kernel.
                        Also note that user programs calling CPUID directly
                        or using the feature without checking anything
                        will still see it. This just prevents it from
                        Documentation/admin-guide/kdump/kdump.rst for an example.
 
        crashkernel=size[KMG],high
-                       [KNL, X86-64] range could be above 4G. Allow kernel
+                       [KNL, X86-64, ARM64] range could be above 4G. Allow kernel
                        to allocate physical memory region from top, so could
                        be above 4G if the system has more than 4G ram installed.
                        Otherwise memory region will be allocated below 4G, if
                        that requires some amount of low memory, e.g. swiotlb
                        requires at least 64M+32K low memory, and enough extra
                        low memory is needed to make sure DMA buffers for 32-bit
-                       devices won't run out. Kernel would try to allocate at
+                       devices won't run out. Kernel would try to allocate
                        at least 256M below 4G automatically.
-                       This one let user to specify own low range under 4G
+                       This one lets the user specify their own low range under
                        4G for the second kernel instead.
                        0: to disable low allocation.
                        It will be ignored when crashkernel=X,high is not used
                        or memory reserved is below 4G.
 
+                       [KNL, ARM64] range in low memory.
+                       This one lets the user specify a low range in the
+                       DMA zone for the crash dump kernel.
+                       It will be ignored when crashkernel=X,high is not used
+                       or memory reserved is located in the DMA zones.
+
        cryptomgr.notests
                        [KNL] Disable crypto self-tests
 
 
        ima_template=   [IMA]
                        Select one of defined IMA measurements template formats.
-                       Formats: { "ima" | "ima-ng" | "ima-sig" }
+                       Formats: { "ima" | "ima-ng" | "ima-ngv2" | "ima-sig" |
+                                  "ima-sigv2" }
                        Default: "ima-ng"
 
        ima_template_fmt=
                        when set.
                        Format: <int>
 
-       libata.force=   [LIBATA] Force configurations.  The format is comma-
-                       separated list of "[ID:]VAL" where ID is
-                       PORT[.DEVICE].  PORT and DEVICE are decimal numbers
-                       matching port, link or device.  Basically, it matches
-                       the ATA ID string printed on console by libata.  If
-                       the whole ID part is omitted, the last PORT and DEVICE
-                       values are used.  If ID hasn't been specified yet, the
-                       configuration applies to all ports, links and devices.
+       libata.force=   [LIBATA] Force configurations.  The format is comma-
+                       separated list of "[ID:]VAL" where ID is PORT[.DEVICE].
+                       PORT and DEVICE are decimal numbers matching port, link
+                       or device.  Basically, it matches the ATA ID string
+                       printed on console by libata.  If the whole ID part is
+                       omitted, the last PORT and DEVICE values are used.  If
+                       ID hasn't been specified yet, the configuration applies
+                       to all ports, links and devices.
 
                        If only DEVICE is omitted, the parameter applies to
                        the port and all links and devices behind it.  DEVICE
                        host link and device attached to it.
 
                        The VAL specifies the configuration to force.  As long
-                       as there's no ambiguity shortcut notation is allowed.
+                       as there is no ambiguity, shortcut notation is allowed.
                        For example, both 1.5 and 1.5G would work for 1.5Gbps.
                        The following configurations can be forced.
 
                          udma[/][16,25,33,44,66,100,133] notation is also
                          allowed.
 
+                       * nohrst, nosrst, norst: suppress hard, soft and both
+                         resets.
+
+                       * rstonce: only attempt one reset during hot-unplug
+                         link recovery.
+
+                       * [no]dbdelay: Enable or disable the extra 200ms delay
+                         before debouncing a link PHY and device presence
+                         detection.
+
                        * [no]ncq: Turn on or off NCQ.
 
-                       * [no]ncqtrim: Turn off queued DSM TRIM.
+                       * [no]ncqtrim: Enable or disable queued DSM TRIM.
+
+                       * [no]ncqati: Enable or disable NCQ trim on ATI chipset.
+
+                       * [no]trim: Enable or disable (unqueued) TRIM.
+
+                       * trim_zero: Indicate that TRIM command zeroes data.
+
+                       * max_trim_128m: Set 128M maximum trim size limit.
+
+                       * [no]dma: Turn on or off DMA transfers.
+
+                       * atapi_dmadir: Enable ATAPI DMADIR bridge support.
+
+                       * atapi_mod16_dma: Enable the use of ATAPI DMA for
+                         commands that are not a multiple of 16 bytes.
+
+                       * [no]dmalog: Enable or disable the use of the
+                         READ LOG DMA EXT command to access logs.
+
+                       * [no]iddevlog: Enable or disable access to the
+                         identify device data log.
 
-                       * nohrst, nosrst, norst: suppress hard, soft
-                         and both resets.
+                       * [no]logdir: Enable or disable access to the general
+                         purpose log directory.
 
-                       * rstonce: only attempt one reset during
-                         hot-unplug link recovery
+                       * max_sec_128: Set transfer size limit to 128 sectors.
 
-                       * dump_id: dump IDENTIFY data.
+                       * max_sec_1024: Set or clear transfer size limit to
+                         1024 sectors.
 
-                       * atapi_dmadir: Enable ATAPI DMADIR bridge support
+                       * max_sec_lba48: Set or clear transfer size limit to
+                         65535 sectors.
+
+                       * [no]lpm: Enable or disable link power management.
+
+                       * [no]setxfer: Indicate if transfer speed mode setting
+                         should be skipped.
+
+                       * dump_id: Dump IDENTIFY data.
 
                        * disable: Disable this device.
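 
                        For example (illustrative values only),
                        "libata.force=1.5G" limits all links to 1.5Gbps,
                        while "libata.force=2:noncq,2.03:disable" turns
                        off NCQ on port 2 and disables device 3 behind
                        its port multiplier link.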
 
                                               mds=off [X86]
                                               tsx_async_abort=off [X86]
                                               kvm.nx_huge_pages=off [X86]
+                                              srbds=off [X86,INTEL]
                                               no_entry_flush [PPC]
                                               no_uaccess_flush [PPC]
 
 
        nocache         [ARM]
 
-       noclflush       [BUGS=X86] Don't use the CLFLUSH instruction
-
        delayacct       [KNL] Enable per-task delay accounting
 
        nodsp           [SH] Disable hardware DSP at boot time.
 
        noexec          [IA-64]
 
-       noexec          [X86]
-                       On X86-32 available only on PAE configured kernels.
-                       noexec=on: enable non-executable mappings (default)
-                       noexec=off: disable non-executable mappings
-
-       nosmap          [X86,PPC]
+       nosmap          [PPC]
                        Disable SMAP (Supervisor Mode Access Prevention)
                        even if it is supported by processor.
 
-       nosmep          [X86,PPC64s]
+       nosmep          [PPC64s]
                        Disable SMEP (Supervisor Mode Execution Prevention)
                        even if it is supported by processor.
 
 
        nosbagart       [IA-64]
 
-       nosep           [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
-
        nosgx           [X86-64,SGX] Disables Intel SGX kernel support.
 
        nosmp           [SMP] Tells an SMP kernel to act as a UP kernel,
 
        rcupdate.rcu_cpu_stall_timeout= [KNL]
                        Set timeout for RCU CPU stall warning messages.
+                       The value is in seconds and the maximum allowed
+                       value is 300 seconds.
+
+       rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
+                       Set timeout for expedited RCU CPU stall warning
+                       messages.  The value is in milliseconds
+                       and the maximum allowed value is 21000
+                       milliseconds. Please note that this value is
+                       adjusted to an arch timer tick resolution.
+                       Setting this to zero causes the value from
+                       rcupdate.rcu_cpu_stall_timeout to be used (after
+                       conversion from seconds to milliseconds).
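+
+                       For example (illustrative values only), booting
+                       with rcupdate.rcu_cpu_stall_timeout=60 and
+                       rcupdate.rcu_exp_cpu_stall_timeout=0 yields a
+                       60-second normal stall timeout and an expedited
+                       stall timeout of 60000 milliseconds.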
 
        rcupdate.rcu_expedited= [KNL]
                        Use expedited grace-period primitives, for
                        number avoids disturbing real-time workloads,
                        but lengthens grace periods.
 
+       rcupdate.rcu_task_stall_info= [KNL]
+                       Set initial timeout in jiffies for RCU task stall
+                       informational messages, which give some indication
+                       of the problem for those not patient enough to
+                       wait for ten minutes.  Informational messages are
+                       only printed prior to the stall-warning message
+                       for a given grace period. Disable with a value
+                       less than or equal to zero.  Defaults to ten
+                       seconds.  A change in value does not take effect
+                       until the beginning of the next grace period.
+
+       rcupdate.rcu_task_stall_info_mult= [KNL]
+                       Multiplier for time interval between successive
+                       RCU task stall informational messages for a given
+                       RCU tasks grace period.  This value is clamped
+                       to one through ten, inclusive.  It defaults to
+                       the value three, so that the first informational
+                       message is printed 10 seconds into the grace
+                       period, the second at 40 seconds, the third at
+                       160 seconds, and then the stall warning at 600
+                       seconds would prevent a fourth at 640 seconds.
+
        rcupdate.rcu_task_stall_timeout= [KNL]
-                       Set timeout in jiffies for RCU task stall warning
-                       messages.  Disable with a value less than or equal
-                       to zero.
+                       Set timeout in jiffies for RCU task stall
+                       warning messages.  Disable with a value less
+                       than or equal to zero.  Defaults to ten minutes.
+                       A change in value does not take effect until
+                       the beginning of the next grace period.
 
        rcupdate.rcu_self_test= [KNL]
                        Run the RCU early boot self tests
 
        serialnumber    [BUGS=X86-32]
 
+       sev=option[,option...] [X86-64] See Documentation/x86/x86_64/boot-options.rst
+
        shapers=        [NET]
                        Maximal number of shapers.
 
        smart2=         [HW]
                        Format: <io1>[,<io2>[,...,<io8>]]
 
+       smp.csd_lock_timeout= [KNL]
+                       Specify the period of time in milliseconds
+                       that smp_call_function() and friends will wait
+                       for a CPU to release the CSD lock.  This is
+                       useful when diagnosing bugs involving CPUs
+                       disabling interrupts for extended periods
+                       of time.  Defaults to 5,000 milliseconds, and
+                       setting a value of zero disables this feature.
+                       This feature may be more efficiently disabled
+                       using the csdlock_debug- kernel parameter.
+
        smsc-ircc2.nopnp        [HW] Don't use PNP to discover SMC devices
        smsc-ircc2.ircc_cfg=    [HW] Device configuration I/O port
        smsc-ircc2.ircc_sir=    [HW] SIR base I/O port
                        off:    Disable mitigation and remove
                                performance impact to RDRAND and RDSEED
 
+       srcutree.big_cpu_lim [KNL]
+                       Specifies the number of CPUs constituting a
+                       large system, such that srcu_struct structures
+                       should immediately allocate an srcu_node array.
+                       This kernel-boot parameter defaults to 128,
+                       but takes effect only when the low-order four
+                       bits of srcutree.convert_to_big are equal to 3
+                       (decide at boot).
+
+       srcutree.convert_to_big [KNL]
+                       Specifies under what conditions an SRCU tree
+                       srcu_struct structure will be converted to big
+                       form, that is, with an srcu_node tree:
+
+                                  0:  Never.
+                                  1:  At init_srcu_struct() time.
+                                  2:  When rcutorture decides to.
+                                  3:  Decide at boot time (default).
+                               0x1X:  Above plus if high contention.
+
+                       Either way, the srcu_node tree will be sized based
+                       on the actual runtime number of CPUs (nr_cpu_ids)
+                       instead of the compile-time CONFIG_NR_CPUS.
+
        srcutree.counter_wrap_check [KNL]
                        Specifies how frequently to check for
                        grace-period sequence counter wrap for the
                        expediting.  Set to zero to disable automatic
                        expediting.
 
+       srcutree.small_contention_lim [KNL]
+                       Specifies the number of update-side contention
+                       events per jiffy that will be tolerated before
+                       initiating a conversion of an srcu_struct
+                       structure to big form.  Note that the value of
+                       srcutree.convert_to_big must have the 0x10 bit
+                       set for contention-based conversions to occur.
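+
+                       For example (illustrative values only), booting
+                       with srcutree.convert_to_big=0x13 keeps the
+                       default decide-at-boot behavior while also
+                       allowing conversion to big form under high
+                       contention, with srcutree.small_contention_lim
+                       setting the tolerated contention level.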
+
        ssbd=           [ARM64,HW]
                        Speculative Store Bypass Disable control
 
                        sources:
                        - "tpm"
                        - "tee"
+                       - "caam"
                        If not specified then it defaults to iterating through
                        the trust source list starting with TPM and assigns the
                        first trust source as a backend which is initialized
                        successfully during iteration.
 
+       trusted.rng=    [KEYS]
+                       Format: <string>
+                       The RNG used to generate key material for trusted keys.
+                       Can be one of:
+                       - "kernel"
+                       - the same value as trusted.source: "tpm" or "tee"
+                       - "default"
+                       If not specified, "default" is used.  In this case,
+                       the choice of RNG is left to each individual trust
+                       source.
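+
+                       For example (illustrative values only), booting
+                       with trusted.source=tpm trusted.rng=kernel uses
+                       the TPM as the trust source while generating key
+                       material with the kernel RNG.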
+
        tsc=            Disable clocksource stability checks for TSC.
                        Format: <string>
                        [x86] reliable: mark tsc clocksource as reliable, this
index 1144ea3229a37d7bff99844351cda2d3b0d4a1cc..e9c18dabc5523bcb60a95e039b14ee4ba031e70e 100644 (file)
@@ -994,6 +994,9 @@ This is a directory, with the following entries:
 * ``boot_id``: a UUID generated the first time this is retrieved, and
   unvarying after that;
 
+* ``uuid``: a UUID generated every time this is retrieved (this can
+  thus be used to generate UUIDs at will);
+
 * ``entropy_avail``: the pool's entropy count, in bits;
 
 * ``poolsize``: the entropy pool size, in bits;
@@ -1001,10 +1004,7 @@ This is a directory, with the following entries:
 * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
   number of seconds between urandom pool reseeding). This file is
   writable for compatibility purposes, but writing to it has no effect
-  on any RNG behavior.
-
-* ``uuid``: a UUID generated every time this is retrieved (this can
-  thus be used to generate UUIDs at will);
+  on any RNG behavior;
 
 * ``write_wakeup_threshold``: when the entropy count drops below this
   (as a number of bits), processes waiting to write to ``/dev/random``
index 29884b261aa9cc40f73dc3cc91fe1d719da835b4..8aefa1001ae522c0a09ac87608f2b4d8d94ff296 100644 (file)
@@ -350,6 +350,16 @@ Before jumping into the kernel, the following conditions must be met:
 
     - SMCR_EL2.FA64 (bit 31) must be initialised to 0b1.
 
+  For CPUs with the Memory Tagging Extension feature (FEAT_MTE2):
+
+  - If EL3 is present:
+
+    - SCR_EL3.ATA (bit 26) must be initialised to 0b1.
+
+  - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCR_EL2.ATA (bit 56) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.  Where the values documented
index a8f30963e550d372763b3b1a63a7fb38d733b533..f8d818eaaff59f65ade8dc9809a40980ce169be8 100644 (file)
@@ -264,6 +264,39 @@ HWCAP2_MTE3
     Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
     by Documentation/arm64/memory-tagging-extension.rst.
 
+HWCAP2_SME
+
+    Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
+    by Documentation/arm64/sme.rst.
+
+HWCAP2_SME_I16I64
+
+    Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
+
+HWCAP2_SME_F64F64
+
+    Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
+
+HWCAP2_SME_I8I32
+
+    Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
+
+HWCAP2_SME_F16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
+
+HWCAP2_SME_B16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
+
+HWCAP2_SME_F32F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
+
+HWCAP2_SME_FA64
+
+    Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
+
 4. Unused AT_HWCAP bits
 -----------------------
 
index 4f840bac083eb5f8374da17c8ae806ba3957c6cc..ae21f811883022a4990a4bcd2a0f0226d5410a60 100644 (file)
@@ -21,6 +21,7 @@ ARM64 Architecture
     perf
     pointer-authentication
     silicon-errata
+    sme
     sve
     tagged-address-abi
     tagged-pointers
index 466cb9e89047fb51e5d2308d44cf4b16fd1e1113..d27db84d585ed223e7a4cbe220ebcbd1ff6ddaa6 100644 (file)
@@ -189,6 +189,9 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Qualcomm Tech. | Kryo4xx Silver  | N/A             | ARM64_ERRATUM_1024718       |
 +----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Gold    | N/A             | ARM64_ERRATUM_1286807       |
++----------------+-----------------+-----------------+-----------------------------+
+
 +----------------+-----------------+-----------------+-----------------------------+
 | Fujitsu        | A64FX           | E#010001        | FUJITSU_ERRATUM_010001      |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/arm64/sme.rst b/Documentation/arm64/sme.rst
new file mode 100644 (file)
index 0000000..8ba677b
--- /dev/null
@@ -0,0 +1,428 @@
+===================================================
+Scalable Matrix Extension support for AArch64 Linux
+===================================================
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Scalable Matrix Extension (SME).
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.  It should be read in conjunction with the SVE
+documentation in sve.rst which provides details on the Streaming SVE mode
+included in SME.
+
+This document does not aim to describe the SME architecture or programmer's
+model.  To aid understanding, a minimal description of relevant programmer's
+model features for SME is included in Appendix A.
+
+
+1.  General
+-----------
+
+* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA
+  register state and TPIDR2_EL0 are tracked per thread.
+
+* The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
+  AT_HWCAP2 entry.  Presence of this flag implies the presence of the SME
+  instructions and registers, and the Linux-specific system interfaces
+  described in this document.  SME is reported in /proc/cpuinfo as "sme".
+
+* Support for the execution of SME instructions in userspace can also be
+  detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
+  instruction, and checking that the value of the SME field is nonzero. [3]
+
+  It does not guarantee the presence of the system interfaces described in the
+  following sections: software that needs to verify that those interfaces are
+  present must check for HWCAP2_SME instead.
+
+* There are a number of optional SME features; the presence of these is
+  reported through AT_HWCAP2 via:
+
+       HWCAP2_SME_I16I64
+       HWCAP2_SME_F64F64
+       HWCAP2_SME_I8I32
+       HWCAP2_SME_F16F32
+       HWCAP2_SME_B16F32
+       HWCAP2_SME_F32F32
+       HWCAP2_SME_FA64
+
+  This list may be extended over time as the SME architecture evolves.
+
+  These extensions are also reported via the CPU ID register ID_AA64SMFR0_EL1,
+  which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
+  cpu-feature-registers.txt for details.
+
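+  As an illustration only, userspace might test for these capabilities
+  with getauxval() roughly as follows; this is a minimal sketch assuming
+  the HWCAP2_SME* constants from the kernel's <asm/hwcap.h>::
+
+        #include <stdbool.h>
+        #include <sys/auxv.h>
+        #include <asm/hwcap.h>
+
+        /* True if the SME base extension is present. */
+        static bool have_sme(void)
+        {
+                return getauxval(AT_HWCAP2) & HWCAP2_SME;
+        }
+
+        /* True if the full A64 ISA (including FFR) is available in
+         * streaming mode. */
+        static bool have_sme_fa64(void)
+        {
+                unsigned long hwcap2 = getauxval(AT_HWCAP2);
+
+                return (hwcap2 & HWCAP2_SME) && (hwcap2 & HWCAP2_SME_FA64);
+        }
+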
+* Debuggers should restrict themselves to interacting with the target via the
+  NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets.  The recommended way
+  of detecting support for these regsets is to connect to a target process
+  first and then attempt a
+
+       ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
+
+* Whenever ZA register values are exchanged in memory between userspace and
+  the kernel, the register value is encoded in memory as a series of horizontal
+  vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
+  used for SVE vectors.
+
+* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
+  in which case it is set to 0.
+
+2.  Vector lengths
+------------------
+
+SME defines a second vector length similar to the SVE vector length, which
+controls the size of the streaming mode SVE vectors and the ZA matrix array.
+The ZA matrix is square with each side having as many bytes as a streaming
+mode SVE vector; for example, with a 64 byte (512 bit) streaming vector
+length ZA occupies 64x64 = 4096 bytes.
+
+
+3.  Sharing of streaming and non-streaming mode SVE state
+---------------------------------------------------------
+
+It is implementation defined which, if any, parts of the SVE state are
+shared between streaming and non-streaming modes.  When switching between
+modes via software interfaces such as ptrace, if no register content is
+provided as part of the switch, no state will be assumed to be shared and
+everything will be zeroed.
+
+
+4.  System call behaviour
+-------------------------
+
+* On syscall PSTATE.ZA is preserved; if PSTATE.ZA==1 then the contents of the
+  ZA matrix are preserved.
+
+* On syscall PSTATE.SM will be cleared and the SVE registers will be handled
+  as per the standard SVE ABI.
+
+* Neither the SVE registers nor ZA are used to pass arguments to or receive
+  results from any syscall.
+
+* On process creation (e.g. clone()) the newly created process will have
+  PSTATE.SM cleared.
+
+* All other SME state of a thread, including the currently configured vector
+  length, the state of the PR_SME_VL_INHERIT flag, and the deferred vector
+  length (if any), is preserved across all syscalls, subject to the specific
+  exceptions for execve() described in section 7.
+
+
+5.  Signal handling
+-------------------
+
+* Signal handlers are invoked with streaming mode and ZA disabled.
+
+* A new signal frame record za_context encodes the ZA register contents on
+  signal delivery. [1]
+
+* The signal frame record for ZA always contains basic metadata, in particular
+  the thread's vector length (in za_context.vl).
+
+* The ZA matrix may or may not be included in the record, depending on
+  the value of PSTATE.ZA.  The registers are present if and only if:
+  za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
+  in which case PSTATE.ZA == 1 (see the sketch after this list).
+
+* If matrix data is present, the remainder of the record has a vl-dependent
+  size and layout.  Macros ZA_SIG_* are defined [1] to facilitate access to
+  them.
+
+* The matrix is stored as a series of horizontal vectors in the same format as
+  is used for SVE vectors.
+
+* If the ZA context is too big to fit in sigcontext.__reserved[], then extra
+  space is allocated on the stack, an extra_context record is written in
+  __reserved[] referencing this space.  za_context is then written in the
+  extra space.  Refer to [1] for further details about this mechanism.
+
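+As a non-normative sketch, the presence test above might be written as
+follows, assuming the za_context, ZA_SIG_* and sve_vq_from_vl() definitions
+from [1]::
+
+        #include <asm/sigcontext.h>
+
+        /* Nonzero if the record carries ZA register data, i.e. if
+         * PSTATE.ZA was 1 when the signal was delivered. */
+        static int za_data_present(const struct za_context *za)
+        {
+                return za->head.size >=
+                       ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za->vl));
+        }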
+
+6.  Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is no za_context record in the signal frame, or if the record is
+  present but contains no register data as described in the previous section,
+  then ZA is disabled.
+
+* If za_context is present in the signal frame and contains matrix data then
+  PSTATE.ZA is set to 1 and ZA is populated with the specified data.
+
+* The vector length cannot be changed via signal return.  If za_context.vl in
+  the signal frame does not match the current vector length, the signal return
+  attempt is treated as illegal, resulting in a forced SIGSEGV.
+
+
+7.  prctl extensions
+--------------------
+
+Some new prctl() calls are added to allow programs to manage the SME vector
+length:
+
+prctl(PR_SME_SET_VL, unsigned long arg)
+
+    Sets the vector length of the calling thread and related flags, where
+    arg == vl | flags.  Other threads of the calling process are unaffected.
+
+    vl is the desired vector length, where sve_vl_valid(vl) must be true.
+
+    flags:
+
+       PR_SME_VL_INHERIT
+
+           Inherit the current vector length across execve().  Otherwise, the
+           vector length is reset to the system default at execve().  (See
+           Section 10.)
+
+       PR_SME_SET_VL_ONEXEC
+
+           Defer the requested vector length change until the next execve()
+           performed by this thread.
+
+           The effect is equivalent to implicit execution of the following
+           call immediately after the next execve() (if any) by the thread:
+
+               prctl(PR_SME_SET_VL, arg & ~PR_SME_SET_VL_ONEXEC)
+
+           This allows launching of a new program with a different vector
+           length, while avoiding runtime side effects in the caller.
+
+           Without PR_SME_SET_VL_ONEXEC, the requested change takes effect
+           immediately.
+
+
+    Return value: a nonnegative value on success, or a negative value on
+    error:
+       EINVAL: SME not supported, invalid vector length requested, or
+           invalid flags.
+
+
+    On success:
+
+    * Either the calling thread's vector length or the deferred vector length
+      to be applied at the next execve() by the thread (dependent on whether
+      PR_SME_SET_VL_ONEXEC is present in arg), is set to the largest value
+      supported by the system that is less than or equal to vl.  If vl ==
+      SVE_VL_MAX, the value set will be the largest value supported by the
+      system.
+
+    * Any previously outstanding deferred vector length change in the calling
+      thread is cancelled.
+
+    * The returned value describes the resulting configuration, encoded as for
+      PR_SME_GET_VL.  The vector length reported in this value is the new
+      current vector length for this thread if PR_SME_SET_VL_ONEXEC was not
+      present in arg; otherwise, the reported vector length is the deferred
+      vector length that will be applied at the next execve() by the calling
+      thread.
+
+    * Changing the vector length causes all of ZA, P0..P15, FFR and all bits of
+      Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+      unspecified, including both streaming and non-streaming SVE state.
+      Calling PR_SME_SET_VL with vl equal to the thread's current vector
+      length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
+      does not constitute a change to the vector length for this purpose.
+
+    * Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
+      Calling PR_SME_SET_VL with vl equal to the thread's current vector
+      length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
+      does not constitute a change to the vector length for this purpose.
+
+
+prctl(PR_SME_GET_VL)
+
+    Gets the vector length of the calling thread.
+
+    The following flag may be OR-ed into the result:
+
+       PR_SME_VL_INHERIT
+
+           Vector length will be inherited across execve().
+
+    There is no way to determine whether there is an outstanding deferred
+    vector length change (which would only normally be the case between a
+    fork() or vfork() and the corresponding execve() in typical use).
+
+    To extract the vector length from the result, bitwise and it with
+    PR_SME_VL_LEN_MASK.
+
+    Return value: a nonnegative value on success, or a negative value on error:
+       EINVAL: SME not supported.
+
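+By way of illustration only, a thread might configure and read back its
+streaming vector length as in the following sketch, which assumes the
+PR_SME_* constants from <linux/prctl.h> on a kernel with SME support::
+
+        #include <stdio.h>
+        #include <sys/prctl.h>
+
+        int main(void)
+        {
+                int ret;
+
+                /* Request a 32 byte (256 bit) streaming vector length,
+                 * to be inherited across execve(). */
+                ret = prctl(PR_SME_SET_VL, 32 | PR_SME_VL_INHERIT);
+                if (ret < 0)
+                        return 1;       /* no SME or invalid request */
+
+                ret = prctl(PR_SME_GET_VL);
+                printf("streaming VL: %d bytes\n",
+                       ret & PR_SME_VL_LEN_MASK);
+                return 0;
+        }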
+
+8.  ptrace extensions
+---------------------
+
+* A new regset NT_ARM_SSVE is defined for access to streaming mode SVE
+  state via PTRACE_GETREGSET and PTRACE_SETREGSET; this is documented in
+  sve.rst.
+
+* A new regset NT_ARM_ZA is defined for access to ZA state via
+  PTRACE_GETREGSET and PTRACE_SETREGSET.
+
+  Refer to [2] for definitions.
+
+The regset data starts with struct user_za_header, containing:
+
+    size
+
+       Size of the complete regset, in bytes.
+       This depends on vl and possibly on other things in the future.
+
+       If a call to PTRACE_GETREGSET requests less data than the value of
+       size, the caller can allocate a larger buffer and retry in order to
+       read the complete regset.
+
+    max_size
+
+       Maximum size in bytes that the regset can grow to for the target
+       thread.  The regset won't grow bigger than this even if the target
+       thread changes its vector length etc.
+
+    vl
+
+       Target thread's current streaming vector length, in bytes.
+
+    max_vl
+
+       Maximum possible streaming vector length for the target thread.
+
+    flags
+
+       Zero or more of the following flags, which have the same
+       meaning and behaviour as the corresponding PR_SET_VL_* flags:
+
+           SME_PT_VL_INHERIT
+
+           SME_PT_VL_ONEXEC (SETREGSET only).
+
+* The effects of changing the vector length and/or flags are equivalent to
+  those documented for PR_SME_SET_VL.
+
+  The caller must make a further GETREGSET call if it needs to know what VL is
+  actually set by SETREGSET, unless it is known in advance that the requested
+  VL is supported.
+
+* The size and layout of the payload depends on the header fields.  The
+  SME_PT_ZA_*() macros are provided to facilitate access to the data.
+
+* For SETREGSET it is permissible to omit the payload, in which
+  case the vector length and flags are changed and PSTATE.ZA is set to 0
+  (along with any consequences of those changes).  If a payload is provided
+  then PSTATE.ZA will be set to 1.
+
+* For SETREGSET, if the requested VL is not supported, the effect will be the
+  same as if the payload were omitted, except that an EIO error is reported.
+  No attempt is made to translate the payload data to the correct layout
+  for the vector length actually set.  It is up to the caller to translate the
+  payload layout for the actual VL and retry.
+
+* The effect of writing a partial, incomplete payload is unspecified.
+
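+Purely as an illustrative sketch, a debugger might fetch the ZA header of a
+stopped target (for example to discover its streaming vector length) as
+follows, assuming NT_ARM_ZA from <linux/elf.h> and struct user_za_header
+from [2]::
+
+        #include <sys/ptrace.h>
+        #include <sys/types.h>
+        #include <sys/uio.h>
+        #include <linux/elf.h>
+        #include <asm/ptrace.h>
+
+        static long read_za_header(pid_t pid, struct user_za_header *hdr)
+        {
+                struct iovec iov = {
+                        .iov_base = hdr,
+                        .iov_len = sizeof(*hdr),
+                };
+
+                /* On success hdr->vl holds the streaming vector length. */
+                return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov);
+        }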
+
+9.  ELF coredump extensions
+---------------------------
+
+* NT_ARM_SSVE notes will be added to each coredump for
+  each thread of the dumped process.  The contents will be equivalent to the
+  data that would have been read if a PTRACE_GETREGSET of the corresponding
+  type were executed for each thread when the coredump was generated.
+
+* A NT_ARM_ZA note will be added to each coredump for each thread of the
+  dumped process.  The contents will be equivalent to the data that would have
+  been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
+  when the coredump was generated.
+
+
+10.  System runtime configuration
+---------------------------------
+
+* To mitigate the ABI impact of expansion of the signal frame, a policy
+  mechanism is provided for administrators, distro maintainers and developers
+  to set the default vector length for userspace processes:
+
+/proc/sys/abi/sme_default_vector_length
+
+    Writing the text representation of an integer to this file sets the system
+    default vector length to the specified value, unless the value is greater
+    than the maximum vector length supported by the system in which case the
+    default vector length is set to that maximum.
+
+    The result can be determined by reopening the file and reading its
+    contents.
+
+    At boot, the default vector length is initially set to 32 or the maximum
+    supported vector length, whichever is smaller and supported.  This
+    determines the initial vector length of the init process (PID 1).
+
+    Reading this file returns the current system default vector length.
+
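+    For example (illustrative only), an administrator might set a 64 byte
+    (512 bit) default with::
+
+        # echo 64 > /proc/sys/abi/sme_default_vector_length
+        # cat /proc/sys/abi/sme_default_vector_length
+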
+* At every execve() call, the new vector length of the new process is set to
+  the system default vector length, unless
+
+    * PR_SME_VL_INHERIT (or equivalently SME_PT_VL_INHERIT) is set for the
+      calling thread, or
+
+    * a deferred vector length change is pending, established via the
+      PR_SME_SET_VL_ONEXEC flag (or SME_PT_VL_ONEXEC).
+
+* Modifying the system default vector length does not affect the vector length
+  of any existing process or thread that does not make an execve() call.
+
+
+Appendix A.  SME programmer's model (informative)
+=================================================
+
+This section provides a minimal description of the additions made by SME to the
+ARMv8-A programmer's model that are relevant to this document.
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+A.1.  Registers
+---------------
+
+In A64 state, SME adds the following:
+
+* A new mode, streaming mode, in which a subset of the normal FPSIMD and SVE
+  features are available.  When supported, EL0 software may enter and leave
+  streaming mode at any time.
+
+  For best system performance it is strongly encouraged for software to enable
+  streaming mode only when it is actively being used.
+
+* A new vector length controlling the size of ZA and the Z registers when in
+  streaming mode, separately to the vector length used for SVE when not in
+  streaming mode.  There is no requirement that either the currently selected
+  vector length or the set of vector lengths supported for the two modes in
+  a given system have any relationship.  The streaming mode vector length
+  is referred to as SVL.
+
+* A new ZA matrix register.  This is a square matrix of SVLxSVL bits.  Most
+  operations on ZA require that streaming mode be enabled but ZA can be
+  enabled without streaming mode in order to load, save and retain data.
+
+  For best system performance it is strongly encouraged for software to enable
+  ZA only when it is actively being used.
+
+* Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
+  SMSTOP instructions or by access to the SVCR system register:
+
+  * PSTATE.ZA: if this is 1 then the ZA matrix is accessible and has valid
+    data, while if it is 0 then ZA cannot be accessed.  When PSTATE.ZA is
+    changed from 0 to 1 all bits in ZA are cleared.
+
+  * PSTATE.SM: if this is 1 then the PE is in streaming mode.  When the
+    value of PSTATE.SM is changed, it is implementation defined whether the
+    subset of the floating point register bits valid in both modes is
+    retained.  Any other bits will be cleared.
+
+
+References
+==========
+
+[1] arch/arm64/include/uapi/asm/sigcontext.h
+    AArch64 Linux signal ABI definitions
+
+[2] arch/arm64/include/uapi/asm/ptrace.h
+    AArch64 Linux ptrace ABI definitions
+
+[3] Documentation/arm64/cpu-feature-registers.rst
index 9d9a4de5bc34a31444dba9945e8f5c042160cbbb..93c2c299058497c2a2dee3a14fcd7056e47d9134 100644 (file)
@@ -7,7 +7,9 @@ Author: Dave Martin <Dave.Martin@arm.com>
 Date:   4 August 2017
 
 This document outlines briefly the interface provided to userspace by Linux in
-order to support use of the ARM Scalable Vector Extension (SVE).
+order to support use of the ARM Scalable Vector Extension (SVE), including
+interactions with Streaming SVE mode added by the Scalable Matrix Extension
+(SME).
 
 This is an outline of the most important features and issues only and not
 intended to be exhaustive.
@@ -23,6 +25,10 @@ model features for SVE is included in Appendix A.
 * SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
   tracked per-thread.
 
+* In streaming mode FFR is not accessible unless HWCAP2_SME_FA64 is present
+  in the system; when it is not supported and these interfaces are used to
+  access streaming mode, FFR is read and written as zero.
+
 * The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
   AT_HWCAP entry.  Presence of this flag implies the presence of the SVE
   instructions and registers, and the Linux-specific system interfaces
@@ -53,10 +59,19 @@ model features for SVE is included in Appendix A.
   which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
   cpu-feature-registers.txt for details.
 
+* On hardware that supports the SME extensions, HWCAP2_SME will also be
+  reported in the AT_HWCAP2 aux vector entry.  Among other things SME adds
+  streaming mode which provides a subset of the SVE feature set using a
+  separate SME vector length and the same Z/V registers.  See sme.rst
+  for more details.
+
 * Debuggers should restrict themselves to interacting with the target via the
   NT_ARM_SVE regset.  The recommended way of detecting support for this regset
   is to connect to a target process first and then attempt a
-  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
+  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).  Note that when SME is
+  present and streaming SVE mode is in use the FPSIMD subset of registers
+  will be read via NT_ARM_SVE and NT_ARM_SVE writes will exit streaming mode
+  in the target.
 
 * Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
   between userspace and the kernel, the register value is encoded in memory in
@@ -126,6 +141,11 @@ the SVE instruction set architecture.
   are only present in fpsimd_context.  For convenience, the content of V0..V31
   is duplicated between sve_context and fpsimd_context.
 
+* The record contains a flag field which includes a flag SVE_SIG_FLAG_SM.
+  If set, this flag indicates that the thread is in streaming mode and that
+  the vector length and register data (if present) describe the streaming
+  SVE data and vector length.
+
 * The signal frame record for SVE always contains basic metadata, in particular
   the thread's vector length (in sve_context.vl).
 
@@ -170,6 +190,11 @@ When returning from a signal handler:
   the signal frame does not match the current vector length, the signal return
   attempt is treated as illegal, resulting in a forced SIGSEGV.
 
+* It is permitted to enter or leave streaming mode by setting or clearing
+  the SVE_SIG_FLAG_SM flag, but applications should take care to ensure
+  that, when doing so, sve_context.vl and any register data are appropriate
+  for the vector length in the new mode.
+
 
 6.  prctl extensions
 --------------------
@@ -265,8 +290,14 @@ prctl(PR_SVE_GET_VL)
 7.  ptrace extensions
 ---------------------
 
-* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
-  PTRACE_SETREGSET.
+* New regsets NT_ARM_SVE and NT_ARM_SSVE are defined for use with
+  PTRACE_GETREGSET and PTRACE_SETREGSET. NT_ARM_SSVE describes the
+  streaming mode SVE registers and NT_ARM_SVE describes the
+  non-streaming mode SVE registers.
+
+  In this description a register set is referred to as being "live" when
+  the target is in the appropriate streaming or non-streaming mode and is
+  using data beyond the subset shared with the FPSIMD Vn registers.
 
   Refer to [2] for definitions.
 
@@ -297,7 +328,7 @@ The regset data starts with struct user_sve_header, containing:
 
     flags
 
-       either
+       at most one of
 
            SVE_PT_REGS_FPSIMD
 
@@ -331,6 +362,10 @@ The regset data starts with struct user_sve_header, containing:
 
            SVE_PT_VL_ONEXEC (SETREGSET only).
 
+       If neither FPSIMD nor SVE flags are provided then no register
+       payload is available; this is only possible when SME is implemented.
+
+
 * The effects of changing the vector length and/or flags are equivalent to
   those documented for PR_SVE_SET_VL.
 
@@ -346,6 +381,13 @@ The regset data starts with struct user_sve_header, containing:
   case only the vector length and flags are changed (along with any
   consequences of those changes).
 
+* In systems supporting SME, when in streaming mode a GETREGSET for
+  NT_ARM_SVE will return only the user_sve_header with no register data;
+  similarly, a GETREGSET for NT_ARM_SSVE will not return any register data
+  when not in streaming mode.
+
+* A GETREGSET for NT_ARM_SSVE will never return SVE_PT_REGS_FPSIMD.
+
 * For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
   requested VL is not supported, the effect will be the same as if the
   payload were omitted, except that an EIO error is reported.  No
@@ -355,17 +397,25 @@ The regset data starts with struct user_sve_header, containing:
   unspecified.  It is up to the caller to translate the payload layout
   for the actual VL and retry.
 
+* Where SME is implemented it is not possible to GETREGSET the register
+  state for normal SVE when in streaming mode, nor the streaming mode
+  register state when in normal mode, regardless of the implementation defined
+  behaviour of the hardware for sharing data between the two modes.
+
+* Any SETREGSET of NT_ARM_SVE will exit streaming mode if the target was in
+  streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
+  if the target was not in streaming mode.
+
 * The effect of writing a partial, incomplete payload is unspecified.
 
 
 8.  ELF coredump extensions
 ---------------------------
 
-* A NT_ARM_SVE note will be added to each coredump for each thread of the
-  dumped process.  The contents will be equivalent to the data that would have
-  been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
-  when the coredump was generated.
-
+* NT_ARM_SVE and NT_ARM_SSVE notes will be added to each coredump for
+  each thread of the dumped process.  The contents will be equivalent to the
+  data that would have been read if a PTRACE_GETREGSET of the corresponding
+  type were executed for each thread when the coredump was generated.
 
 9.  System runtime configuration
 --------------------------------
index 52ea7b6b2fe8eb668c6c7fbd4389124de19af0a4..7964fe134277b8b8337d620e524de7d24a15354e 100644 (file)
@@ -218,7 +218,6 @@ current *struct* is::
                int (*tray_move)(struct cdrom_device_info *, int);
                int (*lock_door)(struct cdrom_device_info *, int);
                int (*select_speed)(struct cdrom_device_info *, int);
-               int (*select_disc)(struct cdrom_device_info *, int);
                int (*get_last_session) (struct cdrom_device_info *,
                                         struct cdrom_multisession *);
                int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
@@ -419,15 +418,6 @@ this `auto-selection` capability, the decision should be made on the
 current disc loaded and the return value should be positive. A negative
 return value indicates an error.
 
-::
-
-       int select_disc(struct cdrom_device_info *cdi, int number)
-
-If the drive can store multiple discs (a juke-box) this function
-will perform disc selection. It should return the number of the
-selected disc on success, a negative value on error. Currently, only
-the ide-cd driver supports this functionality.
-
 ::
 
        int get_last_session(struct cdrom_device_info *cdi,
index 729e24864fe738a3f5bd79a62d56d9e7c4542c85..22ec68f244210681d89ab36445ecf3f67f7a597d 100644 (file)
@@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
 .. c:function:: u64 ktime_get_mono_fast_ns( void )
                u64 ktime_get_raw_fast_ns( void )
                u64 ktime_get_boot_fast_ns( void )
+               u64 ktime_get_tai_fast_ns( void )
                u64 ktime_get_real_fast_ns( void )
 
        These variants are safe to call from any context, including from
index c060c7914cae6573c68af4f4f0cd9eaf17c0ad7b..c4e4a9eab658056d96b9708a4f8b9146adc360fc 100644 (file)
@@ -26,6 +26,7 @@ properties:
       - items:
           - enum:
               - renesas,sata-r8a774b1     # RZ/G2N
+              - renesas,sata-r8a774e1     # RZ/G2H
               - renesas,sata-r8a7795      # R-Car H3
               - renesas,sata-r8a77965     # R-Car M3-N
           - const: renesas,rcar-gen3-sata # generic R-Car Gen3 or RZ/G2
diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
deleted file mode 100644 (file)
index 58fc8a6..0000000
+++ /dev/null
@@ -1,212 +0,0 @@
-* Rockchip rk3399 DMC (Dynamic Memory Controller) device
-
-Required properties:
-- compatible:           Must be "rockchip,rk3399-dmc".
-- devfreq-events:       Node to get DDR loading, Refer to
-                        Documentation/devicetree/bindings/devfreq/event/
-                        rockchip-dfi.txt
-- clocks:               Phandles for clock specified in "clock-names" property
-- clock-names :                 The name of clock used by the DFI, must be
-                        "pclk_ddr_mon";
-- operating-points-v2:  Refer to Documentation/devicetree/bindings/opp/opp-v2.yaml
-                        for details.
-- center-supply:        DMC supply node.
-- status:               Marks the node enabled/disabled.
-- rockchip,pmu:                 Phandle to the syscon managing the "PMU general register
-                        files".
-
-Optional properties:
-- interrupts:           The CPU interrupt number. The interrupt specifier
-                        format depends on the interrupt controller.
-                        It should be a DCF interrupt. When DDR DVFS finishes
-                        a DCF interrupt is triggered.
-- rockchip,pmu:                 Phandle to the syscon managing the "PMU general register
-                        files".
-
-Following properties relate to DDR timing:
-
-- rockchip,dram_speed_bin :      Value reference include/dt-bindings/clock/rk3399-ddr.h,
-                                 it selects the DDR3 cl-trp-trcd type. It must be
-                                 set according to "Speed Bin" in DDR3 datasheet,
-                                 DO NOT use a smaller "Speed Bin" than specified
-                                 for the DDR3 being used.
-
-- rockchip,pd_idle :             Configure the PD_IDLE value. Defines the
-                                 power-down idle period in which memories are
-                                 placed into power-down mode if bus is idle
-                                 for PD_IDLE DFI clock cycles.
-
-- rockchip,sr_idle :             Configure the SR_IDLE value. Defines the
-                                 self-refresh idle period in which memories are
-                                 placed into self-refresh mode if bus is idle
-                                 for SR_IDLE * 1024 DFI clock cycles (DFI
-                                 clocks freq is half of DRAM clock), default
-                                 value is "0".
-
-- rockchip,sr_mc_gate_idle :     Defines the memory self-refresh and controller
-                                 clock gating idle period. Memories are placed
-                                 into self-refresh mode and memory controller
-                                 clock arg gating started if bus is idle for
-                                 sr_mc_gate_idle*1024 DFI clock cycles.
-
-- rockchip,srpd_lite_idle :      Defines the self-refresh power down idle
-                                 period in which memories are placed into
-                                 self-refresh power down mode if bus is idle
-                                 for srpd_lite_idle * 1024 DFI clock cycles.
-                                 This parameter is for LPDDR4 only.
-
-- rockchip,standby_idle :        Defines the standby idle period in which
-                                 memories are placed into self-refresh mode.
-                                 The controller, pi, PHY and DRAM clock will
-                                 be gated if bus is idle for standby_idle * DFI
-                                 clock cycles.
-
-- rockchip,dram_dll_dis_freq :   Defines the DDR3 DLL bypass frequency in MHz.
-                                 When DDR frequency is less than DRAM_DLL_DISB_FREQ,
-                                 DDR3 DLL will be bypassed. Note: if DLL was bypassed,
-                                 the odt will also stop working.
-
-- rockchip,phy_dll_dis_freq :    Defines the PHY dll bypass frequency in
-                                 MHz (Mega Hz). When DDR frequency is less than
-                                 DRAM_DLL_DISB_FREQ, PHY DLL will be bypassed.
-                                 Note: PHY DLL and PHY ODT are independent.
-
-- rockchip,ddr3_odt_dis_freq :   When the DRAM type is DDR3, this parameter defines
-                                 the ODT disable frequency in MHz (Mega Hz).
-                                 when the DDR frequency is  less then ddr3_odt_dis_freq,
-                                 the ODT on the DRAM side and controller side are
-                                 both disabled.
-
-- rockchip,ddr3_drv :            When the DRAM type is DDR3, this parameter defines
-                                 the DRAM side driver strength in ohms. Default
-                                 value is 40.
-
-- rockchip,ddr3_odt :            When the DRAM type is DDR3, this parameter defines
-                                 the DRAM side ODT strength in ohms. Default value
-                                 is 120.
-
-- rockchip,phy_ddr3_ca_drv :     When the DRAM type is DDR3, this parameter defines
-                                 the phy side CA line (incluing command line,
-                                 address line and clock line) driver strength.
-                                 Default value is 40.
-
-- rockchip,phy_ddr3_dq_drv :     When the DRAM type is DDR3, this parameter defines
-                                 the PHY side DQ line (including DQS/DQ/DM line)
-                                 driver strength. Default value is 40.
-
-- rockchip,phy_ddr3_odt :        When the DRAM type is DDR3, this parameter defines
-                                 the PHY side ODT strength. Default value is 240.
-
-- rockchip,lpddr3_odt_dis_freq : When the DRAM type is LPDDR3, this parameter defines
-                                 then ODT disable frequency in MHz (Mega Hz).
-                                 When DDR frequency is less then ddr3_odt_dis_freq,
-                                 the ODT on the DRAM side and controller side are
-                                 both disabled.
-
-- rockchip,lpddr3_drv :                  When the DRAM type is LPDDR3, this parameter defines
-                                 the DRAM side driver strength in ohms. Default
-                                 value is 34.
-
-- rockchip,lpddr3_odt :                  When the DRAM type is LPDDR3, this parameter defines
-                                 the DRAM side ODT strength in ohms. Default value
-                                 is 240.
-
-- rockchip,phy_lpddr3_ca_drv :   When the DRAM type is LPDDR3, this parameter defines
-                                 the PHY side CA line (including command line,
-                                 address line and clock line) driver strength.
-                                 Default value is 40.
-
-- rockchip,phy_lpddr3_dq_drv :   When the DRAM type is LPDDR3, this parameter defines
-                                 the PHY side DQ line (including DQS/DQ/DM line)
-                                 driver strength. Default value is 40.
-
-- rockchip,phy_lpddr3_odt :      When dram type is LPDDR3, this parameter define
-                                 the phy side odt strength, default value is 240.
-
-- rockchip,lpddr4_odt_dis_freq : When the DRAM type is LPDDR4, this parameter
-                                 defines the ODT disable frequency in
-                                 MHz (Mega Hz). When the DDR frequency is less then
-                                 ddr3_odt_dis_freq, the ODT on the DRAM side and
-                                 controller side are both disabled.
-
-- rockchip,lpddr4_drv :                  When the DRAM type is LPDDR4, this parameter defines
-                                 the DRAM side driver strength in ohms. Default
-                                 value is 60.
-
-- rockchip,lpddr4_dq_odt :       When the DRAM type is LPDDR4, this parameter defines
-                                 the DRAM side ODT on DQS/DQ line strength in ohms.
-                                 Default value is 40.
-
-- rockchip,lpddr4_ca_odt :       When the DRAM type is LPDDR4, this parameter defines
-                                 the DRAM side ODT on CA line strength in ohms.
-                                 Default value is 40.
-
-- rockchip,phy_lpddr4_ca_drv :   When the DRAM type is LPDDR4, this parameter defines
-                                 the PHY side CA line (including command address
-                                 line) driver strength. Default value is 40.
-
-- rockchip,phy_lpddr4_ck_cs_drv : When the DRAM type is LPDDR4, this parameter defines
-                                 the PHY side clock line and CS line driver
-                                 strength. Default value is 80.
-
-- rockchip,phy_lpddr4_dq_drv :   When the DRAM type is LPDDR4, this parameter defines
-                                 the PHY side DQ line (including DQS/DQ/DM line)
-                                 driver strength. Default value is 80.
-
-- rockchip,phy_lpddr4_odt :      When the DRAM type is LPDDR4, this parameter defines
-                                 the PHY side ODT strength. Default value is 60.
-
-Example:
-       dmc_opp_table: dmc_opp_table {
-               compatible = "operating-points-v2";
-
-               opp00 {
-                       opp-hz = /bits/ 64 <300000000>;
-                       opp-microvolt = <900000>;
-               };
-               opp01 {
-                       opp-hz = /bits/ 64 <666000000>;
-                       opp-microvolt = <900000>;
-               };
-       };
-
-       dmc: dmc {
-               compatible = "rockchip,rk3399-dmc";
-               devfreq-events = <&dfi>;
-               interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
-               clocks = <&cru SCLK_DDRC>;
-               clock-names = "dmc_clk";
-               operating-points-v2 = <&dmc_opp_table>;
-               center-supply = <&ppvar_centerlogic>;
-               upthreshold = <15>;
-               downdifferential = <10>;
-               rockchip,ddr3_speed_bin = <21>;
-               rockchip,pd_idle = <0x40>;
-               rockchip,sr_idle = <0x2>;
-               rockchip,sr_mc_gate_idle = <0x3>;
-               rockchip,srpd_lite_idle = <0x4>;
-               rockchip,standby_idle = <0x2000>;
-               rockchip,dram_dll_dis_freq = <300>;
-               rockchip,phy_dll_dis_freq = <125>;
-               rockchip,auto_pd_dis_freq = <666>;
-               rockchip,ddr3_odt_dis_freq = <333>;
-               rockchip,ddr3_drv = <40>;
-               rockchip,ddr3_odt = <120>;
-               rockchip,phy_ddr3_ca_drv = <40>;
-               rockchip,phy_ddr3_dq_drv = <40>;
-               rockchip,phy_ddr3_odt = <240>;
-               rockchip,lpddr3_odt_dis_freq = <333>;
-               rockchip,lpddr3_drv = <34>;
-               rockchip,lpddr3_odt = <240>;
-               rockchip,phy_lpddr3_ca_drv = <40>;
-               rockchip,phy_lpddr3_dq_drv = <40>;
-               rockchip,phy_lpddr3_odt = <240>;
-               rockchip,lpddr4_odt_dis_freq = <333>;
-               rockchip,lpddr4_drv = <60>;
-               rockchip,lpddr4_dq_odt = <40>;
-               rockchip,lpddr4_ca_odt = <40>;
-               rockchip,phy_lpddr4_ca_drv = <40>;
-               rockchip,phy_lpddr4_ck_cs_drv = <80>;
-               rockchip,phy_lpddr4_dq_drv = <80>;
-               rockchip,phy_lpddr4_odt = <60>;
-       };
index 7d9c083632b9419fd34a4a8c2495be5ed0403d32..22beb37f1bf12c59942e7352d1a745b5c4b0aa34 100644 (file)
@@ -61,6 +61,26 @@ patternProperties:
     $ref: /schemas/types.yaml#/definitions/uint32
     enum: [0, 1]
 
+  "adi,pin(5|10)-function":
+    description: |
+      Configures the function for pin 5 on the adi,adt7473 and adi,adt7475,
+      or pin 10 on the adi,adt7476 and adi,adt7490.
+    $ref: /schemas/types.yaml#/definitions/string
+    enum:
+      - pwm2
+      - smbalert#
+
+  "adi,pin(9|14)-function":
+    description: |
+      Configures the function for pin 9 on the adi,adt7473 and adi,adt7475,
+      or pin 14 on the adi,adt7476 and adi,adt7490.
+    $ref: /schemas/types.yaml#/definitions/string
+    enum:
+      - tach4
+      - therm#
+      - smbalert#
+      - gpio
+
 required:
   - compatible
   - reg
@@ -79,6 +99,8 @@ examples:
         adi,bypass-attenuator-in0 = <1>;
         adi,bypass-attenuator-in1 = <0>;
         adi,pwm-active-state = <1 0 1>;
+        adi,pin10-function = "smbalert#";
+        adi,pin14-function = "tach4";
       };
     };
 
index 72980d083c210b675e0f4c5a1d9ce389d57d6011..8226e3b5d028e2bb670f8b6cabf5326f66080b84 100644 (file)
@@ -14,6 +14,7 @@ properties:
   compatible:
     enum:
       - adi,adt75
+      - atmel,at30ts74
       - dallas,ds1775
       - dallas,ds75
       - dallas,ds7505
diff --git a/Documentation/devicetree/bindings/hwmon/microchip,lan966x.yaml b/Documentation/devicetree/bindings/hwmon/microchip,lan966x.yaml
new file mode 100644 (file)
index 0000000..390dd67
--- /dev/null
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/microchip,lan966x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip LAN966x Hardware Monitor
+
+maintainers:
+  - Michael Walle <michael@walle.cc>
+
+description: |
+  Microchip LAN966x temperature monitor and fan controller
+
+properties:
+  compatible:
+    enum:
+      - microchip,lan9668-hwmon
+
+  reg:
+    items:
+      - description: PVT registers
+      - description: FAN registers
+
+  reg-names:
+    items:
+      - const: pvt
+      - const: fan
+
+  clocks:
+    maxItems: 1
+
+  '#thermal-sensor-cells':
+    const: 0
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    hwmon: hwmon@e2010180 {
+        compatible = "microchip,lan9668-hwmon";
+        reg = <0xe2010180 0xc>,
+              <0xe20042a8 0xc>;
+        reg-names = "pvt", "fan";
+        clocks = <&sys_clk>;
+        #thermal-sensor-cells = <0>;
+    };
index 30db92977937b2310ca65200b5a80acd5bc20829..b046578498524d882c932bdf92cca78175f196a1 100644 (file)
@@ -34,6 +34,7 @@ properties:
       - nxp,sa56004
       - onnn,nct1008
       - ti,tmp451
+      - ti,tmp461
       - winbond,w83l771
 
 
@@ -52,10 +53,29 @@ properties:
   vcc-supply:
     description: phandle to the regulator that provides the +VCC supply
 
+  ti,extended-range-enable:
+    description: Set to enable extended range temperature.
+    type: boolean
+
 required:
   - compatible
   - reg
 
+allOf:
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - adi,adt7461
+                - adi,adt7461a
+                - ti,tmp451
+                - ti,tmp461
+    then:
+      properties:
+        ti,extended-range-enable: false
+
 additionalProperties: false
 
 examples:
diff --git a/Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml b/Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml
new file mode 100644 (file)
index 0000000..358b262
--- /dev/null
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+
+$id: http://devicetree.org/schemas/hwmon/nuvoton,nct6775.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Nuvoton NCT6775 and compatible Super I/O chips
+
+maintainers:
+  - Zev Weiss <zev@bewilderbeest.net>
+
+properties:
+  compatible:
+    enum:
+      - nuvoton,nct6106
+      - nuvoton,nct6116
+      - nuvoton,nct6775
+      - nuvoton,nct6776
+      - nuvoton,nct6779
+      - nuvoton,nct6791
+      - nuvoton,nct6792
+      - nuvoton,nct6793
+      - nuvoton,nct6795
+      - nuvoton,nct6796
+      - nuvoton,nct6797
+      - nuvoton,nct6798
+
+  reg:
+    maxItems: 1
+
+  nuvoton,tsi-channel-mask:
+    description:
+      Bitmask indicating which TSI temperature sensor channels are
+      active.  LSB is TSI0, bit 1 is TSI1, etc.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    maximum: 0xff
+    default: 0
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        superio@4d {
+            compatible = "nuvoton,nct6779";
+            reg = <0x4d>;
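+            /* bits 0 and 1 set: TSI0 and TSI1 temperature channels active */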
+            nuvoton,tsi-channel-mask = <0x03>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
new file mode 100644 (file)
index 0000000..fe0ac08
--- /dev/null
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/ti,tmp401.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TMP401, TMP411 and TMP43x temperature sensor
+
+maintainers:
+  - Guenter Roeck <linux@roeck-us.net>
+
+description: |
+  ±1°C Remote and Local temperature sensor
+
+  Datasheets:
+  https://www.ti.com/lit/ds/symlink/tmp401.pdf
+  https://www.ti.com/lit/ds/symlink/tmp411.pdf
+  https://www.ti.com/lit/ds/symlink/tmp431.pdf
+  https://www.ti.com/lit/ds/symlink/tmp435.pdf
+
+properties:
+  compatible:
+    enum:
+      - ti,tmp401
+      - ti,tmp411
+      - ti,tmp431
+      - ti,tmp432
+      - ti,tmp435
+
+  reg:
+    maxItems: 1
+
+  ti,extended-range-enable:
+    description:
+      When set, the sensor measures over the extended temperature range.
+    type: boolean
+
+  ti,n-factor:
+    description:
+      Value used for converting remote channel measurements to
+      temperature.
+    $ref: /schemas/types.yaml#/definitions/int32
+    minimum: -128
+    maximum: 127
+
+  ti,beta-compensation:
+    description:
+      Value selecting the beta correction range.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 15
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ti,tmp401
+    then:
+      properties:
+        ti,n-factor: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ti,tmp401
+              - ti,tmp411
+    then:
+      properties:
+        ti,beta-compensation: false
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      sensor@4c {
+        compatible = "ti,tmp401";
+        reg = <0x4c>;
+      };
+    };
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      sensor@4c {
+        compatible = "ti,tmp431";
+        reg = <0x4c>;
+        ti,extended-range-enable;
+        ti,n-factor = <0x3b>;
+        ti,beta-compensation = <0x7>;
+      };
+    };
index b1770640f94bb378c7650709233db3b2e95db084..03ebd2665d0781e69ab388fc11e08bd63803f07f 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Mediatek's Keypad Controller device tree bindings
 
 maintainers:
-  - Fengping Yu <fengping.yu@mediatek.com>
+  - Mattijs Korpershoek <mkorpershoek@baylibre.com>
 
 allOf:
   - $ref: "/schemas/input/matrix-keymap.yaml#"
index b7197f78e1588754bab736f414af8833cc82f283..3912a89162f02b79daf687e9b4927104420251f2 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Generic Interrupt Controller, version 3
 
 maintainers:
-  - Marc Zyngier <marc.zyngier@arm.com>
+  - Marc Zyngier <maz@kernel.org>
 
 description: |
   AArch64 SMP cores are often associated with a GICv3, providing Private
@@ -78,7 +78,11 @@ properties:
       - GIC Hypervisor interface (GICH)
       - GIC Virtual CPU interface (GICV)
 
-      GICC, GICH and GICV are optional.
+      GICC, GICH and GICV are optional, but must be described if the CPUs
+      support them. Examples of such CPUs are ARM's implementations of the
+      ARMv8.0 architecture such as Cortex-A32, A34, A35, A53, A57, A72 and
+      A73 (this list is not exhaustive).
+
     minItems: 2
     maxItems: 4096   # Should be enough?
 
index af5147f9da7201d1d6a9e4d2a308fd67c226a628..84f778a99546bba898f7221f04296ee35b753f7b 100644 (file)
@@ -25,12 +25,6 @@ properties:
           - const: fsl,qoriq-memory-controller
       - enum:
           - fsl,bsc9132-memory-controller
-          - fsl,8540-memory-controller
-          - fsl,8541-memory-controller
-          - fsl,8544-memory-controller
-          - fsl,8548-memory-controller
-          - fsl,8555-memory-controller
-          - fsl,8568-memory-controller
           - fsl,mpc8536-memory-controller
           - fsl,mpc8540-memory-controller
           - fsl,mpc8541-memory-controller
diff --git a/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml b/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml
new file mode 100644 (file)
index 0000000..fb49203
--- /dev/null
@@ -0,0 +1,384 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/rockchip,rk3399-dmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip rk3399 DMC (Dynamic Memory Controller) device
+
+maintainers:
+  - Brian Norris <briannorris@chromium.org>
+
+properties:
+  compatible:
+    enum:
+      - rockchip,rk3399-dmc
+
+  devfreq-events:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the node used to get DDR loading information. Refer to
+      Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt.
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: dmc_clk
+
+  operating-points-v2: true
+
+  center-supply:
+    description:
+      DMC regulator supply.
+
+  rockchip,pmu:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the syscon managing the "PMU general register files".
+
+  interrupts:
+    maxItems: 1
+    description:
+      The CPU interrupt number. It should be a DCF interrupt. When DDR DVFS
+      finishes, a DCF interrupt is triggered.
+
+  rockchip,ddr3_speed_bin:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      For values, reference include/dt-bindings/clock/rk3399-ddr.h. Selects the
+      DDR3 cl-trp-trcd type. It must be set according to "Speed Bin" in DDR3
+      datasheet; DO NOT use a smaller "Speed Bin" than specified for the DDR3
+      being used.
+
+  rockchip,pd_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Configure the PD_IDLE value. Defines the power-down idle period in which
+      memories are placed into power-down mode if the bus is idle for PD_IDLE
+      DFI clock cycles.
+      See also rockchip,pd-idle-ns.
+
+  rockchip,sr_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Configure the SR_IDLE value. Defines the self-refresh idle period in
+      which memories are placed into self-refresh mode if the bus is idle for
+      SR_IDLE * 1024 DFI clock cycles (the DFI clock frequency is half the
+      DRAM clock).
+      See also rockchip,sr-idle-ns.
+    default: 0
+
+  rockchip,sr_mc_gate_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Defines the memory self-refresh and controller clock gating idle period.
+      Memories are placed into self-refresh mode and memory controller clock
+      gating is started if the bus is idle for sr_mc_gate_idle * 1024 DFI
+      clock cycles.
+      See also rockchip,sr-mc-gate-idle-ns.
+
+  rockchip,srpd_lite_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Defines the self-refresh power down idle period in which memories are
+      placed into self-refresh power down mode if bus is idle for
+      srpd_lite_idle * 1024 DFI clock cycles. This parameter is for LPDDR4
+      only.
+      See also rockchip,srpd-lite-idle-ns.
+
+  rockchip,standby_idle:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Defines the standby idle period in which memories are placed into
+      self-refresh mode. The controller, pi, PHY and DRAM clock will be gated
+      if the bus is idle for standby_idle DFI clock cycles.
+      See also rockchip,standby-idle-ns.
+
+  rockchip,dram_dll_dis_freq:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Defines the DDR3 DLL bypass frequency in MHz. When the DDR frequency is
+      less than dram_dll_dis_freq, the DDR3 DLL will be bypassed.
+      Note: if the DLL is bypassed, the ODT will also stop working.
+
+  rockchip,phy_dll_dis_freq:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Defines the PHY DLL bypass frequency in MHz. When the DDR frequency is
+      less than phy_dll_dis_freq, the PHY DLL will be bypassed.
+      Note: PHY DLL and PHY ODT are independent.
+
+  rockchip,auto_pd_dis_freq:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Defines the auto PD disable frequency in MHz.
+
+  rockchip,ddr3_odt_dis_freq:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1000000  # In case anyone thought this was MHz.
+    description:
+      When the DRAM type is DDR3, this parameter defines the ODT disable
+      frequency in Hz. When the DDR frequency is less than ddr3_odt_dis_freq,
+      the ODT on the DRAM side and controller side are both disabled.
+
+  rockchip,ddr3_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the DRAM side drive
+      strength in ohms.
+    default: 40
+
+  rockchip,ddr3_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the DRAM side ODT
+      strength in ohms.
+    default: 120
+
+  rockchip,phy_ddr3_ca_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the PHY side CA line
+      (including command line, address line and clock line) drive strength.
+    default: 40
+
+  rockchip,phy_ddr3_dq_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the PHY side DQ line
+      (including DQS/DQ/DM line) drive strength.
+    default: 40
+
+  rockchip,phy_ddr3_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is DDR3, this parameter defines the PHY side ODT
+      strength.
+    default: 240
+
+  rockchip,lpddr3_odt_dis_freq:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1000000  # In case anyone thought this was MHz.
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the ODT disable
+      frequency in Hz. When the DDR frequency is less than
+      lpddr3_odt_dis_freq, the ODT on the DRAM side and controller side are
+      both disabled.
+
+  rockchip,lpddr3_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the DRAM side drive
+      strength in ohms.
+    default: 34
+
+  rockchip,lpddr3_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the DRAM side ODT
+      strength in ohms.
+    default: 240
+
+  rockchip,phy_lpddr3_ca_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the PHY side CA line
+      (including command line, address line and clock line) drive strength.
+    default: 40
+
+  rockchip,phy_lpddr3_dq_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the PHY side DQ line
+      (including DQS/DQ/DM line) drive strength.
+    default: 40
+
+  rockchip,phy_lpddr3_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR3, this parameter defines the PHY side ODT
+      strength.
+    default: 240
+
+  rockchip,lpddr4_odt_dis_freq:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1000000  # In case anyone thought this was MHz.
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the ODT disable
+      frequency in Hz. When the DDR frequency is less than lpddr4_odt_dis_freq,
+      the ODT on the DRAM side and controller side are both disabled.
+
+  rockchip,lpddr4_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the DRAM side drive
+      strength in ohms.
+    default: 60
+
+  rockchip,lpddr4_dq_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the DRAM side ODT on
+      DQS/DQ line strength in ohms.
+    default: 40
+
+  rockchip,lpddr4_ca_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the DRAM side ODT on
+      CA line strength in ohms.
+    default: 40
+
+  rockchip,phy_lpddr4_ca_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the PHY side CA line
+      (including command address line) drive strength.
+    default: 40
+
+  rockchip,phy_lpddr4_ck_cs_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the PHY side clock
+      line and CS line drive strength.
+    default: 80
+
+  rockchip,phy_lpddr4_dq_drv:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the PHY side DQ line
+      (including DQS/DQ/DM line) drive strength.
+    default: 80
+
+  rockchip,phy_lpddr4_odt:
+    deprecated: true
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      When the DRAM type is LPDDR4, this parameter defines the PHY side ODT
+      strength.
+    default: 60
+
+  rockchip,pd-idle-ns:
+    description:
+      Configure the PD_IDLE value in nanoseconds. Defines the power-down idle
+      period in which memories are placed into power-down mode if the bus is
+      idle for PD_IDLE nanoseconds.
+
+  rockchip,sr-idle-ns:
+    description:
+      Configure the SR_IDLE value in nanoseconds. Defines the self-refresh idle
+      period in which memories are placed into self-refresh mode if the bus is idle
+      for SR_IDLE nanoseconds.
+    default: 0
+
+  rockchip,sr-mc-gate-idle-ns:
+    description:
+      Defines the memory self-refresh and controller clock gating idle period
+      in nanoseconds. Memories are placed into self-refresh mode and memory
+      controller clock gating is started if the bus is idle for
+      sr_mc_gate_idle nanoseconds.
+
+  rockchip,srpd-lite-idle-ns:
+    description:
+      Defines the self-refresh power down idle period in which memories are
+      placed into self-refresh power down mode if the bus is idle for
+      srpd_lite_idle nanoseconds. This parameter is for LPDDR4 only.
+
+  rockchip,standby-idle-ns:
+    description:
+      Defines the standby idle period in which memories are placed into
+      self-refresh mode. The controller, pi, PHY and DRAM clock will be gated
+      if the bus is idle for standby_idle nanoseconds.
+
+  rockchip,pd-idle-dis-freq-hz:
+    description:
+      Defines the power-down idle disable frequency in Hz. When the DDR
+      frequency is greater than pd-idle-dis-freq, power-down idle is disabled.
+      See also rockchip,pd-idle-ns.
+
+  rockchip,sr-idle-dis-freq-hz:
+    description:
+      Defines the self-refresh idle disable frequency in Hz. When the DDR
+      frequency is greater than sr-idle-dis-freq, self-refresh idle is
+      disabled. See also rockchip,sr-idle-ns.
+
+  rockchip,sr-mc-gate-idle-dis-freq-hz:
+    description:
+      Defines the self-refresh and memory-controller clock gating disable
+      frequency in Hz. When the DDR frequency is greater than
+      sr-mc-gate-idle-dis-freq, the clock will not be gated when idle. See also
+      rockchip,sr-mc-gate-idle-ns.
+
+  rockchip,srpd-lite-idle-dis-freq-hz:
+    description:
+      Defines the self-refresh power down idle disable frequency in Hz. When
+      the DDR frequency is greater than srpd-lite-idle-dis-freq, memory will
+      not be placed into self-refresh power down mode when idle. See also
+      rockchip,srpd-lite-idle-ns.
+
+  rockchip,standby-idle-dis-freq-hz:
+    description:
+      Defines the standby idle disable frequency in Hz. When the DDR frequency
+      is greater than standby-idle-dis-freq, standby idle is disabled. See also
+      rockchip,standby-idle-ns.
+
+required:
+  - compatible
+  - devfreq-events
+  - clocks
+  - clock-names
+  - operating-points-v2
+  - center-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/rk3399-cru.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    memory-controller {
+      compatible = "rockchip,rk3399-dmc";
+      devfreq-events = <&dfi>;
+      rockchip,pmu = <&pmu>;
+      interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&cru SCLK_DDRC>;
+      clock-names = "dmc_clk";
+      operating-points-v2 = <&dmc_opp_table>;
+      center-supply = <&ppvar_centerlogic>;
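+      /* Idle thresholds in nanoseconds (preferred over the deprecated cycle-based properties) */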
+      rockchip,pd-idle-ns = <160>;
+      rockchip,sr-idle-ns = <10240>;
+      rockchip,sr-mc-gate-idle-ns = <40960>;
+      rockchip,srpd-lite-idle-ns = <61440>;
+      rockchip,standby-idle-ns = <81920>;
+      rockchip,ddr3_odt_dis_freq = <333000000>;
+      rockchip,lpddr3_odt_dis_freq = <333000000>;
+      rockchip,lpddr4_odt_dis_freq = <333000000>;
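+      /* ODT disable thresholds are specified in Hz (here 333 MHz), not MHz */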
+      rockchip,pd-idle-dis-freq-hz = <1000000000>;
+      rockchip,sr-idle-dis-freq-hz = <1000000000>;
+      rockchip,sr-mc-gate-idle-dis-freq-hz = <1000000000>;
+      rockchip,srpd-lite-idle-dis-freq-hz = <0>;
+      rockchip,standby-idle-dis-freq-hz = <928000000>;
+    };
index dccd5ad969817af30c38b1a886af2313a544b43e..b672202fff4e424ada684c9d3f12ed91fb54853a 100644 (file)
@@ -31,7 +31,7 @@ properties:
           - const: brcm,sdhci-brcmstb
 
   reg:
-    minItems: 2
+    maxItems: 2
 
   reg-names:
     items:
@@ -65,15 +65,15 @@ unevaluatedProperties: false
 examples:
   - |
     mmc@84b0000 {
-      sd-uhs-sdr50;
-      sd-uhs-ddr50;
-      sd-uhs-sdr104;
-      sdhci,auto-cmd12;
       compatible = "brcm,bcm7216-sdhci",
                    "brcm,bcm7445-sdhci",
                    "brcm,sdhci-brcmstb";
       reg = <0x84b0000 0x260>, <0x84b0300 0x200>;
       reg-names = "host", "cfg";
+      sd-uhs-sdr50;
+      sd-uhs-ddr50;
+      sd-uhs-sdr104;
+      sdhci,auto-cmd12;
       interrupts = <0x0 0x26 0x4>;
       interrupt-names = "sdio0_0";
       clocks = <&scmi_clk 245>;
@@ -81,6 +81,11 @@ examples:
     };
 
     mmc@84b1000 {
+      compatible = "brcm,bcm7216-sdhci",
+                   "brcm,bcm7445-sdhci",
+                   "brcm,sdhci-brcmstb";
+      reg = <0x84b1000 0x260>, <0x84b1300 0x200>;
+      reg-names = "host", "cfg";
       mmc-ddr-1_8v;
       mmc-hs200-1_8v;
       mmc-hs400-1_8v;
@@ -88,11 +93,6 @@ examples:
       supports-cqe;
       non-removable;
       bus-width = <0x8>;
-      compatible = "brcm,bcm7216-sdhci",
-           "brcm,bcm7445-sdhci",
-            "brcm,sdhci-brcmstb";
-      reg = <0x84b1000 0x260>, <0x84b1300 0x200>;
-      reg-names = "host", "cfg";
       interrupts = <0x0 0x27 0x4>;
       interrupt-names = "sdio1_0";
       clocks = <&scmi_clk 245>;
index 7dbbcae9485cc79c97897b0e52b1f7f479867cb6..29339d0196ecb57e4a8d3fdda7efe8f55b46a940 100644 (file)
@@ -34,22 +34,47 @@ properties:
           - fsl,imx6ull-usdhc
           - fsl,imx7d-usdhc
           - fsl,imx7ulp-usdhc
+          - fsl,imx8mm-usdhc
           - fsl,imxrt1050-usdhc
           - nxp,s32g2-usdhc
       - items:
           - enum:
+              - fsl,imx8mq-usdhc
+          - const: fsl,imx7d-usdhc
+      - items:
+          - enum:
+              - fsl,imx8mn-usdhc
+              - fsl,imx8mp-usdhc
+              - fsl,imx93-usdhc
+              - fsl,imx8ulp-usdhc
+          - const: fsl,imx8mm-usdhc
+      - items:
+          - enum:
+              - fsl,imx8qm-usdhc
+          - const: fsl,imx8qxp-usdhc
+      - items:
+          - enum:
+              - fsl,imx8dxl-usdhc
               - fsl,imx8mm-usdhc
               - fsl,imx8mn-usdhc
               - fsl,imx8mp-usdhc
-              - fsl,imx8mq-usdhc
               - fsl,imx8qm-usdhc
               - fsl,imx8qxp-usdhc
           - const: fsl,imx7d-usdhc
+        deprecated: true
       - items:
           - enum:
-              - fsl,imx93-usdhc
-              - fsl,imx8ulp-usdhc
+              - fsl,imx8mn-usdhc
+              - fsl,imx8mp-usdhc
           - const: fsl,imx8mm-usdhc
+          - const: fsl,imx7d-usdhc
+        deprecated: true
+      - items:
+          - enum:
+              - fsl,imx8qm-usdhc
+          - const: fsl,imx8qxp-usdhc
+          - const: fsl,imx7d-usdhc
+        deprecated: true
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/mmc/marvell,dove-sdhci.yaml b/Documentation/devicetree/bindings/mmc/marvell,dove-sdhci.yaml
new file mode 100644 (file)
index 0000000..7c9c652
--- /dev/null
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/marvell,dove-sdhci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell sdhci-dove controller
+
+maintainers:
+  - Adrian Hunter <adrian.hunter@intel.com>
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+allOf:
+  - $ref: mmc-controller.yaml#
+
+properties:
+  compatible:
+    const: marvell,dove-sdhci
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 2
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    sdio0: mmc@92000 {
+      compatible = "marvell,dove-sdhci";
+      reg = <0x92000 0x100>;
+      interrupts = <35>;
+      clocks = <&gate_clk 9>;
+    };
diff --git a/Documentation/devicetree/bindings/mmc/marvell,orion-sdio.yaml b/Documentation/devicetree/bindings/mmc/marvell,orion-sdio.yaml
new file mode 100644 (file)
index 0000000..8a97ded
--- /dev/null
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/marvell,orion-sdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell orion-sdio controller
+
+maintainers:
+  - Nicolas Pitre <nico@fluxnic.net>
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+allOf:
+  - $ref: mmc-controller.yaml#
+
+properties:
+  compatible:
+    const: marvell,orion-sdio
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    mmc@d00d4000 {
+      compatible = "marvell,orion-sdio";
+      reg = <0xd00d4000 0x200>;
+      interrupts = <54>;
+      clocks = <&gateclk 17>;
+    };
diff --git a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt
deleted file mode 100644 (file)
index c51a62d..0000000
+++ /dev/null
@@ -1,173 +0,0 @@
-Marvell Xenon SDHCI Controller device tree bindings
-This file documents differences between the core mmc properties
-described by mmc.txt and the properties used by the Xenon implementation.
-
-Multiple SDHCs might be put into a single Xenon IP, to save size and cost.
-Each SDHC is independent and owns independent resources, such as register sets,
-clock and PHY.
-Each SDHC should have an independent device tree node.
-
-Required Properties:
-- compatible: should be one of the following
-  - "marvell,armada-3700-sdhci": For controllers on Armada-3700 SoC.
-  Must provide a second register area and marvell,pad-type.
-  - "marvell,armada-ap806-sdhci": For controllers on Armada AP806.
-  - "marvell,armada-ap807-sdhci": For controllers on Armada AP807.
-  - "marvell,armada-cp110-sdhci": For controllers on Armada CP110.
-
-- clocks:
-  Array of clocks required for SDHC.
-  Require at least input clock for Xenon IP core. For Armada AP806 and
-  CP110, the AXI clock is also mandatory.
-
-- clock-names:
-  Array of names corresponding to clocks property.
-  The input clock for Xenon IP core should be named as "core".
-  The input clock for the AXI bus must be named as "axi".
-
-- reg:
-  * For "marvell,armada-3700-sdhci", two register areas.
-    The first one for Xenon IP register. The second one for the Armada 3700 SoC
-    PHY PAD Voltage Control register.
-    Please follow the examples with compatible "marvell,armada-3700-sdhci"
-    in below.
-    Please also check property marvell,pad-type in below.
-
-  * For other compatible strings, one register area for Xenon IP.
-
-Optional Properties:
-- marvell,xenon-sdhc-id:
-  Indicate the corresponding bit index of current SDHC in
-  SDHC System Operation Control Register Bit[7:0].
-  Set/clear the corresponding bit to enable/disable current SDHC.
-  If Xenon IP contains only one SDHC, this property is optional.
-
-- marvell,xenon-phy-type:
-  Xenon support multiple types of PHYs.
-  To select eMMC 5.1 PHY, set:
-  marvell,xenon-phy-type = "emmc 5.1 phy"
-  eMMC 5.1 PHY is the default choice if this property is not provided.
-  To select eMMC 5.0 PHY, set:
-  marvell,xenon-phy-type = "emmc 5.0 phy"
-
-  All those types of PHYs can support eMMC, SD and SDIO.
-  Please note that this property only presents the type of PHY.
-  It doesn't stand for the entire SDHC type or property.
-  For example, "emmc 5.1 phy" doesn't mean that this Xenon SDHC only
-  supports eMMC 5.1.
-
-- marvell,xenon-phy-znr:
-  Set PHY ZNR value.
-  Only available for eMMC PHY.
-  Valid range = [0:0x1F].
-  ZNR is set as 0xF by default if this property is not provided.
-
-- marvell,xenon-phy-zpr:
-  Set PHY ZPR value.
-  Only available for eMMC PHY.
-  Valid range = [0:0x1F].
-  ZPR is set as 0xF by default if this property is not provided.
-
-- marvell,xenon-phy-nr-success-tun:
-  Set the number of required consecutive successful sampling points
-  used to identify a valid sampling window, in tuning process.
-  Valid range = [1:7].
-  Set as 0x4 by default if this property is not provided.
-
-- marvell,xenon-phy-tun-step-divider:
-  Set the divider for calculating TUN_STEP.
-  Set as 64 by default if this property is not provided.
-
-- marvell,xenon-phy-slow-mode:
-  If this property is selected, transfers will bypass PHY.
-  Only available when bus frequency lower than 55MHz in SDR mode.
-  Disabled by default. Please only try this property if timing issues
-  always occur with PHY enabled in eMMC HS SDR, SD SDR12, SD SDR25,
-  SD Default Speed and HS mode and eMMC legacy speed mode.
-
-- marvell,xenon-tun-count:
-  Xenon SDHC SoC usually doesn't provide re-tuning counter in
-  Capabilities Register 3 Bit[11:8].
-  This property provides the re-tuning counter.
-  If this property is not set, default re-tuning counter will
-  be set as 0x9 in driver.
-
-- marvell,pad-type:
-  Type of Armada 3700 SoC PHY PAD Voltage Controller register.
-  Only valid when "marvell,armada-3700-sdhci" is selected.
-  Two types: "sd" and "fixed-1-8v".
-  If "sd" is selected, SoC PHY PAD is set as 3.3V at the beginning and is
-  switched to 1.8V when later in higher speed mode.
-  If "fixed-1-8v" is selected, SoC PHY PAD is fixed 1.8V, such as for eMMC.
-  Please follow the examples with compatible "marvell,armada-3700-sdhci"
-  in below.
-
-Example:
-- For eMMC:
-
-       sdhci@aa0000 {
-               compatible = "marvell,armada-ap806-sdhci";
-               reg = <0xaa0000 0x1000>;
-               interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>
-               clocks = <&emmc_clk>,<&axi_clk>;
-               clock-names = "core", "axi";
-               bus-width = <4>;
-               marvell,xenon-phy-slow-mode;
-               marvell,xenon-tun-count = <11>;
-               non-removable;
-               no-sd;
-               no-sdio;
-
-               /* Vmmc and Vqmmc are both fixed */
-       };
-
-- For SD/SDIO:
-
-       sdhci@ab0000 {
-               compatible = "marvell,armada-cp110-sdhci";
-               reg = <0xab0000 0x1000>;
-               interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>
-               vqmmc-supply = <&sd_vqmmc_regulator>;
-               vmmc-supply = <&sd_vmmc_regulator>;
-               clocks = <&sdclk>, <&axi_clk>;
-               clock-names = "core", "axi";
-               bus-width = <4>;
-               marvell,xenon-tun-count = <9>;
-       };
-
-- For eMMC with compatible "marvell,armada-3700-sdhci":
-
-       sdhci@aa0000 {
-               compatible = "marvell,armada-3700-sdhci";
-               reg = <0xaa0000 0x1000>,
-                     <phy_addr 0x4>;
-               interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>
-               clocks = <&emmcclk>;
-               clock-names = "core";
-               bus-width = <8>;
-               mmc-ddr-1_8v;
-               mmc-hs400-1_8v;
-               non-removable;
-               no-sd;
-               no-sdio;
-
-               /* Vmmc and Vqmmc are both fixed */
-
-               marvell,pad-type = "fixed-1-8v";
-       };
-
-- For SD/SDIO with compatible "marvell,armada-3700-sdhci":
-
-       sdhci@ab0000 {
-               compatible = "marvell,armada-3700-sdhci";
-               reg = <0xab0000 0x1000>,
-                     <phy_addr 0x4>;
-               interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>
-               vqmmc-supply = <&sd_regulator>;
-               /* Vmmc is fixed */
-               clocks = <&sdclk>;
-               clock-names = "core";
-               bus-width = <4>;
-
-               marvell,pad-type = "sd";
-       };
diff --git a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml
new file mode 100644 (file)
index 0000000..c79639e
--- /dev/null
@@ -0,0 +1,275 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/marvell,xenon-sdhci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Xenon SDHCI Controller
+
+description: |
+  This file documents differences between the core MMC properties described by
+  mmc-controller.yaml and the properties used by the Xenon implementation.
+
+  Multiple SDHCs might be put into a single Xenon IP, to save size and cost.
+  Each SDHC is independent and owns independent resources, such as register
+  sets, clock and PHY.
+
+  Each SDHC should have an independent device tree node.
+
+maintainers:
+  - Ulf Hansson <ulf.hansson@linaro.org>
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - marvell,armada-cp110-sdhci
+          - marvell,armada-ap806-sdhci
+
+      - items:
+          - const: marvell,armada-ap807-sdhci
+          - const: marvell,armada-ap806-sdhci
+
+      - items:
+          - const: marvell,armada-3700-sdhci
+          - const: marvell,sdhci-xenon
+
+  reg:
+    minItems: 1
+    maxItems: 2
+    description: |
+      For "marvell,armada-3700-sdhci", two register areas.  The first one
+      for Xenon IP register. The second one for the Armada 3700 SoC PHY PAD
+      Voltage Control register.  Please follow the examples with compatible
+      "marvell,armada-3700-sdhci" in below.
+      Please also check property marvell,pad-type in below.
+
+      For other compatible strings, one register area for Xenon IP.
+
+  clocks:
+    minItems: 1
+    maxItems: 2
+
+  clock-names:
+    minItems: 1
+    items:
+      - const: core
+      - const: axi
+
+  marvell,xenon-sdhc-id:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 7
+    description: |
+      Indicates the bit index of the current SDHC in the SDHC System
+      Operation Control Register Bit[7:0]. Set/clear the corresponding bit to
+      enable/disable the current SDHC.
+
+  marvell,xenon-phy-type:
+    $ref: /schemas/types.yaml#/definitions/string
+    enum:
+      - "emmc 5.1 phy"
+      - "emmc 5.0 phy"
+    description: |
+      Xenon supports multiple types of PHYs. To select the eMMC 5.1 PHY, set
+      marvell,xenon-phy-type = "emmc 5.1 phy"; it is the default choice if
+      this property is not provided. To select the eMMC 5.0 PHY, set
+      marvell,xenon-phy-type = "emmc 5.0 phy".
+
+      All those types of PHYs can support eMMC, SD and SDIO. Please note that
+      this property only selects the type of PHY; it doesn't describe the
+      entire SDHC. For example, "emmc 5.1 phy" doesn't mean that this Xenon
+      SDHC only supports eMMC 5.1.
+
+  marvell,xenon-phy-znr:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 0x1f
+    default: 0xf
+    description: |
+      Set PHY ZNR value.
+      Only available for eMMC PHY.
+
+  marvell,xenon-phy-zpr:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 0x1f
+    default: 0xf
+    description: |
+      Set PHY ZPR value.
+      Only available for eMMC PHY.
+
+  marvell,xenon-phy-nr-success-tun:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1
+    maximum: 7
+    default: 0x4
+    description: |
+      Set the number of required consecutive successful sampling points
+      used to identify a valid sampling window, in tuning process.
+
+  marvell,xenon-phy-tun-step-divider:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    default: 64
+    description: |
+      Set the divider for calculating TUN_STEP.
+
+  marvell,xenon-phy-slow-mode:
+    type: boolean
+    description: |
+      If this property is selected, transfers will bypass the PHY.
+      Only available when the bus frequency is lower than 55 MHz in SDR mode.
+      Disabled by default. Please only try this property if timing issues
+      always occur with the PHY enabled in eMMC HS SDR, SD SDR12, SD SDR25,
+      SD Default Speed and HS mode, and eMMC legacy speed mode.
+
+  marvell,xenon-tun-count:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    default: 0x9
+    description: |
+      A Xenon SDHC usually doesn't provide a re-tuning counter in
+      Capabilities Register 3 Bit[11:8].
+      This property provides the re-tuning counter.
+
+allOf:
+  - $ref: mmc-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: marvell,armada-3700-sdhci
+
+    then:
+      properties:
+        reg:
+          items:
+            - description: Xenon IP registers
+            - description: Armada 3700 SoC PHY PAD Voltage Control register
+          minItems: 2
+
+        marvell,pad-type:
+          $ref: /schemas/types.yaml#/definitions/string
+          enum:
+            - sd
+            - fixed-1-8v
+          description: |
+            Type of Armada 3700 SoC PHY PAD Voltage Controller register.
+            If "sd" is selected, SoC PHY PAD is set as 3.3V at the beginning
+            and is switched to 1.8V when later in higher speed mode.
+            If "fixed-1-8v" is selected, SoC PHY PAD is fixed 1.8V, such as for
+            eMMC.
+            Please follow the examples with compatible
+            "marvell,armada-3700-sdhci" in below.
+
+      required:
+        - marvell,pad-type
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - marvell,armada-cp110-sdhci
+              - marvell,armada-ap807-sdhci
+              - marvell,armada-ap806-sdhci
+
+    then:
+      properties:
+        clocks:
+          minItems: 2
+
+        clock-names:
+          items:
+            - const: core
+            - const: axi
+
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    // For eMMC
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    mmc@aa0000 {
+      compatible = "marvell,armada-ap807-sdhci", "marvell,armada-ap806-sdhci";
+      reg = <0xaa0000 0x1000>;
+      interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&emmc_clk 0>, <&axi_clk 0>;
+      clock-names = "core", "axi";
+      bus-width = <4>;
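+      /* Bypass the PHY (slow mode) and override the default re-tuning counter of 0x9 */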
+      marvell,xenon-phy-slow-mode;
+      marvell,xenon-tun-count = <11>;
+      non-removable;
+      no-sd;
+      no-sdio;
+
+      /* Vmmc and Vqmmc are both fixed */
+    };
+
+  - |
+    // For SD/SDIO
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    mmc@ab0000 {
+      compatible = "marvell,armada-cp110-sdhci";
+      reg = <0xab0000 0x1000>;
+      interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+      vqmmc-supply = <&sd_vqmmc_regulator>;
+      vmmc-supply = <&sd_vmmc_regulator>;
+      clocks = <&sdclk 0>, <&axi_clk 0>;
+      clock-names = "core", "axi";
+      bus-width = <4>;
+      marvell,xenon-tun-count = <9>;
+    };
+
+  - |
+    // For eMMC with compatible "marvell,armada-3700-sdhci":
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    mmc@aa0000 {
+      compatible = "marvell,armada-3700-sdhci", "marvell,sdhci-xenon";
+      reg = <0xaa0000 0x1000>,
+            <0x17808 0x4>;
+      interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&emmcclk 0>;
+      clock-names = "core";
+      bus-width = <8>;
+      mmc-ddr-1_8v;
+      mmc-hs400-1_8v;
+      non-removable;
+      no-sd;
+      no-sdio;
+
+      /* Vmmc and Vqmmc are both fixed */
+
+      marvell,pad-type = "fixed-1-8v";
+    };
+
+  - |
+    // For SD/SDIO with compatible "marvell,armada-3700-sdhci":
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    mmc@ab0000 {
+      compatible = "marvell,armada-3700-sdhci", "marvell,sdhci-xenon";
+      reg = <0xab0000 0x1000>,
+            <0x17808 0x4>;
+      interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+      vqmmc-supply = <&sd_regulator>;
+      /* Vmmc is fixed */
+      clocks = <&sdclk 0>;
+      clock-names = "core";
+      bus-width = <4>;
+
+      marvell,pad-type = "sd";
+    };
index 513f3c8758aa49a130a878136c41b4e79638eef4..ff5ce89e511149d9efc95b0fbd0540feec9c3b78 100644 (file)
@@ -298,7 +298,10 @@ properties:
 
   vqmmc-supply:
     description:
-      Supply for the bus IO line power
+      Supply for the bus IO line power, such as a level shifter.
+      If the level shifter is controlled by a GPIO line, this shall
+      be modeled as a "regulator-fixed" with a GPIO line for
+      switching the level shifter on/off.
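+
+      # A minimal sketch of such a fixed regulator (node name, GPIO line and
+      # voltage are illustrative, not part of this binding):
+      #   vqmmc: regulator-vqmmc {
+      #     compatible = "regulator-fixed";
+      #     regulator-name = "vqmmc";
+      #     regulator-min-microvolt = <1800000>;
+      #     regulator-max-microvolt = <1800000>;
+      #     gpio = <&gpio1 12 GPIO_ACTIVE_HIGH>;
+      #     enable-active-high;
+      #   };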
 
   mmc-pwrseq:
     $ref: /schemas/types.yaml#/definitions/phandle
index 297ada03e3decf5dd42fe431ad90253d3419c6ff..2a2e9fa8c1889e19a5bac42eaa454ea7515991a8 100644 (file)
@@ -40,7 +40,10 @@ properties:
           - const: mediatek,mt8183-mmc
 
   reg:
-    maxItems: 1
+    minItems: 1
+    items:
+      - description: base register (required).
+      - description: top base register (required for MT8183).
 
   clocks:
     description:
@@ -168,6 +171,16 @@ required:
   - vmmc-supply
   - vqmmc-supply
 
+if:
+  properties:
+    compatible:
+      contains:
+        const: mediatek,mt8183-mmc
+then:
+  properties:
+    reg:
+      minItems: 2
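+# For MT8183 both register areas must be supplied, e.g. (addresses are
+# illustrative only):
+#   reg = <0x11230000 0x1000>, <0x11f50000 0x1000>;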
+
 unevaluatedProperties: false
 
 examples:
diff --git a/Documentation/devicetree/bindings/mmc/orion-sdio.txt b/Documentation/devicetree/bindings/mmc/orion-sdio.txt
deleted file mode 100644 (file)
index 10f0818..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-* Marvell orion-sdio controller
-
-This file documents differences between the core properties in mmc.txt
-and the properties used by the orion-sdio driver.
-
-- compatible: Should be "marvell,orion-sdio"
-- clocks: reference to the clock of the SDIO interface
-
-Example:
-
-       mvsdio@d00d4000 {
-               compatible = "marvell,orion-sdio";
-               reg = <0xd00d4000 0x200>;
-               interrupts = <54>;
-               clocks = <&gateclk 17>;
-       };
index 0566493c4def020546ee3157e6b7b10d748f7baf..0ab07759b47289c5950364b83cc2e38a310a0cdd 100644 (file)
@@ -186,6 +186,13 @@ properties:
     description: Clock Delay Buffer Select
     $ref: "/schemas/types.yaml#/definitions/uint32"
 
+  ti,fails-without-test-cd:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      When present, indicates that the CD line is not connected
+      and the controller is required to be forced into Test mode
+      to set the TESTCD bit.
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-dove.txt b/Documentation/devicetree/bindings/mmc/sdhci-dove.txt
deleted file mode 100644 (file)
index ae9aab9..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-* Marvell sdhci-dove controller
-
-This file documents differences between the core properties in mmc.txt
-and the properties used by the sdhci-pxav2 and sdhci-pxav3 drivers.
-
-- compatible: Should be "marvell,dove-sdhci".
-
-Example:
-
-sdio0: sdio@92000 {
-       compatible = "marvell,dove-sdhci";
-       reg = <0x92000 0x100>;
-       interrupts = <35>;
-};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
deleted file mode 100644 (file)
index 6216ed7..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-* Qualcomm SDHCI controller (sdhci-msm)
-
-This file documents differences between the core properties in mmc.txt
-and the properties used by the sdhci-msm driver.
-
-Required properties:
-- compatible: Should contain a SoC-specific string and a IP version string:
-       version strings:
-               "qcom,sdhci-msm-v4" for sdcc versions less than 5.0
-               "qcom,sdhci-msm-v5" for sdcc version 5.0
-               For SDCC version 5.0.0, MCI registers are removed from SDCC
-               interface and some registers are moved to HC. New compatible
-               string is added to support this change - "qcom,sdhci-msm-v5".
-       full compatible strings with SoC and version:
-               "qcom,apq8084-sdhci", "qcom,sdhci-msm-v4"
-               "qcom,msm8226-sdhci", "qcom,sdhci-msm-v4"
-               "qcom,msm8953-sdhci", "qcom,sdhci-msm-v4"
-               "qcom,msm8974-sdhci", "qcom,sdhci-msm-v4"
-               "qcom,msm8916-sdhci", "qcom,sdhci-msm-v4"
-               "qcom,msm8992-sdhci", "qcom,sdhci-msm-v4"
-               "qcom,msm8994-sdhci", "qcom,sdhci-msm-v4"
-               "qcom,msm8996-sdhci", "qcom,sdhci-msm-v4"
-               "qcom,qcs404-sdhci", "qcom,sdhci-msm-v5"
-               "qcom,sc7180-sdhci", "qcom,sdhci-msm-v5";
-               "qcom,sc7280-sdhci", "qcom,sdhci-msm-v5";
-               "qcom,sdm845-sdhci", "qcom,sdhci-msm-v5"
-               "qcom,sdx55-sdhci", "qcom,sdhci-msm-v5";
-               "qcom,sm8250-sdhci", "qcom,sdhci-msm-v5"
-       NOTE that some old device tree files may be floating around that only
-       have the string "qcom,sdhci-msm-v4" without the SoC compatible string
-       but doing that should be considered a deprecated practice.
-
-- reg: Base address and length of the register in the following order:
-       - Host controller register map (required)
-       - SD Core register map (required for controllers earlier than msm-v5)
-       - CQE register map (Optional, CQE support is present on SDHC instance meant
-                           for eMMC and version v4.2 and above)
-       - Inline Crypto Engine register map (optional)
-- reg-names: When CQE register map is supplied, below reg-names are required
-       - "hc" for Host controller register map
-       - "core" for SD core register map
-       - "cqhci" for CQE register map
-       - "ice" for Inline Crypto Engine register map (optional)
-- interrupts: Should contain an interrupt-specifiers for the interrupts:
-       - Host controller interrupt (required)
-- pinctrl-names: Should contain only one value - "default".
-- pinctrl-0: Should specify pin control groups used for this controller.
-- clocks: A list of phandle + clock-specifier pairs for the clocks listed in clock-names.
-- clock-names: Should contain the following:
-       "iface" - Main peripheral bus clock (PCLK/HCLK - AHB Bus clock) (required)
-       "core"  - SDC MMC clock (MCLK) (required)
-       "bus"   - SDCC bus voter clock (optional)
-       "xo"    - TCXO clock (optional)
-       "cal"   - reference clock for RCLK delay calibration (optional)
-       "sleep" - sleep clock for RCLK delay calibration (optional)
-       "ice" - clock for Inline Crypto Engine (optional)
-
-- qcom,ddr-config: Certain chipsets and platforms require particular settings
-       for the DDR_CONFIG register. Use this field to specify the register
-       value as per the Hardware Programming Guide.
-
-- qcom,dll-config: Chipset and Platform specific value. Use this field to
-       specify the DLL_CONFIG register value as per Hardware Programming Guide.
-
-Optional Properties:
-* Following bus parameters are required for interconnect bandwidth scaling:
-- interconnects: Pairs of phandles and interconnect provider specifier
-                to denote the edge source and destination ports of
-                the interconnect path.
-
-- interconnect-names: For sdhc, we have two main paths.
-               1. Data path : sdhc to ddr
-               2. Config path : cpu to sdhc
-               For Data interconnect path the name supposed to be
-               is "sdhc-ddr" and for config interconnect path it is
-               "cpu-sdhc".
-               Please refer to Documentation/devicetree/bindings/
-               interconnect/ for more details.
-
-Example:
-
-       sdhc_1: sdhci@f9824900 {
-               compatible = "qcom,msm8974-sdhci", "qcom,sdhci-msm-v4";
-               reg = <0xf9824900 0x11c>, <0xf9824000 0x800>;
-               interrupts = <0 123 0>;
-               bus-width = <8>;
-               non-removable;
-
-               vmmc-supply = <&pm8941_l20>;
-               vqmmc-supply = <&pm8941_s3>;
-
-               pinctrl-names = "default";
-               pinctrl-0 = <&sdc1_clk &sdc1_cmd &sdc1_data>;
-
-               clocks = <&gcc GCC_SDCC1_APPS_CLK>, <&gcc GCC_SDCC1_AHB_CLK>;
-               clock-names = "core", "iface";
-               interconnects = <&qnoc MASTER_SDCC_ID &qnoc SLAVE_DDR_ID>,
-                               <&qnoc MASTER_CPU_ID &qnoc SLAVE_SDCC_ID>;
-               interconnect-names = "sdhc-ddr","cpu-sdhc";
-
-               qcom,dll-config = <0x000f642c>;
-               qcom,ddr-config = <0x80040868>;
-       };
-
-       sdhc_2: sdhci@f98a4900 {
-               compatible = "qcom,msm8974-sdhci", "qcom,sdhci-msm-v4";
-               reg = <0xf98a4900 0x11c>, <0xf98a4000 0x800>;
-               interrupts = <0 125 0>;
-               bus-width = <4>;
-               cd-gpios = <&msmgpio 62 0x1>;
-
-               vmmc-supply = <&pm8941_l21>;
-               vqmmc-supply = <&pm8941_l13>;
-
-               pinctrl-names = "default";
-               pinctrl-0 = <&sdc2_clk &sdc2_cmd &sdc2_data>;
-
-               clocks = <&gcc GCC_SDCC2_APPS_CLK>, <&gcc GCC_SDCC2_AHB_CLK>;
-               clock-names = "core", "iface";
-
-               qcom,dll-config = <0x0007642c>;
-               qcom,ddr-config = <0x80040868>;
-       };
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml
new file mode 100644 (file)
index 0000000..e423633
--- /dev/null
@@ -0,0 +1,194 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/mmc/sdhci-msm.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Qualcomm SDHCI controller (sdhci-msm)
+
+maintainers:
+  - Bhupesh Sharma <bhupesh.sharma@linaro.org>
+
+description:
+  The Secure Digital Host Controller Interface (SDHCI) present on
+  Qualcomm SoCs supports SD/MMC/SDIO devices.
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - qcom,apq8084-sdhci
+              - qcom,msm8226-sdhci
+              - qcom,msm8953-sdhci
+              - qcom,msm8974-sdhci
+              - qcom,msm8916-sdhci
+              - qcom,msm8992-sdhci
+              - qcom,msm8994-sdhci
+              - qcom,msm8996-sdhci
+              - qcom,qcs404-sdhci
+              - qcom,sc7180-sdhci
+              - qcom,sc7280-sdhci
+              - qcom,sdm630-sdhci
+              - qcom,sdm845-sdhci
+              - qcom,sdx55-sdhci
+              - qcom,sdx65-sdhci
+              - qcom,sm6125-sdhci
+              - qcom,sm6350-sdhci
+              - qcom,sm8150-sdhci
+              - qcom,sm8250-sdhci
+          - enum:
+              - qcom,sdhci-msm-v4 # for sdcc versions less than 5.0
+              - qcom,sdhci-msm-v5 # for sdcc version 5.0
+      - items:
+          - const: qcom,sdhci-msm-v4 # Deprecated (only for backward compatibility)
+                                     # for sdcc versions less than 5.0
+
+  reg:
+    minItems: 1
+    items:
+      - description: Host controller register map
+      - description: SD Core register map
+      - description: CQE register map
+      - description: Inline Crypto Engine register map
+
+  clocks:
+    minItems: 3
+    items:
+      - description: Main peripheral bus clock, PCLK/HCLK - AHB Bus clock
+      - description: SDC MMC clock, MCLK
+      - description: TCXO clock
+      - description: clock for Inline Crypto Engine
+      - description: SDCC bus voter clock
+      - description: reference clock for RCLK delay calibration
+      - description: sleep clock for RCLK delay calibration
+
+  clock-names:
+    minItems: 2
+    items:
+      - const: iface
+      - const: core
+      - const: xo
+      - const: ice
+      - const: bus
+      - const: cal
+      - const: sleep
+
+  interrupts:
+    maxItems: 2
+
+  interrupt-names:
+    items:
+      - const: hc_irq
+      - const: pwr_irq
+
+  pinctrl-names:
+    minItems: 1
+    items:
+      - const: default
+      - const: sleep
+
+  pinctrl-0:
+    description:
+      Should specify pin control groups used for this controller.
+
+  qcom,ddr-config:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: platform specific settings for DDR_CONFIG reg.
+
+  qcom,dll-config:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: platform specific settings for DLL_CONFIG reg.
+
+  iommus:
+    minItems: 1
+    maxItems: 8
+    description: |
+      Phandle to the apps_smmu node, with the SID mask.
+
+  interconnects:
+    items:
+      - description: data path, sdhc to ddr
+      - description: config path, cpu to sdhc
+
+  interconnect-names:
+    items:
+      - const: sdhc-ddr
+      - const: cpu-sdhc
+
+  power-domains:
+    description: A phandle to sdhci power domain node
+    maxItems: 1
+
+patternProperties:
+  '^opp-table(-[a-z0-9]+)?$':
+    if:
+      properties:
+        compatible:
+          const: operating-points-v2
+    then:
+      patternProperties:
+        '^opp-?[0-9]+$':
+          required:
+            - required-opps
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+
+additionalProperties: true
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/qcom,gcc-sm8250.h>
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    #include <dt-bindings/power/qcom-rpmpd.h>
+
+    sdhc_2: sdhci@8804000 {
+      compatible = "qcom,sm8250-sdhci", "qcom,sdhci-msm-v5";
+      reg = <0 0x08804000 0 0x1000>;
+
+      interrupts = <GIC_SPI 204 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 222 IRQ_TYPE_LEVEL_HIGH>;
+      interrupt-names = "hc_irq", "pwr_irq";
+
+      clocks = <&gcc GCC_SDCC2_AHB_CLK>,
+               <&gcc GCC_SDCC2_APPS_CLK>,
+               <&rpmhcc RPMH_CXO_CLK>;
+      clock-names = "iface", "core", "xo";
+      iommus = <&apps_smmu 0x4a0 0x0>;
+      qcom,dll-config = <0x0007642c>;
+      qcom,ddr-config = <0x80040868>;
+      power-domains = <&rpmhpd SM8250_CX>;
+
+      operating-points-v2 = <&sdhc2_opp_table>;
+
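+      /* Each OPP pairs a core clock rate with a minimum rpmhpd performance state */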
+      sdhc2_opp_table: opp-table {
+        compatible = "operating-points-v2";
+
+        opp-19200000 {
+          opp-hz = /bits/ 64 <19200000>;
+          required-opps = <&rpmhpd_opp_min_svs>;
+        };
+
+        opp-50000000 {
+          opp-hz = /bits/ 64 <50000000>;
+          required-opps = <&rpmhpd_opp_low_svs>;
+        };
+
+        opp-100000000 {
+          opp-hz = /bits/ 64 <100000000>;
+          required-opps = <&rpmhpd_opp_svs>;
+        };
+
+        opp-202000000 {
+          opp-hz = /bits/ 64 <202000000>;
+          required-opps = <&rpmhpd_opp_svs_l1>;
+        };
+      };
+    };
index f300ced4cdf3679302e33cb1ac1e2e9d51b1146e..71f8e726d641cac9b60401df0593c23743d3b1a3 100644 (file)
@@ -17,6 +17,7 @@ properties:
   compatible:
     enum:
       - rockchip,rk3568-dwcmshc
+      - rockchip,rk3588-dwcmshc
       - snps,dwcmshc-sdhci
 
   reg:
diff --git a/Documentation/devicetree/bindings/mtd/aspeed-smc.txt b/Documentation/devicetree/bindings/mtd/aspeed-smc.txt
deleted file mode 100644 (file)
index 49f6528..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-* Aspeed Firmware Memory controller
-* Aspeed SPI Flash Memory Controller
-
-The Firmware Memory Controller in the Aspeed AST2500 SoC supports
-three chip selects, two of which are always of SPI type and the third
-can be SPI or NOR type flash. These bindings only describe SPI.
-
-The two SPI flash memory controllers in the AST2500 each support two
-chip selects.
-
-Required properties:
-  - compatible : Should be one of
-       "aspeed,ast2400-fmc" for the AST2400 Firmware Memory Controller
-       "aspeed,ast2400-spi" for the AST2400 SPI Flash memory Controller
-       "aspeed,ast2500-fmc" for the AST2500 Firmware Memory Controller
-       "aspeed,ast2500-spi" for the AST2500 SPI flash memory controllers
-
-  - reg : the first contains the control register location and length,
-          the second contains the memory window mapping address and length
-  - #address-cells : must be 1 corresponding to chip select child binding
-  - #size-cells : must be 0 corresponding to chip select child binding
-
-Optional properties:
-  - interrupts : Should contain the interrupt for the dma device if an
-    FMC
-
-The child nodes are the SPI flash modules which must have a compatible
-property as specified in bindings/mtd/jedec,spi-nor.txt
-
-Optionally, the child node can contain properties for SPI mode (may be
-ignored):
-  - spi-max-frequency - max frequency of spi bus
-
-
-Example:
-fmc: fmc@1e620000 {
-       compatible = "aspeed,ast2500-fmc";
-       reg = < 0x1e620000 0x94
-               0x20000000 0x02000000 >;
-       #address-cells = <1>;
-       #size-cells = <0>;
-       interrupts = <19>;
-       flash@0 {
-               reg = < 0 >;
-               compatible = "jedec,spi-nor";
-               /* spi-max-frequency = <>; */
-               /* m25p,fast-read; */
-               #address-cells = <1>;
-               #size-cells = <1>;
-       };
-};
diff --git a/Documentation/devicetree/bindings/mtd/elm.txt b/Documentation/devicetree/bindings/mtd/elm.txt
deleted file mode 100644 (file)
index 59ddc61..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-Error location module
-
-Required properties:
-- compatible: Must be "ti,am3352-elm"
-- reg: physical base address and size of the registers map.
-- interrupts: Interrupt number for the elm.
-
-Optional properties:
-- ti,hwmods: Name of the hwmod associated to the elm
-
-Example:
-elm: elm@0 {
-       compatible = "ti,am3352-elm";
-       reg = <0x48080000 0x2000>;
-       interrupts = <4>;
-};
index 4abfb4cfc1571e1e7fe4665653230384f548170c..7149784a36ac7404d9c929285d29b3241a6d2b44 100644 (file)
@@ -50,10 +50,6 @@ properties:
     minItems: 1
     maxItems: 2
 
-  spi-max-frequency: true
-  spi-rx-bus-width: true
-  spi-tx-bus-width: true
-
   m25p,fast-read:
     type: boolean
     description:
@@ -74,8 +70,6 @@ properties:
       be used on such systems, to denote the absence of a reliable reset
       mechanism.
 
-  label: true
-
   partitions:
     type: object
 
@@ -99,8 +93,6 @@ examples:
         #size-cells = <0>;
 
         flash@0 {
-            #address-cells = <1>;
-            #size-cells = <1>;
             compatible = "spansion,m25p80", "jedec,spi-nor";
             reg = <0>;
             spi-max-frequency = <40000000>;
index ea4cace6a95537ac2a1fb02ef897d6352bd7ea8f..ad3ccd250802bf417e7b7ab0e1032f339065eb11 100644 (file)
@@ -19,7 +19,11 @@ maintainers:
 
 properties:
   compatible:
-    const: fixed-partitions
+    oneOf:
+      - const: fixed-partitions
+      - items:
+          - const: sercomm,sc-partitions
+          - const: fixed-partitions
 
   "#address-cells": true
 
@@ -27,7 +31,24 @@ properties:
 
 patternProperties:
   "@[0-9a-f]+$":
-    $ref: "partition.yaml#"
+    allOf:
+      - $ref: "partition.yaml#"
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: sercomm,sc-partitions
+        then:
+          properties:
+            sercomm,scpart-id:
+              description: Partition id in the Sercomm partition map. The
+                MTD parser uses this id to find a record in the partition
+                map containing the offset and size of the current
+                partition. The values from the partition map override the
+                partition offset and size defined in the reg property of
+                the dts. Frequently these values are the same, but they
+                may differ if the device has bad eraseblocks on the flash.
+              $ref: /schemas/types.yaml#/definitions/uint32
 
 required:
   - "#address-cells"
@@ -52,6 +73,7 @@ examples:
             reg = <0x0100000 0x200000>;
         };
     };
+
   - |
     partitions {
         compatible = "fixed-partitions";
@@ -64,6 +86,7 @@ examples:
             reg = <0x00000000 0x1 0x00000000>;
         };
     };
+
   - |
     partitions {
         compatible = "fixed-partitions";
@@ -82,6 +105,7 @@ examples:
             reg = <0x2 0x00000000 0x1 0x00000000>;
         };
     };
+
   - |
     partitions {
         compatible = "fixed-partitions";
@@ -119,3 +143,30 @@ examples:
             };
         };
     };
+
+  - |
+    partitions {
+        compatible = "sercomm,sc-partitions", "fixed-partitions";
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        partition@0 {
+            label = "u-boot";
+            reg = <0x0 0x100000>;
+            sercomm,scpart-id = <0>;
+            read-only;
+        };
+
+        partition@100000 {
+            label = "dynamic partition map";
+            reg = <0x100000 0x100000>;
+            sercomm,scpart-id = <1>;
+        };
+
+        partition@200000 {
+            label = "Factory";
+            reg = <0x200000 0x100000>;
+            sercomm,scpart-id = <2>;
+            read-only;
+        };
+    };
index 2870d36361c4f99d8d3e47ec436352346c6fe0eb..7b18bc5cc8b3d73bd0b113239de8da37025b777a 100644 (file)
@@ -36,11 +36,15 @@ properties:
       - const: hclk
       - const: eclk
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg
   - clocks
   - clock-names
+  - power-domains
   - interrupts
 
 unevaluatedProperties: false
@@ -56,6 +60,7 @@ examples:
         interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
         clocks = <&sysctrl R9A06G032_HCLK_NAND>, <&sysctrl R9A06G032_CLK_NAND>;
         clock-names = "hclk", "eclk";
+        power-domains = <&sysctrl>;
         #address-cells = <1>;
         #size-cells = <0>;
     };
diff --git a/Documentation/devicetree/bindings/mtd/ti,elm.yaml b/Documentation/devicetree/bindings/mtd/ti,elm.yaml
new file mode 100644 (file)
index 0000000..87128c0
--- /dev/null
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/ti,elm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments Error Location Module (ELM)
+
+maintainers:
+  - Roger Quadros <rogerq@kernel.org>
+
+description:
+  The ELM module is used together with the GPMC and NAND flash to
+  detect errors and their locations based on BCH algorithms, so that
+  the errors can be corrected where possible.
+
+properties:
+  compatible:
+    enum:
+      - ti,am3352-elm
+      - ti,am64-elm
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+    description: Functional clock.
+
+  clock-names:
+    items:
+      - const: fck
+
+  power-domains:
+    maxItems: 1
+
+  ti,hwmods:
+    description:
+      Name of the HWMOD associated with ELM. This is for legacy
+      platforms only.
+    $ref: /schemas/types.yaml#/definitions/string
+    deprecated: true
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ti,am64-elm
+    then:
+      required:
+        - clocks
+        - clock-names
+        - power-domains
+
+additionalProperties: false
+
+examples:
+  - |
+    elm: ecc@0 {
+        compatible = "ti,am3352-elm";
+        reg = <0x0 0x2000>;
+        interrupts = <4>;
+    };
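
The example above covers only the legacy ti,am3352-elm case. For
ti,am64-elm, the if/then clause additionally requires clocks,
clock-names and power-domains; a hedged sketch of such a node follows
(the unit address, interrupt and clock/power-domain specifiers are
illustrative placeholders):

    elm: ecc@25010000 {
        compatible = "ti,am64-elm";
        reg = <0x25010000 0x2000>;
        interrupts = <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>;
        clocks = <&k3_clks 54 0>;
        clock-names = "fck";
        power-domains = <&k3_pds 54 TI_SCI_PD_EXCLUSIVE>;
    };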
index 2d4219ec7eda63367b5aaa8c0f096bd0f25b56c7..2e51072e794a6782f4535eadf825e7217151c1cb 100644 (file)
@@ -14,6 +14,8 @@ properties:
   compatible:
     enum:
       - arm,cmn-600
+      - arm,cmn-650
+      - arm,cmn-700
       - arm,ci-700
 
   reg:
index 57b68d6c7c70d4f1e9583bd77bdbf916c840b4fb..3666ac5b6518dd9d6651c5b3b89c127d658aa0bc 100644 (file)
@@ -33,7 +33,7 @@ patternProperties:
           $ref: "/schemas/types.yaml#/definitions/string"
           enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2,
                   ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC, ESPI, ESPIALT,
-                  FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3,
+                  FSI1, FSI2, FWQSPI, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3,
                   GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5,
                   GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16,
                   I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5,
@@ -58,7 +58,7 @@ patternProperties:
           $ref: "/schemas/types.yaml#/definitions/string"
           enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2,
                   ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4,
-                  EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP,
+                  EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWQSPI, FWSPIABR, FWSPID, FWSPIWP,
                   GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
                   GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10,
                   I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5,
index 8a70696395a7de942d7ae0e69c86d8ae693e974a..22ad012660e94417cedf0dfb009a21dd8c74dd32 100644 (file)
@@ -6,12 +6,6 @@ The cache bindings explained below are Devicetree Specification compliant
 Required Properties:
 
 - compatible   : Should include one of the following:
-                 "fsl,8540-l2-cache-controller"
-                 "fsl,8541-l2-cache-controller"
-                 "fsl,8544-l2-cache-controller"
-                 "fsl,8548-l2-cache-controller"
-                 "fsl,8555-l2-cache-controller"
-                 "fsl,8568-l2-cache-controller"
                  "fsl,b4420-l2-cache-controller"
                  "fsl,b4860-l2-cache-controller"
                  "fsl,bsc9131-l2-cache-controller"
index 61dd5af80db6755945694298624f03c74e8709de..5d2d989de893c889f530d32388cd27fc0be50d6e 100644 (file)
@@ -31,7 +31,7 @@ properties:
         $ref: "regulator.yaml#"
 
         properties:
-          regulator-name:
+          regulator-compatible:
             pattern: "^vbuck[1-4]$"
 
     additionalProperties: false
index 9a90a92f2d7e15a15159d570c91239b35eafe31e..7034cdca54e04f0e29228ef456f1456799c71f68 100644 (file)
@@ -8,14 +8,14 @@ Documentation/devicetree/bindings/regulator/regulator.txt.
 
 The valid names for regulators are::
 BUCK:
-  buck_vdram1, buck_vcore, buck_vpa, buck_vproc11, buck_vproc12, buck_vgpu,
-  buck_vs2, buck_vmodem, buck_vs1
+  buck_vdram1, buck_vcore, buck_vcore_sshub, buck_vpa, buck_vproc11,
+  buck_vproc12, buck_vgpu, buck_vs2, buck_vmodem, buck_vs1
 LDO:
   ldo_vdram2, ldo_vsim1, ldo_vibr, ldo_vrf12, ldo_vio18, ldo_vusb, ldo_vcamio,
   ldo_vcamd, ldo_vcn18, ldo_vfe28, ldo_vsram_proc11, ldo_vcn28, ldo_vsram_others,
-  ldo_vsram_gpu, ldo_vxo22, ldo_vefuse, ldo_vaux18, ldo_vmch, ldo_vbif28,
-  ldo_vsram_proc12, ldo_vcama1, ldo_vemc, ldo_vio28, ldo_va12, ldo_vrf18,
-  ldo_vcn33_bt, ldo_vcn33_wifi, ldo_vcama2, ldo_vmc, ldo_vldo28, ldo_vaud28,
+  ldo_vsram_others_sshub, ldo_vsram_gpu, ldo_vxo22, ldo_vefuse, ldo_vaux18,
+  ldo_vmch, ldo_vbif28, ldo_vsram_proc12, ldo_vcama1, ldo_vemc, ldo_vio28, ldo_va12,
+  ldo_vrf18, ldo_vcn33_bt, ldo_vcn33_wifi, ldo_vcama2, ldo_vmc, ldo_vldo28, ldo_vaud28,
   ldo_vsim2
 
 Example:
@@ -354,5 +354,17 @@ Example:
                                regulator-max-microvolt = <3100000>;
                                regulator-enable-ramp-delay = <540>;
                        };
+
+                       mt6358_vcore_sshub_reg: buck_vcore_sshub {
+                               regulator-name = "vcore_sshub";
+                               regulator-min-microvolt = <500000>;
+                               regulator-max-microvolt = <1293750>;
+                       };
+
+                       mt6358_vsram_others_sshub_reg: ldo_vsram_others_sshub {
+                               regulator-name = "vsram_others_sshub";
+                               regulator-min-microvolt = <500000>;
+                               regulator-max-microvolt = <1293750>;
+                       };
                };
        };
index f70f2e758a002fd3f6750401e32b0df778713c4e..b539781e39aa45989fde397e206a13077cc464a6 100644 (file)
@@ -92,6 +92,17 @@ properties:
       LDO5CTRL_L or LDO5CTRL_H register. Use this if the SD_VSEL signal is
       connected to a host GPIO.
 
+  nxp,i2c-lt-enable:
+    type: boolean
+    description:
+      Indicates that the I2C Level Translator is used.
+
+  nxp,wdog_b-warm-reset:
+    type: boolean
+    description:
+      When WDOG_B signal is asserted a warm reset will be done instead of cold
+      reset.
+
 required:
   - compatible
   - reg
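
A hedged sketch of a PMIC node using the two new flags (the compatible
string, I2C address and property combination are illustrative
placeholders, not taken from this patch):

    pmic@25 {
        compatible = "nxp,pca9450c";
        reg = <0x25>;
        nxp,i2c-lt-enable;
        nxp,wdog_b-warm-reset;
    };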
index e28ee9e467888818f1a3304474ee43e42dd13427..9a36bee750af080eff6e59af62572b67f5fda49e 100644 (file)
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Qualcomm Technologies, Inc. RPMh Regulators
 
 maintainers:
-  - David Collins <collinsd@codeaurora.org>
+  - Bjorn Andersson <bjorn.andersson@linaro.org>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 
 description: |
     rpmh-regulator devices support PMIC regulator management via the Voltage
@@ -78,7 +79,7 @@ properties:
         RPMh resource name suffix used for the regulators found
         on this PMIC.
     $ref: /schemas/types.yaml#/definitions/string
-    enum: [a, b, c, d, e, f]
+    enum: [a, b, c, d, e, f, h, k]
 
   qcom,always-wait-for-ack:
     description: |
@@ -94,35 +95,264 @@ properties:
   vdd-rgb-supply:
     description: Input supply phandle of rgb.
 
-  vin-lvs-1-2-supply:
-    description: Input supply phandle of one or more regulators.
-
-  vdd-bob-supply:
-    description: BOB regulator parent supply phandle.
-
   bob:
     type: object
     $ref: "regulator.yaml#"
     description: BOB regulator node.
 
 patternProperties:
-  "^vdd-s([0-9]+)-supply$":
-    description: Input supply phandle(s) of one or more regulators.
-
-  "^vdd-(l[0-9]+[-]){1,5}supply$":
-    description: Input supply phandle(s) of one or more regulators.
-
   "^(smps|ldo|lvs)[0-9]+$":
     type: object
     $ref: "regulator.yaml#"
     description: smps/ldo regulator node(s).
 
-additionalProperties: false
-
 required:
   - compatible
   - qcom,pmic-id
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm6150-rpmh-regulators
+    then:
+      properties:
+        vdd-l2-l3-supply: true
+        vdd-l4-l7-l8-supply: true
+        vdd-l5-l16-l17-l18-l19-supply: true
+        vdd-l10-l14-l15-supply: true
+        vdd-l11-l12-l13-supply: true
+      patternProperties:
+        "^vdd-l[169]-supply$": true
+        "^vdd-s[1-5]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm6150l-rpmh-regulators
+    then:
+      properties:
+        vdd-bob-supply:
+          description: BOB regulator parent supply phandle.
+        vdd-l1-l8-supply: true
+        vdd-l2-l3-supply: true
+        vdd-l4-l5-l6-supply: true
+        vdd-l7-l11-supply: true
+        vdd-l9-l10-supply: true
+      patternProperties:
+        "^vdd-s[1-8]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm7325-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l4-l12-l15-supply: true
+        vdd-l2-l7-supply: true
+        vdd-l6-l9-l10-supply: true
+        vdd-l11-l17-l18-l19-supply: true
+        vdd-l13-supply: true
+        vdd-l14-l16-supply: true
+      patternProperties:
+        "^vdd-l[358]-supply$": true
+        "^vdd-s[1-8]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8005-rpmh-regulators
+    then:
+      patternProperties:
+        "^vdd-s[1-4]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8009-rpmh-regulators
+            - qcom,pm8009-1-rpmh-regulators
+    then:
+      properties:
+        vdd-l5-l6-supply: true
+      patternProperties:
+        "^vdd-l[1-47]-supply$": true
+        "^vdd-s[1-2]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8150-rpmh-regulators
+            - qcom,pmm8155au-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l8-l11-supply: true
+        vdd-l2-l10-supply: true
+        vdd-l3-l4-l5-l18-supply: true
+        vdd-l6-l9-supply: true
+        vdd-l7-l12-l14-l15-supply: true
+        vdd-l13-l16-l17-supply: true
+      patternProperties:
+        "^vdd-s([1-9]|10)-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8150l-rpmh-regulators
+    then:
+      properties:
+        vdd-bob-supply:
+          description: BOB regulator parent supply phandle.
+        vdd-l1-l8-supply: true
+        vdd-l2-l3-supply: true
+        vdd-l4-l5-l6-supply: true
+        vdd-l7-l11-supply: true
+        vdd-l9-l10-supply: true
+      patternProperties:
+        "^vdd-s[1-8]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8350-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l4-supply: true
+        vdd-l2-l7-supply: true
+        vdd-l3-l5-supply: true
+        vdd-l6-l9-l10-supply: true
+        vdd-l8-supply: true
+      patternProperties:
+        "^vdd-s([1-9]|1[0-2])-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8350c-rpmh-regulators
+    then:
+      properties:
+        vdd-bob-supply:
+          description: BOB regulator parent supply phandle.
+        vdd-l1-l12-supply: true
+        vdd-l2-l8-supply: true
+        vdd-l3-l4-l5-l7-l13-supply: true
+        vdd-l6-l9-l11-supply: true
+        vdd-l10-supply: true
+      patternProperties:
+        "^vdd-s([1-9]|10)-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8450-rpmh-regulators
+    then:
+      patternProperties:
+        "^vdd-l[1-4]-supply$": true
+        "^vdd-s[1-6]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pm8998-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l27-supply: true
+        vdd-l2-l8-l17-supply: true
+        vdd-l3-l11-supply: true
+        vdd-l4-l5-supply: true
+        vdd-l6-supply: true
+        vdd-l7-l12-l14-l15-supply: true
+        vdd-l9-supply: true
+        vdd-l10-l23-l25-supply: true
+        vdd-l13-l19-l21-supply: true
+        vdd-l16-l28-supply: true
+        vdd-l18-l22-supply: true
+        vdd-l20-l24-supply: true
+        vdd-l26-supply: true
+        vin-lvs-1-2-supply: true
+      patternProperties:
+        "^vdd-s([1-9]|1[0-3])-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmg1110-rpmh-regulators
+    then:
+      properties:
+        vdd-s1-supply: true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmi8998-rpmh-regulators
+    then:
+      properties:
+        vdd-bob-supply:
+          description: BOB regulator parent supply phandle.
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmr735a-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l2-supply: true
+        vdd-l3-supply: true
+        vdd-l4-supply: true
+        vdd-l5-l6-supply: true
+        vdd-l7-bob-supply: true
+      patternProperties:
+        "^vdd-s[1-3]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmx55-rpmh-regulators
+    then:
+      properties:
+        vdd-l1-l2-supply: true
+        vdd-l3-l9-supply: true
+        vdd-l4-l12-supply: true
+        vdd-l5-l6-supply: true
+        vdd-l7-l8-supply: true
+        vdd-l10-l11-l13-supply: true
+      patternProperties:
+        "^vdd-l1[4-6]-supply$": true
+        "^vdd-s[1-7]-supply$": true
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmx65-rpmh-regulators
+    then:
+      properties:
+        vdd-l2-l18-supply: true
+        vdd-l5-l6-l16-supply: true
+        vdd-l8-l9-supply: true
+        vdd-l11-l13-supply: true
+      patternProperties:
+        "^vdd-l[1347]-supply$": true
+        "^vdd-l1[0245789]-supply$": true
+        "^vdd-l2[01]-supply$": true
+        "^vdd-s[1-8]-supply$": true
+
+unevaluatedProperties: false
+
 examples:
   - |
     #include <dt-bindings/regulator/qcom,rpmh-regulator.h>
index 235e593b3b2c060ea8d4302377a548531da7b557..091150c4e5795b401ddc02d70779277bb2e25f22 100644 (file)
@@ -17,9 +17,6 @@ description: |
   Datasheet is available at
   https://www.richtek.com/assets/product_file/RT4801H/DS4801H-00.pdf
 
-#The valid names for RT4801 regulator nodes are:
-#DSVP, DSVN
-
 properties:
   compatible:
     enum:
@@ -33,10 +30,13 @@ properties:
       The first one is ENP to enable DSVP, and second one is ENM to enable DSVN.
       Number of GPIO in the array list could be 1 or 2.
       If only one gpio is specified, only one gpio is used to control ENP/ENM.
-      Else both are spefied, DSVP/DSVN could be controlled individually.
-      Othersie, this property not specified. treat both as always-on regulator.
+      If both are specified, DSVP/DSVN can be controlled individually.
+      If this property is not specified, treat both as always-on regulators.
+
+      This property is deprecated. Use enable-gpios in each regulator node.
     minItems: 1
     maxItems: 2
+    deprecated: true
 
 patternProperties:
   "^DSV(P|N)$":
@@ -45,6 +45,14 @@ patternProperties:
     description:
       Properties for single display bias regulator.
 
+    properties:
+      enable-gpios:
+        description:
+          GPIO used to enable the DSVP/DSVN regulator. One GPIO can be
+          configured to control both regulators. If this property is not
+          specified for either regulator, treat both as always-on regulators.
+        maxItems: 1
+
 required:
   - compatible
   - reg
@@ -60,19 +68,20 @@ examples:
         rt4801@73 {
             compatible = "richtek,rt4801";
             reg = <0x73>;
-            enable-gpios = <&gpio26 2 0>, <&gpio26 3 0>;
 
             dsvp: DSVP {
                 regulator-name = "rt4801,dsvp";
                 regulator-min-microvolt = <4000000>;
                 regulator-max-microvolt = <6000000>;
                 regulator-boot-on;
+                enable-gpios = <&gpio26 2 0>;
             };
             dsvn: DSVN {
                 regulator-name = "rt4801,dsvn";
                 regulator-min-microvolt = <4000000>;
                 regulator-max-microvolt = <6000000>;
                 regulator-boot-on;
+                enable-gpios = <&gpio26 3 0>;
             };
 
         };
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt5759-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt5759-regulator.yaml
new file mode 100644 (file)
index 0000000..0a4c957
--- /dev/null
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/richtek,rt5759-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RT5759 High Performance DCDC Converter
+
+maintainers:
+  - ChiYuan Huang <cy_huang@richtek.com>
+
+description: |
+  The RT5759 is a high-performance, synchronous step-down DC-DC converter that
+  can deliver up to 9A of output current from a 3V to 6.5V input supply. The
+  output voltage is programmable via an I2C-controlled 7-bit VID.
+
+  Datasheet is available at
+  https://www.richtek.com/assets/product_file/RT5759/DS5759-00.pdf
+
+properties:
+  compatible:
+    enum:
+      - richtek,rt5759
+      - richtek,rt5759a
+
+  reg:
+    maxItems: 1
+
+  regulator-allowed-modes:
+    description: |
+      buck allowed operating mode
+        0: auto mode (PSKIP: pulse skipping)
+        1: force pwm mode
+    items:
+      enum: [0, 1]
+
+  richtek,watchdog-enable:
+    description: enable the external watchdog reset pin listening
+    type: boolean
+
+allOf:
+  - $ref: regulator.yaml#
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: richtek,rt5759
+    then:
+      properties:
+        richtek,watchdog-enable: false
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  # example 1 for RT5759
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      regulator@62 {
+        compatible = "richtek,rt5759";
+        reg = <0x62>;
+        regulator-name = "rt5759-buck";
+        regulator-min-microvolt = <600000>;
+        regulator-max-microvolt = <1500000>;
+        regulator-boot-on;
+      };
+    };
+  # example 2 for RT5759A
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      regulator@62 {
+        compatible = "richtek,rt5759a";
+        reg = <0x62>;
+        regulator-name = "rt5759a-buck";
+        regulator-min-microvolt = <600000>;
+        regulator-max-microvolt = <1725000>;
+        regulator-boot-on;
+        richtek,watchdog-enable;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/regulator/siliconmitus,sm5703-regulator.yaml b/Documentation/devicetree/bindings/regulator/siliconmitus,sm5703-regulator.yaml
new file mode 100644 (file)
index 0000000..9d84117
--- /dev/null
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/siliconmitus,sm5703-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silicon Mitus SM5703 multi function device regulators
+
+maintainers:
+  - Markuss Broks <markuss.broks@gmail.com>
+
+description: |
+  The SM5703 regulators node should be a sub-node of the SM5703 MFD node. See
+  the SM5703 MFD bindings at Documentation/devicetree/bindings/mfd/siliconmitus,sm5703.yaml.
+  Regulator nodes should be named buck, vbus, ldo<number> and usbldo<number>.
+  Each of these nodes is defined using the standard binding for regulators at
+  Documentation/devicetree/bindings/regulator/regulator.txt.
+
+properties:
+  buck:
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      Properties for the BUCK regulator.
+
+  vbus:
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      Properties for the VBUS regulator.
+
+patternProperties:
+  "^ldo[1-3]$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      Properties for single LDO regulator.
+
+  "^usbldo[1-2]$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      Properties for a single USBLDO regulator.
+
+additionalProperties: false
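
The schema above ships without an example; a hedged sketch of the
regulators sub-node under the SM5703 MFD node might look like this
(regulator names and voltage ranges are illustrative placeholders):

    regulators {
        buck {
            regulator-name = "sm5703-buck";
            regulator-min-microvolt = <1000000>;
            regulator-max-microvolt = <3000000>;
        };

        vbus {
            regulator-name = "sm5703-vbus";
        };

        ldo1 {
            regulator-name = "sm5703-ldo1";
        };

        usbldo1 {
            regulator-name = "sm5703-usbldo1";
        };
    };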
index 1218f21ba3204922245934543bf5d39ea2b287a0..75087c6e001c69a38f65aefa8118bc3d746e5616 100644 (file)
@@ -14,9 +14,6 @@ description: |
 maintainers:
   - Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
 
-allOf:
-  - $ref: "regulator.yaml#"
-
 # USB3 Controller
 
 properties:
@@ -36,27 +33,51 @@ properties:
     minItems: 1
     maxItems: 2
 
-  clock-names:
-    oneOf:
-      - items:          # for Pro4, Pro5
-          - const: gio
-          - const: link
-      - items:          # for others
-          - const: link
+  clock-names: true
 
   resets:
     minItems: 1
     maxItems: 2
 
-  reset-names:
-    oneOf:
-      - items:          # for Pro4, Pro5
-          - const: gio
-          - const: link
-      - items:
-          - const: link
+  reset-names: true
 
-additionalProperties: false
+allOf:
+  - $ref: "regulator.yaml#"
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - socionext,uniphier-pro4-usb3-regulator
+              - socionext,uniphier-pro5-usb3-regulator
+    then:
+      properties:
+        clocks:
+          minItems: 2
+          maxItems: 2
+        clock-names:
+          items:
+            - const: gio
+            - const: link
+        resets:
+          minItems: 2
+          maxItems: 2
+        reset-names:
+          items:
+            - const: gio
+            - const: link
+    else:
+      properties:
+        clocks:
+          maxItems: 1
+        clock-names:
+          const: link
+        resets:
+          maxItems: 1
+        reset-names:
+          const: link
+
+unevaluatedProperties: false
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/reserved-memory/phram.yaml b/Documentation/devicetree/bindings/reserved-memory/phram.yaml
new file mode 100644 (file)
index 0000000..6c4db28
--- /dev/null
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/reserved-memory/phram.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MTD/block device in RAM
+
+description: |
+  Specifies that the reserved memory region can be used as an MTD or block
+  device.
+
+  The "phram" node is named after the "MTD in PHysical RAM" driver which
+  provides an implementation of this functionality in Linux.
+
+maintainers:
+  - Vincent Whitchurch <vincent.whitchurch@axis.com>
+
+allOf:
+  - $ref: "reserved-memory.yaml"
+  - $ref: "/schemas/mtd/mtd.yaml"
+
+properties:
+  compatible:
+    const: phram
+
+  reg:
+    description: region of memory that can be used as an MTD/block device
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    reserved-memory {
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        phram: flash@12340000 {
+            compatible = "phram";
+            label = "rootfs";
+            reg = <0x12340000 0x00800000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml b/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml
new file mode 100644 (file)
index 0000000..fa8f4ac
--- /dev/null
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/aspeed,ast2600-fmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Aspeed SMC controller bindings
+
+maintainers:
+  - Chin-Ting Kuo <chin-ting_kuo@aspeedtech.com>
+  - Cédric Le Goater <clg@kaod.org>
+
+description: |
+  This binding describes the Aspeed Static Memory Controllers (FMC and
+  SPI) of the AST2400, AST2500 and AST2600 SoCs.
+
+allOf:
+  - $ref: "spi-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - aspeed,ast2600-fmc
+      - aspeed,ast2600-spi
+      - aspeed,ast2500-fmc
+      - aspeed,ast2500-spi
+      - aspeed,ast2400-fmc
+      - aspeed,ast2400-spi
+
+  reg:
+    items:
+      - description: registers
+      - description: memory mapping
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/aspeed-scu-ic.h>
+    #include <dt-bindings/clock/ast2600-clock.h>
+
+    spi@1e620000 {
+        reg = <0x1e620000 0xc4>, <0x20000000 0x10000000>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+        compatible = "aspeed,ast2600-fmc";
+        clocks = <&syscon ASPEED_CLK_AHB>;
+        interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+
+        flash@0 {
+                reg = <0>;
+                compatible = "jedec,spi-nor";
+                spi-max-frequency = <50000000>;
+                spi-rx-bus-width = <2>;
+        };
+
+        flash@1 {
+                reg = <1>;
+                compatible = "jedec,spi-nor";
+                spi-max-frequency = <50000000>;
+                spi-rx-bus-width = <2>;
+        };
+
+        flash@2 {
+                reg = <2>;
+                compatible = "jedec,spi-nor";
+                spi-max-frequency = <50000000>;
+                spi-rx-bus-width = <2>;
+        };
+    };
index 5b1c7a2a6a3196520c90e34ec8e278b8ecf60718..360f76c226d92bb2d5df72b8c29a8e9302db3222 100644 (file)
@@ -18,7 +18,10 @@ properties:
     oneOf:
       - enum:
           - ingenic,jz4750-spi
+          - ingenic,jz4775-spi
           - ingenic,jz4780-spi
+          - ingenic,x1000-spi
+          - ingenic,x2000-spi
       - items:
           - enum:
               - ingenic,jz4760-spi
index 818130b11bb917f44c31b05079c6df2d4d6f8fc3..94ef0552bd4241a9e128b91ad2f0cd285606dc1e 100644 (file)
@@ -53,16 +53,20 @@ properties:
     maxItems: 1
 
   clocks:
+    minItems: 3
     items:
       - description: clock used for the parent clock
       - description: clock used for the muxes clock
       - description: clock used for the clock gate
+      - description: clock used for the AHB bus; this clock is optional
 
   clock-names:
+    minItems: 3
     items:
       - const: parent-clk
       - const: sel-clk
       - const: spi-clk
+      - const: hclk
 
   mediatek,pad-select:
     $ref: /schemas/types.yaml#/definitions/uint32-array
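
A hedged sketch of a controller node wiring up the optional fourth
clock (the compatible strings, unit address and clock phandles are
illustrative placeholders, not taken from this patch):

    spi@1100a000 {
        compatible = "mediatek,mt8192-spi", "mediatek,mt6765-spi";
        reg = <0x1100a000 0x1000>;
        clocks = <&parent_clk>, <&sel_clk>, <&spi_clk>, <&bus_clk>;
        clock-names = "parent-clk", "sel-clk", "spi-clk", "hclk";
    };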
diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-snfi.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-snfi.yaml
new file mode 100644 (file)
index 0000000..6e6e02c
--- /dev/null
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/mediatek,spi-mtk-snfi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPI-NAND flash controller for MediaTek ARM SoCs
+
+maintainers:
+  - Chuanhong Guo <gch981213@gmail.com>
+
+description: |
+  The Mediatek SPI-NAND flash controller is an extended version of
+  the Mediatek NAND flash controller. It can perform standard SPI
+  instructions with one continuous write and one read of up to 0xa0
+  bytes. It also supports typical SPI-NAND page cache operations
+  in single, dual or quad IO mode with pipelined ECC encoding/decoding
+  using the accompanying ECC engine. There should be only one SPI
+  slave device following the generic SPI bindings.
+
+allOf:
+  - $ref: /schemas/spi/spi-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt7622-snand
+      - mediatek,mt7629-snand
+
+  reg:
+    items:
+      - description: core registers
+
+  interrupts:
+    items:
+      - description: NFI interrupt
+
+  clocks:
+    items:
+      - description: clock used for the controller
+      - description: clock used for the SPI bus
+
+  clock-names:
+    items:
+      - const: nfi_clk
+      - const: pad_clk
+
+  nand-ecc-engine:
+    description: device-tree node of the accompanying ECC engine.
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - nand-ecc-engine
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/mt7622-clk.h>
+    soc {
+      #address-cells = <2>;
+      #size-cells = <2>;
+      snfi: spi@1100d000 {
+        compatible = "mediatek,mt7622-snand";
+        reg = <0 0x1100d000 0 0x1000>;
+        interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>;
+        clocks = <&pericfg CLK_PERI_NFI_PD>, <&pericfg CLK_PERI_SNFI_PD>;
+        clock-names = "nfi_clk", "pad_clk";
+        nand-ecc-engine = <&bch>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        flash@0 {
+          compatible = "spi-nand";
+          reg = <0>;
+          spi-tx-bus-width = <4>;
+          spi-rx-bus-width = <4>;
+          nand-ecc-engine = <&snfi>;
+        };
+      };
+    };
index 5a60fba14bba0cc159b285f2fd1c9b5afc249db3..44d08aa3fd85db24d7ef206f52470d71f11637aa 100644 (file)
@@ -49,6 +49,7 @@ properties:
     maxItems: 2
 
   interconnect-names:
+    minItems: 1
     items:
       - const: qspi-config
       - const: qspi-memory
index 2c3c6bd6ec45b72e5a23080c2a817636306fa37a..f45d3b75d6de8f80c6465f8f86725182de7bf169 100644 (file)
@@ -21,6 +21,7 @@ properties:
           - enum:
               - renesas,rspi-r7s72100  # RZ/A1H
               - renesas,rspi-r7s9210   # RZ/A2
+              - renesas,r9a07g043-rspi # RZ/G2UL
               - renesas,r9a07g044-rspi # RZ/G2{L,LC}
               - renesas,r9a07g054-rspi # RZ/V2L
           - const: renesas,rspi-rz
@@ -124,6 +125,7 @@ allOf:
           contains:
             enum:
               - renesas,qspi
+              - renesas,r9a07g043-rspi
               - renesas,r9a07g044-rspi
               - renesas,r9a07g054-rspi
     then:
index a9b7388ca9aca31053791e0495936c6e0b41d5d9..e1587ddf7de330b9b1f63c5d82fc8ca3eee0a101 100644 (file)
@@ -18,6 +18,7 @@ description:
 properties:
   compatible:
     enum:
+      - qcom,sc8180x-lmh
       - qcom,sdm845-lmh
       - qcom,sm8150-lmh
 
index 3ea8c0c1f45f2f4788956fd62406e990b3beadc5..feb390d50696c4a7fe6915c333c969e3b069f717 100644 (file)
@@ -10,7 +10,9 @@ maintainers:
 
 properties:
   compatible:
-    const: qcom,spmi-adc-tm5
+    enum:
+      - qcom,spmi-adc-tm5
+      - qcom,spmi-adc-tm5-gen2
 
   reg:
     maxItems: 1
@@ -33,6 +35,7 @@ properties:
   qcom,avg-samples:
     $ref: /schemas/types.yaml#/definitions/uint32
     description: Number of samples to be used for measurement.
+            Not applicable for Gen2 ADC_TM peripheral.
     enum:
       - 1
       - 2
@@ -45,6 +48,7 @@ properties:
     $ref: /schemas/types.yaml#/definitions/uint32
     description: This parameter is used to decrease ADC sampling rate.
             Quicker measurements can be made by reducing decimation ratio.
+            Not applicable for Gen2 ADC_TM peripheral.
     enum:
       - 250
       - 420
@@ -93,6 +97,29 @@ patternProperties:
           - const: 1
           - enum: [ 1, 3, 4, 6, 20, 8, 10 ]
 
+      qcom,avg-samples:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description: Number of samples to be used for measurement.
+          This property in the child node is applicable only to the Gen2 ADC_TM peripheral.
+        enum:
+          - 1
+          - 2
+          - 4
+          - 8
+          - 16
+        default: 1
+
+      qcom,decimation:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description: This parameter is used to decrease ADC sampling rate.
+          Quicker measurements can be made by reducing decimation ratio.
+          This property in the child node is applicable only to the Gen2 ADC_TM peripheral.
+        enum:
+          - 85
+          - 340
+          - 1360
+        default: 1360
+
     required:
       - reg
       - io-channels
@@ -100,6 +127,31 @@ patternProperties:
     additionalProperties:
       false
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: qcom,spmi-adc-tm5
+
+    then:
+      patternProperties:
+        "^([-a-z0-9]*)@[0-7]$":
+          properties:
+            qcom,decimation: false
+            qcom,avg-samples: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: qcom,spmi-adc-tm5-gen2
+
+    then:
+      properties:
+        qcom,avg-samples: false
+        qcom,decimation: false
+
 required:
   - compatible
   - reg
@@ -124,7 +176,7 @@ examples:
             #size-cells = <0>;
             #io-channel-cells = <1>;
 
-            /* Other propreties are omitted */
+            /* Other properties are omitted */
             conn-therm@4f {
                 reg = <ADC5_AMUX_THM3_100K_PU>;
                 qcom,ratiometric;
@@ -148,4 +200,58 @@ examples:
             };
         };
     };
+
+  - |
+    #include <dt-bindings/iio/qcom,spmi-adc7-pmk8350.h>
+    #include <dt-bindings/iio/qcom,spmi-adc7-pm8350.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    spmi_bus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        pmk8350_vadc: adc@3100 {
+            reg = <0x3100>;
+            compatible = "qcom,spmi-adc7";
+            #address-cells = <1>;
+            #size-cells = <0>;
+            #io-channel-cells = <1>;
+
+            /* Other properties are omitted */
+            xo-therm@44 {
+                reg = <PMK8350_ADC7_AMUX_THM1_100K_PU>;
+                qcom,ratiometric;
+                qcom,hw-settle-time = <200>;
+            };
+
+            conn-therm@47 {
+                reg = <PM8350_ADC7_AMUX_THM4_100K_PU>;
+                qcom,ratiometric;
+                qcom,hw-settle-time = <200>;
+            };
+        };
+
+        pmk8350_adc_tm: adc-tm@3400 {
+            compatible = "qcom,spmi-adc-tm5-gen2";
+            reg = <0x3400>;
+            interrupts = <0x0 0x34 0x0 IRQ_TYPE_EDGE_RISING>;
+            #thermal-sensor-cells = <1>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            pmk8350-xo-therm@0 {
+                reg = <0>;
+                io-channels = <&pmk8350_vadc PMK8350_ADC7_AMUX_THM1_100K_PU>;
+                qcom,decimation = <340>;
+                qcom,ratiometric;
+                qcom,hw-settle-time-us = <200>;
+            };
+
+            conn-therm@1 {
+                reg = <1>;
+                io-channels = <&pmk8350_vadc PM8350_ADC7_AMUX_THM4_100K_PU>;
+                qcom,avg-samples = <2>;
+                qcom,ratiometric;
+                qcom,hw-settle-time-us = <200>;
+            };
+        };
+    };
 ...
index b6406bcc683f0ad8a8d3a78f6ebb4e2a431919b4..a24baf9b9f64be7de5a627c9c72d89b4d82c45f4 100644 (file)
@@ -19,10 +19,11 @@ description: |
 properties:
   compatible:
     oneOf:
-      - description: msm9860 TSENS based
+      - description: msm8960 TSENS based
         items:
           - enum:
               - qcom,ipq8064-tsens
+              - qcom,msm8960-tsens
 
       - description: v0.1 of TSENS
         items:
@@ -49,6 +50,7 @@ properties:
               - qcom,sc7180-tsens
               - qcom,sc7280-tsens
               - qcom,sc8180x-tsens
+              - qcom,sc8280xp-tsens
               - qcom,sdm630-tsens
               - qcom,sdm845-tsens
               - qcom,sm8150-tsens
@@ -116,6 +118,7 @@ allOf:
               - qcom,ipq8064-tsens
               - qcom,mdm9607-tsens
               - qcom,msm8916-tsens
+              - qcom,msm8960-tsens
               - qcom,msm8974-tsens
               - qcom,msm8976-tsens
               - qcom,qcs404-tsens
index ccab9511a042f2259c7a7408fba078a116292170..1d837339784817ea610767a467c73f9e0048f17f 100644 (file)
@@ -17,7 +17,9 @@ properties:
   compatible:
     items:
       - enum:
+          - renesas,r9a07g043-tsu # RZ/G2UL
           - renesas,r9a07g044-tsu # RZ/G2{L,LC}
+          - renesas,r9a07g054-tsu # RZ/V2L
       - const: renesas,rzg2l-tsu
 
   reg:
diff --git a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
new file mode 100644 (file)
index 0000000..c74f124
--- /dev/null
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/ti,j72xx-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments J72XX VTM (DTS) binding
+
+maintainers:
+  - Keerthy <j-keerthy@ti.com>
+
+properties:
+  compatible:
+    enum:
+      - ti,j721e-vtm
+      - ti,j7200-vtm
+
+  reg:
+    items:
+      - description: VTM cfg1 register space
+      - description: VTM cfg2 register space
+      - description: VTM efuse register space
+
+  power-domains:
+    maxItems: 1
+
+  "#thermal-sensor-cells":
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - power-domains
+  - "#thermal-sensor-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/soc/ti,sci_pm_domain.h>
+    wkup_vtm0: thermal-sensor@42040000 {
+        compatible = "ti,j721e-vtm";
+        reg = <0x42040000 0x350>,
+              <0x42050000 0x350>,
+              <0x43000300 0x10>;
+        power-domains = <&k3_pds 154 TI_SCI_PD_EXCLUSIVE>;
+        #thermal-sensor-cells = <1>;
+    };
+
+    mpu_thermal: mpu-thermal {
+        polling-delay-passive = <250>; /* milliseconds */
+        polling-delay = <500>; /* milliseconds */
+        thermal-sensors = <&wkup_vtm0 0>;
+
+        trips {
+            mpu_crit: mpu-crit {
+                temperature = <125000>; /* milliCelsius */
+                hysteresis = <2000>; /* milliCelsius */
+                type = "critical";
+            };
+        };
+    };
+...
index 550a2e5c9e05dd598ca20ed546182ed73e410c0b..c11520347a9d2b8843ccca5c518cb062d178e290 100644 (file)
@@ -143,6 +143,10 @@ properties:
           - infineon,xdpe12254
             # Infineon Multi-phase Digital VR Controller xdpe12284
           - infineon,xdpe12284
+            # Infineon Multi-phase Digital VR Controller xdpe15284
+          - infineon,xdpe15284
+            # Infineon Multi-phase Digital VR Controller xdpe152c4
+          - infineon,xdpe152c4
             # Injoinic IP5108 2.0A Power Bank IC with I2C
           - injoinic,ip5108
             # Injoinic IP5109 2.1A Power Bank IC with I2C
index 01430973ecec39477543965c1606c1d1e2042313..65ff22364fb3000600c699f9271f79377d4fded7 100644 (file)
@@ -1082,6 +1082,8 @@ patternProperties:
     description: Sensirion AG
   "^sensortek,.*":
     description: Sensortek Technology Corporation
+  "^sercomm,.*":
+    description: Sercomm (Suzhou) Corporation
   "^sff,.*":
     description: Small Form Factor Committee
   "^sgd,.*":
index 910b30a2a7d98b89ce3b9bc6afb7a88aa7c8025c..352ff53a2306ad897f5ef3e20ade4ca27f664471 100644 (file)
@@ -211,6 +211,7 @@ r200_reg_safe.h
 r300_reg_safe.h
 r420_reg_safe.h
 r600_reg_safe.h
+randstruct.seed
 randomize_layout_hash.h
 randomize_layout_seed.h
 recordmcount
index bbc53920d4dd699bcb62f88a3c528b299876cfbc..a1ddefa1f55f45849001fb1794712bc284ad2c1d 100644 (file)
@@ -417,30 +417,66 @@ struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip.
 If you do this, the additional irq_chip will be set up by gpiolib at the
 same time as setting up the rest of the GPIO functionality. The following
 is a typical example of a chained cascaded interrupt handler using
-the gpio_irq_chip:
+the gpio_irq_chip. Note how the mask/unmask (or disable/enable) functions
+call into the core gpiolib code:
 
 .. code-block:: c
 
-  /* Typical state container with dynamic irqchip */
+  /* Typical state container */
   struct my_gpio {
       struct gpio_chip gc;
-      struct irq_chip irq;
+  };
+
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPERS macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name            = "my_gpio_irq",
+      .irq_ack         = my_gpio_ack_irq,
+      .irq_mask        = my_gpio_mask_irq,
+      .irq_unmask      = my_gpio_unmask_irq,
+      .irq_set_type    = my_gpio_set_irq_type,
+      .flags           = IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
   };
 
   int irq; /* from platform etc */
   struct my_gpio *g;
   struct gpio_irq_chip *girq;
 
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
   /* Get a pointer to the gpio_irq_chip */
   girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
   girq->parent_handler = ftgpio_gpio_irq_handler;
   girq->num_parents = 1;
   girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
@@ -458,23 +494,58 @@ the interrupt separately and go with it:
 
 .. code-block:: c
 
-  /* Typical state container with dynamic irqchip */
+  /* Typical state container */
   struct my_gpio {
       struct gpio_chip gc;
-      struct irq_chip irq;
+  };
+
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPERS macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name            = "my_gpio_irq",
+      .irq_ack         = my_gpio_ack_irq,
+      .irq_mask        = my_gpio_mask_irq,
+      .irq_unmask      = my_gpio_unmask_irq,
+      .irq_set_type    = my_gpio_set_irq_type,
+      .flags           = IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
   };
 
   int irq; /* from platform etc */
   struct my_gpio *g;
   struct gpio_irq_chip *girq;
 
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
   ret = devm_request_threaded_irq(dev, irq, NULL,
                irq_thread_fn, IRQF_ONESHOT, "my-chip", g);
   if (ret < 0)
@@ -482,7 +553,7 @@ the interrupt separately and go with it:
 
   /* Get a pointer to the gpio_irq_chip */
   girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
   /* This will let us handle the parent IRQ in the driver */
   girq->parent_handler = NULL;
   girq->num_parents = 0;
@@ -500,24 +571,61 @@ In this case the typical set-up will look like this:
   /* Typical state container */
   struct my_gpio {
       struct gpio_chip gc;
-      struct irq_chip irq;
       struct fwnode_handle *fwnode;
   };
 
-  int irq; /* from platform etc */
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+      irq_mask_mask_parent(d);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+
+      irq_chip_unmask_parent(d);
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPERS macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name            = "my_gpio_irq",
+      .irq_ack         = my_gpio_ack_irq,
+      .irq_mask        = my_gpio_mask_irq,
+      .irq_unmask      = my_gpio_unmask_irq,
+      .irq_set_type    = my_gpio_set_irq_type,
+      .flags           = IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
+  };
+
   struct my_gpio *g;
   struct gpio_irq_chip *girq;
 
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
   /* Get a pointer to the gpio_irq_chip */
   girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
   girq->default_type = IRQ_TYPE_NONE;
   girq->handler = handle_bad_irq;
   girq->fwnode = g->fwnode;
@@ -605,8 +713,9 @@ When implementing an irqchip inside a GPIO driver, these two functions should
 typically be called in the .irq_disable() and .irq_enable() callbacks from the
 irqchip.
 
-When using the gpiolib irqchip helpers, these callbacks are automatically
-assigned.
+When IRQCHIP_IMMUTABLE is not advertised by the irqchip, these callbacks
+are automatically assigned. This behaviour is deprecated and will be
+removed from the kernel.
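
As a hedged illustration of that pattern (the my_gpio_* names are
hypothetical, not taken from this patch), such callbacks might look
like:

.. code-block:: c

  static void my_gpio_disable_irq(struct irq_data *d)
  {
      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

      /* Hardware-specific disable would go here... */

      /* ...then synchronise the gpiolib state. */
      gpiochip_disable_irq(gc, d->hwirq);
  }

  static void my_gpio_enable_irq(struct irq_data *d)
  {
      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

      /* Synchronise the gpiolib state first... */
      gpiochip_enable_irq(gc, d->hwirq);

      /* ...then perform the hardware-specific enable. */
  }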
 
 
 Real-Time compliance for GPIO IRQ chips
index d477e296bda5f278dbf071baf7fb677b1772a379..311af516a3fd9c42a1eff43a2db4c392d0f449b9 100644 (file)
@@ -424,12 +424,6 @@ How commands are issued
 -----------------------
 
 Internal commands
-    First, qc is allocated and initialized using :c:func:`ata_qc_new_init`.
-    Although :c:func:`ata_qc_new_init` doesn't implement any wait or retry
-    mechanism when qc is not available, internal commands are currently
-    issued only during initialization and error recovery, so no other
-    command is active and allocation is guaranteed to succeed.
-
     Once allocated, qc's taskfile is initialized for the command to be
     executed. qc currently has two mechanisms to notify completion. One
     is via ``qc->complete_fn()`` callback and the other is completion
@@ -447,11 +441,6 @@ SCSI commands
     translated. No qc is involved in processing a simulated scmd. The
     result is computed right away and the scmd is completed.
 
-    For a translated scmd, :c:func:`ata_qc_new_init` is invoked to allocate a
-    qc and the scmd is translated into the qc. SCSI midlayer's
-    completion notification function pointer is stored into
-    ``qc->scsidone``.
-
     ``qc->complete_fn()`` callback is used for completion notification. ATA
     commands use :c:func:`ata_scsi_qc_complete` while ATAPI commands use
     :c:func:`atapi_qc_complete`. Both functions end up calling ``qc->scsidone``
index 83eafe1a7f68602ea59fb737fbfbed5bf8fcb1b9..ff21a83abe6253b52b9ab834bb51a5087c6af8eb 100644 (file)
@@ -27,5 +27,5 @@
     |       sparc: | TODO |
     |          um: | TODO |
     |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
     -----------------------
index bb1c1801553e0f22913ff442aab03f9151b42985..72e7aadeda7ef9389e269f34018184a2e8d30f0d 100644 (file)
@@ -27,5 +27,5 @@
     |       sparc: |  ok  |
     |          um: | TODO |
     |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
     -----------------------
index 5163a60a1c1ec5c5e42f365b054312535b09a465..c905aa3c1d815685f4afc96319cd441e65a71dde 100644 (file)
@@ -27,5 +27,5 @@
     |       sparc: |  ok  |
     |          um: | TODO |
     |         x86: |  ok  |
-    |      xtensa: | TODO |
+    |      xtensa: |  ok  |
     -----------------------
index 8cc536d08f51fec34cbea15f8283dda76cc72900..b7d42fd65e9d005fc68220026954729dc539a2f0 100644 (file)
@@ -70,12 +70,23 @@ must live on a read-write filesystem because they are independently
 updated and potentially user-installed, so dm-verity cannot be used.
 
 The base fs-verity feature is a hashing mechanism only; actually
-authenticating the files is up to userspace.  However, to meet some
-users' needs, fs-verity optionally supports a simple signature
-verification mechanism where users can configure the kernel to require
-that all fs-verity files be signed by a key loaded into a keyring; see
-`Built-in signature verification`_.  Support for fs-verity file hashes
-in IMA (Integrity Measurement Architecture) policies is also planned.
+authenticating the files may be done by:
+
+* Userspace-only
+
+* Built-in signature verification + userspace policy
+
+  fs-verity optionally supports a simple signature verification
+  mechanism where users can configure the kernel to require that
+  all fs-verity files be signed by a key loaded into a keyring;
+  see `Built-in signature verification`_.
+
+* Integrity Measurement Architecture (IMA)
+
+  IMA supports including fs-verity file digests and signatures in the
+  IMA measurement list and verifying fs-verity based file signatures
+  stored as security.ima xattrs, based on policy.
+
 
 User API
 ========
@@ -653,12 +664,12 @@ weren't already directly answered in other parts of this document.
     hashed and what to do with those hashes, such as log them,
     authenticate them, or add them to a measurement list.
 
-    IMA is planned to support the fs-verity hashing mechanism as an
-    alternative to doing full file hashes, for people who want the
-    performance and security benefits of the Merkle tree based hash.
-    But it doesn't make sense to force all uses of fs-verity to be
-    through IMA.  As a standalone filesystem feature, fs-verity
-    already meets many users' needs, and it's testable like other
+    IMA supports the fs-verity hashing mechanism as an alternative
+    to full file hashes, for those who want the performance and
+    security benefits of the Merkle tree based hash.  However, it
+    doesn't make sense to force all uses of fs-verity to be through
+    IMA.  fs-verity already meets many users' needs even as a
+    standalone filesystem feature, and it's testable like other
     filesystem features e.g. with xfstests.
 
 :Q: Isn't fs-verity useless because the attacker can just modify the
index 061744c436d99e0670462b0627ab1a177b2a3dbf..6a0dd99786f99b4fb0a36bfbb3cb46e4cde94864 100644 (file)
@@ -1183,85 +1183,7 @@ Provides counts of softirq handlers serviced since boot time, for each CPU.
     HRTIMER:         0          0          0          0
        RCU:      1678       1769       2178       2250
 
-
-1.3 IDE devices in /proc/ide
-----------------------------
-
-The subdirectory /proc/ide contains information about all IDE devices of which
-the kernel  is  aware.  There is one subdirectory for each IDE controller, the
-file drivers  and a link for each IDE device, pointing to the device directory
-in the controller specific subtree.
-
-The file 'drivers' contains general information about the drivers used for the
-IDE devices::
-
-  > cat /proc/ide/drivers
-  ide-cdrom version 4.53
-  ide-disk version 1.08
-
-More detailed  information  can  be  found  in  the  controller  specific
-subdirectories. These  are  named  ide0,  ide1  and  so  on.  Each  of  these
-directories contains the files shown in table 1-6.
-
-
-.. table:: Table 1-6: IDE controller info in  /proc/ide/ide?
-
- ======= =======================================
- File    Content
- ======= =======================================
- channel IDE channel (0 or 1)
- config  Configuration (only for PCI/IDE bridge)
- mate    Mate name
- model   Type/Chipset of IDE controller
- ======= =======================================
-
-Each device  connected  to  a  controller  has  a separate subdirectory in the
-controllers directory.  The  files  listed in table 1-7 are contained in these
-directories.
-
-
-.. table:: Table 1-7: IDE device information
-
- ================ ==========================================
- File             Content
- ================ ==========================================
- cache            The cache
- capacity         Capacity of the medium (in 512Byte blocks)
- driver           driver and version
- geometry         physical and logical geometry
- identify         device identify block
- media            media type
- model            device identifier
- settings         device setup
- smart_thresholds IDE disk management thresholds
- smart_values     IDE disk management values
- ================ ==========================================
-
-The most  interesting  file is ``settings``. This file contains a nice
-overview of the drive parameters::
-
-  # cat /proc/ide/ide0/hda/settings
-  name                    value           min             max             mode
-  ----                    -----           ---             ---             ----
-  bios_cyl                526             0               65535           rw
-  bios_head               255             0               255             rw
-  bios_sect               63              0               63              rw
-  breada_readahead        4               0               127             rw
-  bswap                   0               0               1               r
-  file_readahead          72              0               2097151         rw
-  io_32bit                0               0               3               rw
-  keepsettings            0               0               1               rw
-  max_kb_per_request      122             1               127             rw
-  multcount               0               0               8               rw
-  nice1                   1               0               1               rw
-  nowerr                  0               0               1               rw
-  pio_mode                write-only      0               255             w
-  slow                    0               0               1               rw
-  unmaskirq               0               0               1               rw
-  using_dma               0               0               1               rw
-
-
-1.4 Networking info in /proc/net
+1.3 Networking info in /proc/net
 --------------------------------
 
 The subdirectory  /proc/net  follows  the  usual  pattern. Table 1-8 shows the
@@ -1340,7 +1262,7 @@ It will contain information that is specific to that bond, such as the
 current slaves of the bond, the link status of the slaves, and how
 many times the slaves link has failed.
 
-1.5 SCSI info
+1.4 SCSI info
 -------------
 
 If you  have  a  SCSI  host adapter in your system, you'll find a subdirectory
@@ -1403,7 +1325,7 @@ AHA-2940 SCSI adapter::
     Total transfers 0 (0 reads and 0 writes)
 
 
-1.6 Parallel port info in /proc/parport
+1.5 Parallel port info in /proc/parport
 ---------------------------------------
 
 The directory  /proc/parport  contains information about the parallel ports of
@@ -1428,7 +1350,7 @@ These directories contain the four files shown in Table 1-10.
            number or none).
  ========= ====================================================================
 
-1.7 TTY info in /proc/tty
+1.6 TTY info in /proc/tty
 -------------------------
 
 Information about  the  available  and actually used tty's can be found in the
@@ -1463,7 +1385,7 @@ To see  which  tty's  are  currently in use, you can simply look into the file
   unknown              /dev/tty        4    1-63 console
 
 
-1.8 Miscellaneous kernel statistics in /proc/stat
+1.7 Miscellaneous kernel statistics in /proc/stat
 -------------------------------------------------
 
 Various pieces   of  information about  kernel activity  are  available in the
@@ -1536,7 +1458,7 @@ softirqs serviced; each subsequent column is the total for that particular
 softirq.
 
 
-1.9 Ext4 file system parameters
+1.8 Ext4 file system parameters
 -------------------------------
 
 Information about mounted ext4 file systems can be found in
@@ -1552,7 +1474,7 @@ in Table 1-12, below.
  mb_groups       details of multiblock allocator buddy cache of free blocks
  ==============  ==========================================================
 
-1.10 /proc/consoles
+1.9 /proc/consoles
 -------------------
 Shows registered system console lines.
 
index 6b213fe9a33e95c6cc5803f9833e1222e852c5ee..394b9f15dce059348673453000fa3c8f5ac5062f 100644 (file)
@@ -306,8 +306,15 @@ Further notes:
 Mount options
 -------------
 
-zonefs define the "errors=<behavior>" mount option to allow the user to specify
-zonefs behavior in response to I/O errors, inode size inconsistencies or zone
+zonefs defines several mount options:
+* errors=<behavior>
+* explicit-open
+
+"errors=<behavior>" option
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The "errors=<behavior>" option mount option allows the user to specify zonefs
+behavior in response to I/O errors, inode size inconsistencies or zone
 condition changes. The defined behaviors are as follows:
 
 * remount-ro (default)
@@ -326,6 +333,9 @@ discover the amount of data that has been written to the zone. In the case of a
 read-only zone discovered at run-time, as indicated in the previous section.
 The size of the zone file is left unchanged from its last updated value.
 
+"explicit-open" option
+~~~~~~~~~~~~~~~~~~~~~~
+
 A zoned block device (e.g. an NVMe Zoned Namespace device) may have limits on
 the number of zones that can be active, that is, zones that are in the
 implicit open, explicit open or closed conditions.  This potential limitation
@@ -341,6 +351,44 @@ guaranteed that write requests can be processed. Conversely, the
 to the device on the last close() of a zone file if the zone is not full nor
 empty.
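+
+For instance, a zoned block device could be mounted with both options via the
+mount(2) system call.  This is only a sketch; the device path and mount point
+are made up::
+
+  #include <stdio.h>
+  #include <sys/mount.h>
+
+  int main(void)
+  {
+          /* Hypothetical device and mount point. */
+          if (mount("/dev/nullb0", "/mnt/zonefs", "zonefs", 0,
+                    "errors=remount-ro,explicit-open") == -1) {
+                  perror("mount");
+                  return 1;
+          }
+          return 0;
+  }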
 
+Runtime sysfs attributes
+------------------------
+
+zonefs defines several sysfs attributes for mounted devices.  All attributes
+are user readable and can be found in the directory /sys/fs/zonefs/<dev>/,
+where <dev> is the name of the mounted zoned block device.
+
+The attributes defined are as follows.
+
+* **max_wro_seq_files**:  This attribute reports the maximum number of
+  sequential zone files that can be open for writing.  This number corresponds
+  to the maximum number of explicitly or implicitly open zones that the device
+  supports.  A value of 0 means that the device has no limit and that any zone
+  (any file) can be open for writing and written at any time, regardless of the
+  state of other zones.  When the *explicit-open* mount option is used, zonefs
+  will fail any open() system call requesting to open a sequential zone file for
+  writing when the number of sequential zone files already open for writing has
+  reached the *max_wro_seq_files* limit.
+* **nr_wro_seq_files**:  This attribute reports the current number of sequential
+  zone files open for writing.  When the "explicit-open" mount option is used,
+  this number can never exceed *max_wro_seq_files*.  If the *explicit-open*
+  mount option is not used, the reported number can be greater than
+  *max_wro_seq_files*.  In such a case, it is the responsibility of the
+  application not to write simultaneously to more than *max_wro_seq_files*
+  sequential zone files.  Failure to do so can result in write errors.
+* **max_active_seq_files**:  This attribute reports the maximum number of
+  sequential zone files that are in an active state, that is, sequential zone
+  files that are partially written (not empty nor full) or that have a zone that
+  is explicitly open (which happens only if the *explicit-open* mount option is
+  used).  This number is always equal to the maximum number of active zones that
+  the device supports.  A value of 0 means that the mounted device has no limit
+  on the number of sequential zone files that can be active.
+* **nr_active_seq_files**:  This attribute reports the current number of
+  sequential zone files that are active.  If *max_active_seq_files* is not 0,
+  then the value of *nr_active_seq_files* can never exceed the value of
+  *max_active_seq_files*, regardless of the use of the *explicit-open* mount
+  option.
+
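+These attributes can be read like any other sysfs file.  A minimal sketch,
+assuming a zoned block device named "sda" (the name is made up)::
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          FILE *f = fopen("/sys/fs/zonefs/sda/max_wro_seq_files", "r");
+          unsigned long max_wro;
+
+          if (!f) {
+                  perror("fopen");
+                  return 1;
+          }
+          if (fscanf(f, "%lu", &max_wro) == 1)
+                  printf("max seq files open for writing: %lu\n", max_wro);
+          fclose(f);
+          return 0;
+  }
+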
 Zonefs User Space Tools
 =======================
 
index 47fb4d6d45570643949ab4678bcdd5229292e690..6b62425ef9cd298efcca014c74ee358c79a17c89 100644 (file)
@@ -167,8 +167,7 @@ The table below shows an example of its usage::
         Name (_DSD, Package () {
             ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
             Package () {
-                Package () {"interrupt-names",
-                Package (2) {"default", "alert"}},
+                Package () { "interrupt-names", Package () { "default", "alert" } },
             }
         ...
         })
index 3373e27b707d686e540a2d9ebbf0a682201fd374..717e28226cde9cda46d4bcaa45935468662176ee 100644 (file)
@@ -6,7 +6,9 @@ Kernel driver aquacomputer-d5next
 Supported devices:
 
 * Aquacomputer D5 Next watercooling pump
+* Aquacomputer Farbwerk RGB controller
 * Aquacomputer Farbwerk 360 RGB controller
+* Aquacomputer Octo fan controller
 
 Author: Aleksa Savic
 
@@ -28,7 +30,10 @@ seems to require sending it a complete configuration. That includes addressable
 RGB LEDs, for which there is no standard sysfs interface. Thus, that task is
 better suited for userspace tools.
 
-The Farbwerk 360 exposes four temperature sensors. Depending on the device,
+The Octo exposes four temperature sensors and eight PWM controllable fans, along
+with their speed (in RPM), power, voltage and current.
+
+The Farbwerk and Farbwerk 360 expose four temperature sensors. Depending on the device,
 not all sysfs and debugfs entries will be available.
 
 Usage notes
index e7e8f1640f457ba52d9d5f754cccd06f6d7079ea..78ca69eda877890fbfd42a3caaa64b21a4049a9c 100644 (file)
@@ -4,17 +4,20 @@ Kernel driver asus_ec_sensors
 =================================
 
 Supported boards:
- * PRIME X570-PRO,
- * Pro WS X570-ACE,
- * ROG CROSSHAIR VIII DARK HERO,
+ * PRIME X470-PRO
+ * PRIME X570-PRO
+ * Pro WS X570-ACE
+ * ProArt X570-CREATOR WIFI
+ * ROG CROSSHAIR VIII DARK HERO
  * ROG CROSSHAIR VIII HERO (WI-FI)
- * ROG CROSSHAIR VIII FORMULA,
- * ROG CROSSHAIR VIII HERO,
- * ROG CROSSHAIR VIII IMPACT,
- * ROG STRIX B550-E GAMING,
- * ROG STRIX B550-I GAMING,
- * ROG STRIX X570-E GAMING,
- * ROG STRIX X570-F GAMING,
+ * ROG CROSSHAIR VIII FORMULA
+ * ROG CROSSHAIR VIII HERO
+ * ROG CROSSHAIR VIII IMPACT
+ * ROG STRIX B550-E GAMING
+ * ROG STRIX B550-I GAMING
+ * ROG STRIX X570-E GAMING
+ * ROG STRIX X570-E GAMING WIFI II
+ * ROG STRIX X570-F GAMING
  * ROG STRIX X570-I GAMING
 
 Authors:
@@ -52,3 +55,5 @@ Module Parameters
                the path is mostly identical for them). If ASUS changes this path
                in a future BIOS update, this parameter can be used to override
                the stored in the driver value until it gets updated.
+               A special string ":GLOBAL_LOCK" can be passed to use the ACPI
+               global lock instead of a dedicated mutex.
index d3323a96665d6fa7ad16b1e7949a0f904c568060..e5d85e40972c287d63be6e5a439f9e5626b79af1 100644 (file)
@@ -86,6 +86,13 @@ probe the BIOS on your machine and discover the appropriate codes.
 
 Again, when you find new codes, we'd be happy to have your patches!
 
+``thermal`` interface
+---------------------------
+
+The driver also exports the fans as thermal cooling devices with
+``type`` set to ``dell-smm-fan[1-3]``. This allows for easy fan control
+using one of the thermal governors.
+
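+A matching cooling device can be located by scanning the ``type`` attributes
+under /sys/class/thermal.  A minimal user space sketch (the upper bound of 32
+devices is an arbitrary assumption)::
+
+  #include <stdio.h>
+  #include <string.h>
+
+  int main(void)
+  {
+          char path[64], type[32];
+          int i;
+
+          for (i = 0; i < 32; i++) {
+                  FILE *f;
+
+                  snprintf(path, sizeof(path),
+                           "/sys/class/thermal/cooling_device%d/type", i);
+                  f = fopen(path, "r");
+                  if (!f)
+                          continue;
+                  if (fgets(type, sizeof(type), f) &&
+                      !strncmp(type, "dell-smm-fan1", 13))
+                          printf("fan 1 is cooling_device%d\n", i);
+                  fclose(f);
+          }
+          return 0;
+  }
+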
 Module parameters
 -----------------
 
@@ -324,6 +331,8 @@ Reading of fan types causes erratic fan behaviour.      Studio XPS 8000
 
                                                         Inspiron 580
 
+                                                        Inspiron 3505
+
 Fan-related SMM calls take too long (about 500ms).      Inspiron 7720
 
                                                         Vostro 3360
index c41eb61081036d05c13aefaa6ec7387eff87dc33..f3276b3a381a14256f409cb41355df481f2b53e7 100644 (file)
@@ -50,6 +50,10 @@ register/unregister functions::
 
   void devm_hwmon_device_unregister(struct device *dev);
 
+  char *hwmon_sanitize_name(const char *name);
+
+  char *devm_hwmon_sanitize_name(struct device *dev, const char *name);
+
 hwmon_device_register_with_groups registers a hardware monitoring device.
 The first parameter of this function is a pointer to the parent device.
 The name parameter is a pointer to the hwmon device name. The registration
@@ -72,7 +76,7 @@ hwmon_device_register_with_info is the most comprehensive and preferred means
 to register a hardware monitoring device. It creates the standard sysfs
 attributes in the hardware monitoring core, letting the driver focus on reading
 from and writing to the chip instead of having to bother with sysfs attributes.
-The parent device parameter cannot be NULL with non-NULL chip info. Its
+The parent device parameter as well as the chip parameter must not be NULL. Its
 parameters are described in more detail below.
 
 devm_hwmon_device_register_with_info is similar to
@@ -95,6 +99,18 @@ All supported hwmon device registration functions only accept valid device
 names. Device names including invalid characters (whitespace, '*', or '-')
 will be rejected. The 'name' parameter is mandatory.
 
+If the driver doesn't use a static device name (for example it uses
+dev_name()), and therefore cannot make sure the name only contains valid
+characters, hwmon_sanitize_name can be used. This convenience function
+duplicates the string and replaces any invalid characters with an
+underscore. It allocates memory for the new string, and it is the
+caller's responsibility to release that memory when the device is
+removed.
+
+devm_hwmon_sanitize_name is the resource managed version of
+hwmon_sanitize_name; the memory will be freed automatically on device
+removal.
+
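+A minimal probe-time sketch; ``foo_chip_info`` is a hypothetical
+``struct hwmon_chip_info`` and the error check assumes the function returns
+an ERR_PTR on failure::
+
+  static int foo_probe(struct platform_device *pdev)
+  {
+          struct device *dev = &pdev->dev;
+          struct device *hwmon_dev;
+          char *name;
+
+          /* dev_name() may contain '-', which hwmon rejects. */
+          name = devm_hwmon_sanitize_name(dev, dev_name(dev));
+          if (IS_ERR(name))
+                  return PTR_ERR(name);
+
+          hwmon_dev = devm_hwmon_device_register_with_info(dev, name, NULL,
+                                                           &foo_chip_info,
+                                                           NULL);
+          return PTR_ERR_OR_ZERO(hwmon_dev);
+  }
+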
 Using devm_hwmon_device_register_with_info()
 --------------------------------------------
 
index 863b76289159708fa2321fd928bfd5d838e02e19..a72c16872ec2d4147fb4599c4c010d3917045157 100644 (file)
@@ -90,6 +90,7 @@ Hardware Monitoring Kernel Drivers
    jc42
    k10temp
    k8temp
+   lan966x
    lineage-pem
    lm25066
    lm63
@@ -223,6 +224,7 @@ Hardware Monitoring Kernel Drivers
    wm8350
    xgene-hwmon
    xdpe12284
+   xdpe152c4
    zl6100
 
 .. only::  subproject and html
diff --git a/Documentation/hwmon/lan966x.rst b/Documentation/hwmon/lan966x.rst
new file mode 100644 (file)
index 0000000..1d1724a
--- /dev/null
@@ -0,0 +1,40 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver lan966x-hwmon
+===========================
+
+Supported chips:
+
+  * Microchip LAN9668 (sensor in SoC)
+
+    Prefix: 'lan9668-hwmon'
+
+    Datasheet: https://microchip-ung.github.io/lan9668_reginfo
+
+Authors:
+
+       Michael Walle <michael@walle.cc>
+
+Description
+-----------
+
+This driver implements support for the Microchip LAN9668 on-chip
+temperature sensor as well as its fan controller. It provides one
+temperature sensor and one fan controller. The temperature range
+of the sensor is specified from -40 to +125 degrees Celsius and
+its accuracy is +/- 5 degrees Celsius. The fan controller has a
+tacho input and a PWM output with a customizable PWM output
+frequency ranging from ~20Hz to ~650kHz.
+
+No alarms are supported by the SoC.
+
+The driver exports temperature values, fan tacho input and PWM
+settings via the following sysfs files:
+
+**temp1_input**
+
+**fan1_input**
+
+**pwm1**
+
+**pwm1_freq**
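+
+For example, the fan could be set to half speed by writing to **pwm1**,
+assuming the standard hwmon 0-255 PWM scale (the hwmon instance number is
+made up)::
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          FILE *f = fopen("/sys/class/hwmon/hwmon0/pwm1", "w");
+
+          if (!f) {
+                  perror("fopen");
+                  return 1;
+          }
+          fprintf(f, "128\n");  /* 0-255 scale, 128 is about 50% */
+          fclose(f);
+          return 0;
+  }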
index 92c0a7d7808c1176c7e84c29ca060cf5ab59a59c..6a4eef8efbaf48fa4a9cae685a3b809ab0e11ae4 100644 (file)
@@ -21,6 +21,14 @@ Supported chips:
 
     Datasheet: Not published
 
+  * Maxim MAX16602
+
+    Prefix: 'max16602'
+
+    Addresses scanned: -
+
+    Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX16602.pdf
+
 Author: Guenter Roeck <linux@roeck-us.net>
 
 
diff --git a/Documentation/hwmon/xdpe152c4.rst b/Documentation/hwmon/xdpe152c4.rst
new file mode 100644 (file)
index 0000000..ab92c32
--- /dev/null
@@ -0,0 +1,118 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver xdpe152
+=====================
+
+Supported chips:
+
+  * Infineon XDPE152C4
+
+    Prefix: 'xdpe152c4'
+
+  * Infineon XDPE15284
+
+    Prefix: 'xdpe15284'
+
+Authors:
+
+    Greg Schwendimann <greg.schwendimann@infineon.com>
+
+Description
+-----------
+
+This driver implements support for Infineon Digital Multi-phase Controller
+XDPE152C4 and XDPE15284 dual loop voltage regulators.
+The devices are compliant with:
+
+- Intel VR13, VR13HC and VR14 rev 1.86
+  converter specification.
+- Intel SVID rev 1.93 protocol.
+- PMBus rev 1.3.1 interface.
+
+Devices support linear format for reading input and output voltage, input
+and output current, input and output power and temperature.
+
+Devices support two pages for telemetry.
+
+The driver provides for current: input, maximum and critical thresholds
+and maximum and critical alarms. Low critical thresholds and low critical
+alarms are supported only for output current.
+The driver exports the following attributes via sysfs files, where
+indexes 1, 2 are for "iin" and 3, 4 for "iout":
+
+**curr[1-4]_crit**
+
+**curr[1-4]_crit_alarm**
+
+**curr[1-4]_input**
+
+**curr[1-4]_label**
+
+**curr[1-4]_max**
+
+**curr[1-4]_max_alarm**
+
+**curr[3-4]_lcrit**
+
+**curr[3-4]_lcrit_alarm**
+
+**curr[3-4]_rated_max**
+
+The driver provides for voltage: input, minimum, maximum, critical and low
+critical thresholds, and the corresponding alarms.
+The driver exports the following attributes via sysfs files, where
+indexes 1, 2 are for "vin" and 3, 4 for "vout":
+
+**in[1-4]_crit**
+
+**in[1-4]_crit_alarm**
+
+**in[1-4]_input**
+
+**in[1-4]_label**
+
+**in[1-4]_max**
+
+**in[1-4]_max_alarm**
+
+**in[1-4]_min**
+
+**in[1-4]_min_alarm**
+
+**in[3-4]_lcrit**
+
+**in[3-4]_lcrit_alarm**
+
+**in[3-4]_rated_max**
+
+**in[3-4]_rated_min**
+
+The driver provides for power: input values, maximum thresholds and alarms.
+The driver exports the following attributes via sysfs files, where
+indexes 1, 2 are for "pin" and 3, 4 for "pout":
+
+**power[1-2]_alarm**
+
+**power[1-4]_input**
+
+**power[1-4]_label**
+
+**power[1-4]_max**
+
+**power[1-4]_rated_max**
+
+The driver provides for temperature: input, maximum and critical thresholds
+and maximum and critical alarms.
+The driver exports the following attributes via sysfs files:
+
+**temp[1-2]_crit**
+
+**temp[1-2]_crit_alarm**
+
+**temp[1-2]_input**
+
+**temp[1-2]_max**
+
+**temp[1-2]_max_alarm**
index 3b25655e441bc77cfcb825df10f6d64e2de1e6a8..071f0151a7a4e2414edeafb5d41f349cb46f55d6 100644 (file)
@@ -99,10 +99,10 @@ unreproducible parts can be treated as sources:
 Structure randomisation
 -----------------------
 
-If you enable ``CONFIG_GCC_PLUGIN_RANDSTRUCT``, you will need to
-pre-generate the random seed in
-``scripts/gcc-plugins/randomize_layout_seed.h`` so the same value
-is used in rebuilds.
+If you enable ``CONFIG_RANDSTRUCT``, you will need to pre-generate
+the random seed in ``scripts/basic/randstruct.seed`` so the same
+value is used by each build. See ``scripts/gen-randstruct-seed.sh``
+for details.
 
 Debug info conflicts
 --------------------
index 49549aab41b42e6fce3f183edcd4653067a3abfd..feb257b7f3501e5f70da3d4ce62c8fafb5f9ca20 100644 (file)
@@ -123,6 +123,26 @@ allows a platform to register EM power values which are reflecting total power
 (static + dynamic). These power values might be coming directly from
 experiments and measurements.
 
+Registration of 'artificial' EM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There is an option to provide a custom callback for drivers missing detailed
+knowledge about the power value for each performance state. The callback
+.get_cost() is optional and provides the 'cost' values used by the EAS.
+This is useful for platforms that only provide information on relative
+efficiency between CPU types, where one could use the information to
+create an abstract power model. But even an abstract power model can
+sometimes be hard to fit in, given the input power value size restrictions.
+The .get_cost() callback allows providing 'cost' values that reflect the
+efficiency of the CPUs, in a relation different from the one that would be
+forced by the EM internal formulas calculating 'cost' values. To register an
+EM for such a platform, the driver must set the 'milliwatts' flag to 0 and
+provide both the .get_power() and .get_cost() callbacks. The EM framework
+will handle such a platform properly during registration and set the
+EM_PERF_DOMAIN_ARTIFICIAL flag for it. Other frameworks using the EM should
+take special care to test and treat this flag properly.
+
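+A condensed sketch of such a registration.  The callback bodies and the
+``NR_OPP`` count are made up for illustration, and the ``EM_ADV_DATA_CB``
+helper is assumed to pack both callbacks::
+
+  static int bar_get_power(struct device *dev, unsigned long *mW,
+                  unsigned long *KHz)
+  {
+          /* Abstract, bounded 'power' value for the given frequency. */
+          *mW = *KHz / 1000;
+          return 0;
+  }
+
+  static int bar_get_cost(struct device *dev, unsigned long freq,
+                  unsigned long *cost)
+  {
+          /* 'cost' reflecting the relative efficiency of the CPUs. */
+          *cost = freq / 1000;
+          return 0;
+  }
+
+  static void bar_register_em(struct device *dev)
+  {
+          struct em_data_callback em_cb =
+                  EM_ADV_DATA_CB(bar_get_power, bar_get_cost);
+
+          /* milliwatts == false marks the resulting EM as 'artificial'. */
+          em_dev_register_perf_domain(dev, NR_OPP, &em_cb,
+                          cpumask_of(dev->id), false);
+  }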
 Registration of 'simple' EM
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -181,8 +201,8 @@ EM framework::
 
   -> drivers/cpufreq/foo_cpufreq.c
 
-  01   static int est_power(unsigned long *mW, unsigned long *KHz,
-  02                   struct device *dev)
+  01   static int est_power(struct device *dev, unsigned long *mW,
+  02                   unsigned long *KHz)
   03   {
   04           long freq, power;
   05
index 6f8f36e10e8ba2ee62f2917b295b7a61103e4f4c..95999302d279fe4e96bede18aeaba84bceccc253 100644 (file)
@@ -244,10 +244,11 @@ disclosure of a particular issue, unless requested by a response team or by
 an involved disclosed party. The current ambassadors list:
 
   ============= ========================================================
-  ARM           Grant Likely <grant.likely@arm.com>
   AMD          Tom Lendacky <tom.lendacky@amd.com>
-  IBM Z         Christian Borntraeger <borntraeger@de.ibm.com>
-  IBM Power     Anton Blanchard <anton@linux.ibm.com>
+  Ampere       Darren Hart <darren@os.amperecomputing.com>
+  ARM          Catalin Marinas <catalin.marinas@arm.com>
+  IBM Power    Anton Blanchard <anton@linux.ibm.com>
+  IBM Z                Christian Borntraeger <borntraeger@de.ibm.com>
   Intel                Tony Luck <tony.luck@intel.com>
   Qualcomm     Trilok Soni <tsoni@codeaurora.org>
 
index c74f4a81588b24c55e99a194419c5f393be742de..572a3289c9cbf3b893404b01fe372403ee6010c7 100644 (file)
@@ -437,6 +437,20 @@ in a private repository which allows interested people to easily pull the
 series for testing. The usual way to offer this is a git URL in the cover
 letter of the patch series.
 
+Testing
+^^^^^^^
+
+Code should be tested before submitting to the tip maintainers.  Anything
+other than minor changes should be built, booted and tested with
+comprehensive (and heavyweight) kernel debugging options enabled.
+
+These debugging options can be found in kernel/configs/x86_debug.config
+and can be added to an existing kernel config by running::
+
+       make x86_debug.config
+
+Some of these options are x86-specific and can be left out when testing
+on other architectures.
 
 Coding style notes
 ------------------
index 1a91d92950a79a4a7e3684d99dc8522f22a234e8..15b4add314fc9f1660940e7ecfe5d4f601777f00 100644 (file)
@@ -66,12 +66,13 @@ descriptors by adding their identifier to the format string
    calculated with the SHA1 or MD5 hash algorithm;
  - 'n': the name of the event (i.e. the file name), with size up to 255 bytes;
  - 'd-ng': the digest of the event, calculated with an arbitrary hash
-   algorithm (field format: [<hash algo>:]digest, where the digest
-   prefix is shown only if the hash algorithm is not SHA1 or MD5);
+   algorithm (field format: <hash algo>:digest);
+ - 'd-ngv2': same as d-ng, but prefixed with the "ima" or "verity" digest type
+   (field format: <digest type>:<hash algo>:digest);
  - 'd-modsig': the digest of the event without the appended modsig;
  - 'n-ng': the name of the event, without size limitations;
- - 'sig': the file signature, or the EVM portable signature if the file
-   signature is not found;
+ - 'sig': the file signature, based on either the file's/fsverity's digest[1],
+   or the EVM portable signature, if 'security.ima' contains a file hash.
  - 'modsig' the appended file signature;
  - 'buf': the buffer data that was used to generate the hash without size limitations;
  - 'evmsig': the EVM portable signature;
@@ -88,7 +89,9 @@ Below, there is the list of defined template descriptors:
 
  - "ima": its format is ``d|n``;
  - "ima-ng" (default): its format is ``d-ng|n-ng``;
+ - "ima-ngv2": its format is ``d-ngv2|n-ng``;
  - "ima-sig": its format is ``d-ng|n-ng|sig``;
+ - "ima-sigv2": its format is ``d-ngv2|n-ng|sig``;
  - "ima-buf": its format is ``d-ng|n-ng|buf``;
  - "ima-modsig": its format is ``d-ng|n-ng|sig|d-modsig|modsig``;
  - "evm-sig": its format is ``d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode``;
index 16335de04e8c6d55e8832a86f6c7b653cdace85e..6ed8d2fa6f9ef6ff647e3f74c4f1bc9a25f07659 100644 (file)
@@ -17,3 +17,4 @@ Security Documentation
    tpm/index
    digsig
    landlock
+   secrets/index
index f614dad7de12f90aec85b32955986bd76126a39f..0bfb4c33974890db4bfa4bcd91d5c52730104d54 100644 (file)
@@ -35,6 +35,13 @@ safe.
          Rooted to Hardware Unique Key (HUK) which is generally burnt in on-chip
          fuses and is accessible to TEE only.
 
+     (3) CAAM (Cryptographic Acceleration and Assurance Module: IP on NXP SoCs)
+
+         When High Assurance Boot (HAB) is enabled and the CAAM is in secure
+         mode, trust is rooted to the OTPMK, a never-disclosed 256-bit key
+         randomly generated and fused into each SoC at manufacturing time.
+         Otherwise, a common fixed test key is used instead.
+
   *  Execution isolation
 
      (1) TPM
@@ -46,6 +53,10 @@ safe.
          Customizable set of operations running in isolated execution
          environment verified via Secure/Trusted boot process.
 
+     (3) CAAM
+
+         Fixed set of operations running in isolated execution environment.
+
   * Optional binding to platform integrity state
 
      (1) TPM
@@ -63,6 +74,11 @@ safe.
          Relies on Secure/Trusted boot process for platform integrity. It can
          be extended with TEE based measured boot process.
 
+     (3) CAAM
+
+         Relies on the High Assurance Boot (HAB) mechanism of NXP SoCs
+         for platform integrity.
+
   *  Interfaces and APIs
 
      (1) TPM
@@ -74,10 +90,13 @@ safe.
          TEEs have well-documented, standardized client interface and APIs. For
          more details refer to ``Documentation/staging/tee.rst``.
 
+     (3) CAAM
+
+         Interface is specific to silicon vendor.
 
   *  Threat model
 
-     The strength and appropriateness of a particular TPM or TEE for a given
+     The strength and appropriateness of a particular trust source for a given
      purpose must be assessed when using them to protect security-relevant data.
 
 
@@ -87,22 +106,32 @@ Key Generation
 Trusted Keys
 ------------
 
-New keys are created from random numbers generated in the trust source. They
-are encrypted/decrypted using a child key in the storage key hierarchy.
-Encryption and decryption of the child key must be protected by a strong
-access control policy within the trust source.
+New keys are created from random numbers. They are encrypted/decrypted using
+a child key in the storage key hierarchy. Encryption and decryption of the
+child key must be protected by a strong access control policy within the
+trust source. The random number generator in use differs according to the
+selected trust source:
 
-  *  TPM (hardware device) based RNG
+  *  TPM: hardware device based RNG
 
-     Strength of random numbers may vary from one device manufacturer to
-     another.
+     Keys are generated within the TPM. Strength of random numbers may vary
+     from one device manufacturer to another.
 
-  *  TEE (OP-TEE based on Arm TrustZone) based RNG
+  *  TEE: OP-TEE based on Arm TrustZone based RNG
 
      RNG is customizable as per platform needs. It can either be direct output
      from platform specific hardware RNG or a software based Fortuna CSPRNG
      which can be seeded via multiple entropy sources.
 
+  *  CAAM: Kernel RNG
+
+     The normal kernel random number generator is used. To seed it from the
+     CAAM HWRNG, enable CRYPTO_DEV_FSL_CAAM_RNG_API and ensure the device
+     is probed.
+
+Users may override the trust source's RNG by specifying ``trusted.rng=kernel``
+on the kernel command line, which forces use of the kernel's random number
+pool instead.
+
 Encrypted Keys
 --------------
 
@@ -189,6 +218,19 @@ Usage::
 specific to TEE device implementation.  The key length for new keys is always
 in bytes. Trusted Keys can be 32 - 128 bytes (256 - 1024 bits).
 
+Trusted Keys usage: CAAM
+------------------------
+
+Usage::
+
+    keyctl add trusted name "new keylen" ring
+    keyctl add trusted name "load hex_blob" ring
+    keyctl print keyid
+
+"keyctl print" returns an ASCII hex copy of the sealed key, which is in a
+CAAM-specific format.  The key length for new keys is always in bytes.
+Trusted Keys can be 32 - 128 bytes (256 - 1024 bits).
+
 Encrypted Keys usage
 --------------------
 
index 3df68cb1d10fa7bf542632c92397bfa6f87a1838..5c77730b44791e0c9fbca8ca939973c07c3c3022 100644 (file)
@@ -7,7 +7,7 @@ Landlock LSM: kernel documentation
 ==================================
 
 :Author: Mickaël Salaün
-:Date: March 2021
+:Date: May 2022
 
 Landlock's goal is to create scoped access-control (i.e. sandboxing).  To
 harden a whole system, this feature should be available to any process,
@@ -42,6 +42,21 @@ Guiding principles for safe access controls
 * Computation related to Landlock operations (e.g. enforcing a ruleset) shall
   only impact the processes requesting them.
 
+Design choices
+==============
+
+Filesystem access rights
+------------------------
+
+All access rights are tied to an inode and what can be accessed through it.
+Reading the content of a directory doesn't imply being allowed to read the
+content of a listed inode.  Indeed, a file name is local to its parent
+directory, and an inode can be referenced by multiple file names thanks to
+(hard) links.  Being able to unlink a file only has a direct impact on the
+directory, not the unlinked inode.  This is the reason why
+`LANDLOCK_ACCESS_FS_REMOVE_FILE` and `LANDLOCK_ACCESS_FS_REFER` may only be
+tied to directories, not to files.
+
 Tests
 =====
 
diff --git a/Documentation/security/secrets/coco.rst b/Documentation/security/secrets/coco.rst
new file mode 100644 (file)
index 0000000..262e7ab
--- /dev/null
@@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Confidential Computing secrets
+==============================
+
+This document describes how Confidential Computing secret injection is handled
+from the firmware to the operating system, in the EFI driver and the efi_secret
+kernel module.
+
+
+Introduction
+============
+
+Confidential Computing (coco) hardware such as AMD SEV (Secure Encrypted
+Virtualization) allows guest owners to inject secrets into the VMs
+memory without the host/hypervisor being able to read them.  In SEV,
+secret injection is performed early in the VM launch process, before the
+guest starts running.
+
+The efi_secret kernel module allows userspace applications to access these
+secrets via securityfs.
+
+
+Secret data flow
+================
+
+The guest firmware may reserve a designated memory area for secret injection,
+and publish its location (base GPA and length) in the EFI configuration table
+under a ``LINUX_EFI_COCO_SECRET_AREA_GUID`` entry
+(``adf956ad-e98c-484c-ae11-b51c7d336447``).  This memory area should be marked
+by the firmware as ``EFI_RESERVED_TYPE``, and therefore the kernel should
+not use it for its own purposes.
+
+During the VM's launch, the virtual machine manager may inject a secret to that
+area.  In AMD SEV and SEV-ES this is performed using the
+``KVM_SEV_LAUNCH_SECRET`` command (see [sev]_).  The structure of the injected
+Guest Owner secret data should be a GUIDed table of secret values; the binary
+format is described in ``drivers/virt/coco/efi_secret/efi_secret.c`` under
+"Structure of the EFI secret area".
+
+On kernel start, the kernel's EFI driver saves the location of the secret area
+(taken from the EFI configuration table) in the ``efi.coco_secret`` field.
+Later it checks if the secret area is populated: it maps the area and checks
+whether its content begins with ``EFI_SECRET_TABLE_HEADER_GUID``
+(``1e74f542-71dd-4d66-963e-ef4287ff173b``).  If the secret area is populated,
+the EFI driver will autoload the efi_secret kernel module, which exposes the
+secrets to userspace applications via securityfs.  The details of the
+efi_secret filesystem interface are in [secrets-coco-abi]_.
+
+
+Application usage example
+=========================
+
+Consider a guest performing computations on encrypted files.  The Guest Owner
+provides the decryption key (= secret) using the secret injection mechanism.
+The guest application reads the secret from the efi_secret filesystem and
+proceeds to decrypt the files into memory and then performs the needed
+computations on the content.
+
+In this example, the host can't read the files from the disk image
+because they are encrypted.  The host can't read the decryption key because
+it is passed using the secret injection mechanism (= secure channel).  The
+host can't read the decrypted content from memory because it's a
+confidential (memory-encrypted) guest.
+
+Here is a simple example for usage of the efi_secret module in a guest
+to which an EFI secret area with 4 secrets was injected during launch::
+
+       # ls -la /sys/kernel/security/secrets/coco
+       total 0
+       drwxr-xr-x 2 root root 0 Jun 28 11:54 .
+       drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+       -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+       -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+       -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+       -r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+       # hd /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+       00000000  74 68 65 73 65 2d 61 72  65 2d 74 68 65 2d 6b 61  |these-are-the-ka|
+       00000010  74 61 2d 73 65 63 72 65  74 73 00 01 02 03 04 05  |ta-secrets......|
+       00000020  06 07                                             |..|
+       00000022
+
+       # rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+       # ls -la /sys/kernel/security/secrets/coco
+       total 0
+       drwxr-xr-x 2 root root 0 Jun 28 11:55 .
+       drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+       -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+       -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+       -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+
+References
+==========
+
+See [sev-api-spec]_ for more info regarding SEV ``LAUNCH_SECRET`` operation.
+
+.. [sev] Documentation/virt/kvm/amd-memory-encryption.rst
+.. [secrets-coco-abi] Documentation/ABI/testing/securityfs-secrets-coco
+.. [sev-api-spec] https://www.amd.com/system/files/TechDocs/55766_SEV-KM_API_Specification.pdf
diff --git a/Documentation/security/secrets/index.rst b/Documentation/security/secrets/index.rst
new file mode 100644 (file)
index 0000000..ced34e9
--- /dev/null
@@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Secrets documentation
+=====================
+
+.. toctree::
+
+   coco
index 682948fc88a34a42a647bc90dae310c1ee6c5803..2ad91dbebd7cb61e0056421c5ab117bb4e5a00cb 100644 (file)
@@ -718,6 +718,9 @@ CDROMPLAYBLK
 
 
 CDROMGETSPINDOWN
+       Obsolete, was ide-cd only
+
+
        usage::
 
          char spindown;
@@ -736,6 +739,9 @@ CDROMGETSPINDOWN
 
 
 CDROMSETSPINDOWN
+       Obsolete, was ide-cd only
+
+
        usage::
 
          char spindown
index f35552ff19ba82441d7dcb2a7e2d89f6d9284c0b..b8ea5949396481776392399e2cc34ffc41148e4a 100644 (file)
@@ -1,14 +1,14 @@
 .. SPDX-License-Identifier: GPL-2.0
 .. Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
 .. Copyright © 2019-2020 ANSSI
-.. Copyright © 2021 Microsoft Corporation
+.. Copyright © 2021-2022 Microsoft Corporation
 
 =====================================
 Landlock: unprivileged access control
 =====================================
 
 :Author: Mickaël Salaün
-:Date: March 2021
+:Date: May 2022
 
 The goal of Landlock is to enable to restrict ambient rights (e.g. global
 filesystem access) for a set of processes.  Because Landlock is a stackable
@@ -18,6 +18,13 @@ is expected to help mitigate the security impact of bugs or
 unexpected/malicious behaviors in user space applications.  Landlock empowers
 any process, including unprivileged ones, to securely restrict themselves.
 
+We can quickly make sure that Landlock is enabled in the running system by
+looking for "landlock: Up and running" in kernel logs (as root): ``dmesg | grep
+landlock || journalctl -kg landlock``.  Developers can also easily check for
+Landlock support with a :ref:`related system call <landlock_abi_versions>`.  If
+Landlock is not currently supported, we need to :ref:`configure the kernel
+appropriately <kernel_support>`.
+
 Landlock rules
 ==============
 
@@ -29,14 +36,15 @@ the thread enforcing it, and its future children.
 Defining and enforcing a security policy
 ----------------------------------------
 
-We first need to create the ruleset that will contain our rules.  For this
+We first need to define the ruleset that will contain our rules.  For this
 example, the ruleset will contain rules that only allow read actions, but write
 actions will be denied.  The ruleset then needs to handle both of these kind of
-actions.
+actions.  This is required for backward and forward compatibility (i.e. the
+kernel and user space may not know each other's supported restrictions), hence
+the need to be explicit about the denied-by-default access rights.
 
 .. code-block:: c
 
-    int ruleset_fd;
     struct landlock_ruleset_attr ruleset_attr = {
         .handled_access_fs =
             LANDLOCK_ACCESS_FS_EXECUTE |
@@ -51,9 +59,34 @@ actions.
             LANDLOCK_ACCESS_FS_MAKE_SOCK |
             LANDLOCK_ACCESS_FS_MAKE_FIFO |
             LANDLOCK_ACCESS_FS_MAKE_BLOCK |
-            LANDLOCK_ACCESS_FS_MAKE_SYM,
+            LANDLOCK_ACCESS_FS_MAKE_SYM |
+            LANDLOCK_ACCESS_FS_REFER,
     };
 
+Because we may not know on which kernel version an application will be
+executed, it is safer to follow a best-effort security approach.  Indeed, we
+should try to protect users as much as possible whatever the kernel they are
+using.  To avoid binary enforcement (i.e. either all security features or
+none), we can leverage a dedicated Landlock command to get the current version
+of the Landlock ABI and adapt the handled accesses.  Let's check if we should
+remove the `LANDLOCK_ACCESS_FS_REFER` access right which is only supported
+starting with the second version of the ABI.
+
+.. code-block:: c
+
+    int abi;
+
+    abi = landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
+    if (abi < 2) {
+        ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_REFER;
+    }
+
+This enables the creation of an inclusive ruleset that will contain our rules.
+
+.. code-block:: c
+
+    int ruleset_fd;
+
     ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
     if (ruleset_fd < 0) {
         perror("Failed to create a ruleset");
@@ -92,6 +125,11 @@ descriptor.
         return 1;
     }
 
+It may also be required to create rules following the same logic as explained
+for the ruleset creation, by filtering access rights according to the Landlock
+ABI version.  In this example, this is not required because
+`LANDLOCK_ACCESS_FS_REFER` is not allowed by any rule.
+
 We now have a ruleset with one rule allowing read access to ``/usr`` while
 denying all other handled accesses for the filesystem.  The next step is to
 restrict the current thread from gaining more privileges (e.g. thanks to a SUID
@@ -125,6 +163,27 @@ ruleset.
 
 Full working code can be found in `samples/landlock/sandboxer.c`_.
 
+Good practices
+--------------
+
+It is recommended to set access rights on file hierarchy leaves as much as
+possible.  For instance, it is better to be able to have ``~/doc/`` as a
+read-only hierarchy and ``~/tmp/`` as a read-write hierarchy, compared to
+``~/`` as a read-only hierarchy and ``~/tmp/`` as a read-write hierarchy.
+Following this good practice leads to self-sufficient hierarchies that don't
+depend on their location (i.e. parent directories).  This is particularly
+relevant when we want to allow linking or renaming.  Indeed, having consistent
+access rights per directory makes it possible to change the location of such a
+directory without relying on the destination directory access rights (except
+those that are required for this operation, see `LANDLOCK_ACCESS_FS_REFER`
+documentation).  Having self-sufficient hierarchies also helps to tighten the
+required access rights to the minimal set of data.  This also helps avoid
+sinkhole directories, i.e. directories where data can be linked to but not
+linked from.  However, this depends on data organization, which might not be
+controlled by developers.  In this case, granting read-write access to
+``~/tmp/``, instead of write-only access, would potentially allow moving
+``~/tmp/`` to a non-readable directory and still keep the ability to list its
+content.
+
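+For instance, granting different rights to the two hierarchies could look
+like the following sketch, reusing the ``landlock_add_rule`` helper from the
+example above (paths and the exact access right sets are illustrative, and
+error handling is elided):
+
+.. code-block:: c
+
+    struct landlock_path_beneath_attr path_beneath = {
+        .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
+                          LANDLOCK_ACCESS_FS_READ_DIR,
+    };
+
+    /* ~/doc/ as a read-only hierarchy. */
+    path_beneath.parent_fd = open("/home/user/doc", O_PATH | O_CLOEXEC);
+    landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+                      &path_beneath, 0);
+    close(path_beneath.parent_fd);
+
+    /* ~/tmp/ as a read-write hierarchy. */
+    path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_WRITE_FILE |
+                                   LANDLOCK_ACCESS_FS_MAKE_REG |
+                                   LANDLOCK_ACCESS_FS_REMOVE_FILE;
+    path_beneath.parent_fd = open("/home/user/tmp", O_PATH | O_CLOEXEC);
+    landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+                      &path_beneath, 0);
+    close(path_beneath.parent_fd);
+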
 Layers of file path access rights
 ---------------------------------
 
@@ -192,6 +251,58 @@ To be allowed to use :manpage:`ptrace(2)` and related syscalls on a target
 process, a sandboxed process should have a subset of the target process rules,
 which means the tracee must be in a sub-domain of the tracer.
 
+Compatibility
+=============
+
+Backward and forward compatibility
+----------------------------------
+
+Landlock is designed to be compatible with past and future versions of the
+kernel.  This is achieved thanks to the system call attributes and the
+associated bitflags, particularly the ruleset's `handled_access_fs`.  Making
+handled access rights explicit enables the kernel and user space to have a clear
+contract with each other.  This is required to make sure sandboxing will not
+get stricter with a system update, which could break applications.
+
+Developers can subscribe to the `Landlock mailing list
+<https://subspace.kernel.org/lists.linux.dev.html>`_ to knowingly update and
+test their applications with the latest available features.  In the interest of
+users, and because they may use different kernel versions, it is strongly
+encouraged to follow a best-effort security approach by checking the Landlock
+ABI version at runtime and only enforcing the supported features.
+
+.. _landlock_abi_versions:
+
+Landlock ABI versions
+---------------------
+
+The Landlock ABI version can be read with the sys_landlock_create_ruleset()
+system call:
+
+.. code-block:: c
+
+    int abi;
+
+    abi = landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
+    if (abi < 0) {
+        switch (errno) {
+        case ENOSYS:
+            printf("Landlock is not supported by the current kernel.\n");
+            break;
+        case EOPNOTSUPP:
+            printf("Landlock is currently disabled.\n");
+            break;
+        }
+        return 0;
+    }
+    if (abi >= 2) {
+        printf("Landlock supports LANDLOCK_ACCESS_FS_REFER.\n");
+    }
+
+The following kernel interfaces are implicitly supported by the first ABI
+version.  Features only supported from a specific version are explicitly marked
+as such.
+
 Kernel interface
 ================
 
@@ -228,21 +339,6 @@ Enforcing a ruleset
 Current limitations
 ===================
 
-File renaming and linking
--------------------------
-
-Because Landlock targets unprivileged access controls, it is needed to properly
-handle composition of rules.  Such property also implies rules nesting.
-Properly handling multiple layers of ruleset, each one of them able to restrict
-access to files, also implies to inherit the ruleset restrictions from a parent
-to its hierarchy.  Because files are identified and restricted by their
-hierarchy, moving or linking a file from one directory to another implies to
-propagate the hierarchy constraints.  To protect against privilege escalations
-through renaming or linking, and for the sake of simplicity, Landlock currently
-limits linking and renaming to the same directory.  Future Landlock evolutions
-will enable more flexibility for renaming and linking, with dedicated ruleset
-flags.
-
 Filesystem topology modification
 --------------------------------
 
@@ -267,8 +363,8 @@ restrict such paths with dedicated ruleset flags.
 Ruleset layers
 --------------
 
-There is a limit of 64 layers of stacked rulesets.  This can be an issue for a
-task willing to enforce a new ruleset in complement to its 64 inherited
+There is a limit of 16 layers of stacked rulesets.  This can be an issue for a
+task willing to enforce a new ruleset in complement to its 16 inherited
 rulesets.  Once this limit is reached, sys_landlock_restrict_self() returns
 E2BIG.  It is then strongly suggested to carefully build rulesets once in the
 life of a thread, especially for applications able to launch other applications
@@ -281,6 +377,44 @@ Memory usage
 Kernel memory allocated to create rulesets is accounted and can be restricted
 by the Documentation/admin-guide/cgroup-v1/memory.rst.
 
+Previous limitations
+====================
+
+File renaming and linking (ABI 1)
+---------------------------------
+
+Because Landlock targets unprivileged access controls, it needs to properly
+handle composition of rules.  Such property also implies rules nesting.
+Properly handling multiple layers of rulesets, each one of them able to
+restrict access to files, also implies inheritance of the ruleset restrictions
+from a parent to its hierarchy.  Because files are identified and restricted by
+their hierarchy, moving or linking a file from one directory to another implies
+propagation of the hierarchy constraints, or restriction of these actions
+according to the potentially lost constraints.  To protect against privilege
+escalations through renaming or linking, and for the sake of simplicity,
+Landlock previously limited linking and renaming to the same directory.
+Starting with the Landlock ABI version 2, it is now possible to securely
+control renaming and linking thanks to the new `LANDLOCK_ACCESS_FS_REFER`
+access right.
+
+.. _kernel_support:
+
+Kernel support
+==============
+
+Landlock was first introduced in Linux 5.13 but it must be configured at build
+time with `CONFIG_SECURITY_LANDLOCK=y`.  Landlock must also be enabled at boot
+time like the other security modules.  The list of security modules enabled by
+default is set with `CONFIG_LSM`.  The kernel configuration should then
+contain `CONFIG_LSM=landlock,[...]`, with `[...]` being the list of other
+potentially useful security modules for the running system (see the
+`CONFIG_LSM` help).
+
+If the running kernel doesn't have `landlock` in `CONFIG_LSM`, then we can
+still enable it at boot time by adding ``lsm=landlock,[...]`` to the kernel
+command line, thanks to the bootloader configuration (see
+Documentation/admin-guide/kernel-parameters.rst).
+
 Questions and answers
 =====================
 
index 539e9d4a4860c2405fac7cd63341a2c148c643be..d1e2b9193f09844d8d35ab6cbe048f9b0a897052 100644 (file)
@@ -271,6 +271,16 @@ notifying process it will be replaced. The supervisor can also add an FD, and
 respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
 value will be the injected file descriptor number.
 
+The notifying process can be preempted, resulting in the notification being
+aborted. This can be problematic when trying to take actions on behalf of the
+notifying process that are long-running and typically retryable (e.g. mounting
+a filesystem). Alternatively, at filter installation time, the
+``SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV`` flag can be set. With this flag,
+once a user notification has been received by the supervisor, the notifying
+process will ignore non-fatal signals until the response is sent. Signals that
+are sent prior to the notification being received by userspace are handled
+normally.
+
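+A sketch of installing a filter with this flag while also creating the user
+notification listener; the catch-all filter is only for illustration::
+
+    #include <linux/filter.h>
+    #include <linux/seccomp.h>
+    #include <stdio.h>
+    #include <sys/prctl.h>
+    #include <sys/syscall.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+            struct sock_filter insns[] = {
+                    /* Send every syscall to the supervisor. */
+                    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_USER_NOTIF),
+            };
+            struct sock_fprog prog = {
+                    .len = 1,
+                    .filter = insns,
+            };
+            int notify_fd;
+
+            prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+            notify_fd = syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER,
+                                SECCOMP_FILTER_FLAG_NEW_LISTENER |
+                                SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV,
+                                &prog);
+            if (notify_fd < 0) {
+                    perror("seccomp");
+                    return 1;
+            }
+            /* Pass notify_fd to the supervisor, e.g. over a socket. */
+            return 0;
+    }
+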
 It is worth noting that ``struct seccomp_data`` contains the values of register
 arguments to the syscall, but does not contain pointers to memory. The task's
 memory is accessible to suitably privileged traces via ``ptrace()`` or
diff --git a/Documentation/virt/coco/sev-guest.rst b/Documentation/virt/coco/sev-guest.rst
new file mode 100644 (file)
index 0000000..bf593e8
--- /dev/null
@@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================================
+The Definitive SEV Guest API Documentation
+===================================================================
+
+1. General description
+======================
+
+The SEV API is a set of ioctls that are used by the guest or hypervisor
+to get or set a certain aspect of the SEV virtual machine. The ioctls belong
+to the following classes:
+
+ - Hypervisor ioctls: These query and set global attributes which affect the
+   whole SEV firmware.  These ioctls are used by platform provisioning tools.
+
+ - Guest ioctls: These query and set attributes of the SEV virtual machine.
+
+2. API description
+==================
+
+This section describes ioctls that are used for querying the SEV guest report
+from the SEV firmware. For each ioctl, the following information is provided
+along with a description:
+
+  Technology:
+      which SEV technology provides this ioctl. SEV, SEV-ES, SEV-SNP or all.
+
+  Type:
+      hypervisor or guest. The ioctl can be used inside the guest or the
+      hypervisor.
+
+  Parameters:
+      what parameters are accepted by the ioctl.
+
+  Returns:
+      the return value.  General error numbers (-ENOMEM, -EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device.
+The ioctl accepts struct snp_user_guest_request. The input and output structure is
+specified through the req_data and resp_data field respectively. If the ioctl fails
+to execute due to a firmware error, then fw_err code will be set otherwise the
+fw_err will be set to 0x00000000000000ff.
+
+The firmware checks that the message sequence counter is one greater than
+the guest's message sequence counter. If the guest driver fails to increment the
+message counter (e.g. counter overflow), then -EIO will be returned.
+
+::
+
+        struct snp_guest_request_ioctl {
+                /* Message version number */
+                __u32 msg_version;
+
+                /* Request and response structure address */
+                __u64 req_data;
+                __u64 resp_data;
+
+                /* firmware error code on failure (see psp-sev.h) */
+                __u64 fw_err;
+        };
+
+2.1 SNP_GET_REPORT
+------------------
+
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in): struct snp_report_req
+:Returns (out): struct snp_report_resp on success, -negative on error
+
+The SNP_GET_REPORT ioctl can be used to query the attestation report from the
+SEV-SNP firmware. The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command
+provided by the SEV-SNP firmware to query the attestation report.
+
+On success, snp_report_resp.data will contain the report. The report
+is in the format described in the SEV-SNP specification. See the SEV-SNP
+specification for further details.
+
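+A sketch of issuing this ioctl, assuming the uapi header shipped with the
+driver; error handling is minimal::
+
+        #include <fcntl.h>
+        #include <linux/sev-guest.h>
+        #include <stdio.h>
+        #include <sys/ioctl.h>
+        #include <unistd.h>
+
+        int main(void)
+        {
+                struct snp_report_req req = {0};
+                struct snp_report_resp resp = {0};
+                struct snp_guest_request_ioctl guest_req = {
+                        .msg_version = 1,
+                        .req_data = (__u64)(unsigned long)&req,
+                        .resp_data = (__u64)(unsigned long)&resp,
+                };
+                int fd = open("/dev/sev-guest", O_RDWR);
+
+                if (fd < 0) {
+                        perror("open");
+                        return 1;
+                }
+                if (ioctl(fd, SNP_GET_REPORT, &guest_req) == -1) {
+                        fprintf(stderr, "report failed, fw_err 0x%llx\n",
+                                (unsigned long long)guest_req.fw_err);
+                        close(fd);
+                        return 1;
+                }
+                /* resp.data now holds the signed attestation report. */
+                close(fd);
+                return 0;
+        }
+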
+2.2 SNP_GET_DERIVED_KEY
+-----------------------
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in): struct snp_derived_key_req
+:Returns (out): struct snp_derived_key_resp on success, -negative on error
+
+The SNP_GET_DERIVED_KEY ioctl can be used to get a key derived from a root key.
+The derived key can be used by the guest for any purpose, such as sealing keys
+or communicating with external entities.
+
+The ioctl uses the SNP_GUEST_REQUEST (MSG_KEY_REQ) command provided by the
+SEV-SNP firmware to derive the key. See SEV-SNP specification for further details
+on the various fields passed in the key derivation request.
+
+On success, the snp_derived_key_resp.data contains the derived key value. See
+the SEV-SNP specification for further details.
+
+
+2.3 SNP_GET_EXT_REPORT
+----------------------
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in/out): struct snp_ext_report_req
+:Returns (out): struct snp_report_resp on success, -negative on error
+
+The SNP_GET_EXT_REPORT ioctl is similar to the SNP_GET_REPORT. The difference is
+related to the additional certificate data that is returned with the report.
+The certificate data returned is provided by the hypervisor through
+SNP_SET_EXT_CONFIG.
+
+The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command provided by the SEV-SNP
+firmware to get the attestation report.
+
+On success, the snp_ext_report_resp.data will contain the attestation report
+and snp_ext_report_req.certs_address will contain the certificate blob. If the
+length of the blob is smaller than expected then snp_ext_report_req.certs_len will
+be updated with the expected value.
+
+See GHCB specification for further detail on how to parse the certificate blob.
+
+3. SEV-SNP CPUID Enforcement
+============================
+
+SEV-SNP guests can access a special page that contains a table of CPUID values
+that have been validated by the PSP as part of the SNP_LAUNCH_UPDATE firmware
+command. It provides the following assurances regarding the validity of CPUID
+values:
+
+ - Its address is obtained via bootloader/firmware (via CC blob), and those
+   binaries will be measured as part of the SEV-SNP attestation report.
+ - Its initial state will be encrypted/pvalidated, so attempts to modify
+   it during run-time will result in garbage being written, or #VC exceptions
+   being generated due to changes in validation state if the hypervisor tries
+   to swap the backing page.
+ - Attempts to bypass PSP checks by the hypervisor by using a normal page, or
+   a non-CPUID encrypted page will change the measurement provided by the
+   SEV-SNP attestation report.
+ - The CPUID page contents are *not* measured, but attempts to modify the
+   expected contents of a CPUID page as part of guest initialization will be
+   gated by the PSP CPUID enforcement policy checks performed on the page
+   during SNP_LAUNCH_UPDATE, and noticeable later if the guest owner
+   implements their own checks of the CPUID values.
+
+It is important to note that this last assurance is only useful if the kernel
+has taken care to make use of the SEV-SNP CPUID throughout all stages of boot.
+Otherwise, guest owner attestation provides no assurance that the kernel wasn't
+fed incorrect values at some point during boot.
+
+
+Reference
+---------
+
+SEV-SNP and GHCB specification: developer.amd.com/sev
+
+The driver is based on SEV-SNP firmware spec 0.9 and GHCB spec version 2.0.
index edea7fea95a84f1a2f990bd37cd4d5a176590b5b..492f0920b9885c828a6be62461315b3bdb9ce5cb 100644 (file)
@@ -13,6 +13,7 @@ Linux Virtualization Support
    guest-halt-polling
    ne_overview
    acrn/index
+   coco/sev-guest
 
 .. only:: html and subproject
 
index 4a900cdbc62e96a9f644bde210af299c3ce3f0c6..c8e2e9cd84dcf44829f1f5bb899151cda28afc18 100644 (file)
@@ -5713,6 +5713,8 @@ affect the device's behavior. Current defined flags::
   #define KVM_RUN_X86_SMM     (1 << 0)
   /* x86, set if bus lock detected in VM */
   #define KVM_RUN_BUS_LOCK    (1 << 1)
+  /* arm64, set for KVM_EXIT_DEBUG */
+  #define KVM_DEBUG_ARCH_HSR_HIGH_VALID  (1 << 0)
 
 ::
 
index 5d54c39a063ff2c637d311357291ea0d68db2b27..08246e8ac83542064456ea14493aeeaf32023ce2 100644 (file)
@@ -140,9 +140,8 @@ from #define X86_FEATURE_UMIP (16*32 + 2).
 
 In addition, there exists a variety of custom command-line parameters that
 disable specific features. The list of parameters includes, but is not limited
-to, nofsgsbase, nosmap, and nosmep. 5-level paging can also be disabled using
-"no5lvl". SMAP and SMEP are disabled with the aforementioned parameters,
-respectively.
+to, nofsgsbase, nosgx, and noxsave. 5-level paging can also be disabled
+using "no5lvl".
 
 e: The feature was known to be non-functional.
 ----------------------------------------------
diff --git a/Documentation/x86/ifs.rst b/Documentation/x86/ifs.rst
new file mode 100644 (file)
index 0000000..97abb69
--- /dev/null
@@ -0,0 +1,2 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. kernel-doc:: drivers/platform/x86/intel/ifs/ifs.h
index 91b2fa4566184c00f7620a4394497be0c0bee1b3..ba4f90e3819d1cabbb9b2bcd8e7281c79ebbe21f 100644 (file)
@@ -26,6 +26,7 @@ x86-specific Documentation
    intel_txt
    amd-memory-encryption
    amd_hsmp
+   tdx
    pti
    mds
    microcode
@@ -35,6 +36,7 @@ x86-specific Documentation
    usb-legacy-support
    i386/index
    x86_64/index
+   ifs
    sva
    sgx
    features
diff --git a/Documentation/x86/tdx.rst b/Documentation/x86/tdx.rst
new file mode 100644 (file)
index 0000000..b8fa432
--- /dev/null
@@ -0,0 +1,218 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Intel Trust Domain Extensions (TDX)
+=====================================
+
+Intel's Trust Domain Extensions (TDX) protect confidential guest VMs from
+the host and from physical attacks by isolating the guest register state
+and by encrypting the guest memory. In TDX, a special module running in a
+special mode sits between the host and the guest and manages the guest/host
+separation.
+
+Since the host cannot directly access guest registers or memory, much
+normal functionality of a hypervisor must be moved into the guest. This is
+implemented using a Virtualization Exception (#VE) that is handled by the
+guest kernel. Most #VEs are handled entirely inside the guest kernel, but
+some require the hypervisor to be consulted.
+
+TDX includes new hypercall-like mechanisms for communicating from the
+guest to the hypervisor or the TDX module.
+
+New TDX Exceptions
+==================
+
+TDX guests behave differently from bare-metal and traditional VMX guests.
+In TDX guests, otherwise normal instructions or memory accesses can cause
+#VE or #GP exceptions.
+
+Instructions marked with an '*' conditionally cause exceptions.  The
+details for these instructions are discussed below.
+
+Instruction-based #VE
+---------------------
+
+- Port I/O (INS, OUTS, IN, OUT)
+- HLT
+- MONITOR, MWAIT
+- WBINVD, INVD
+- VMCALL
+- RDMSR*, WRMSR*
+- CPUID*
+
+Instruction-based #GP
+---------------------
+
+- All VMX instructions: INVEPT, INVVPID, VMCLEAR, VMFUNC, VMLAUNCH,
+  VMPTRLD, VMPTRST, VMREAD, VMRESUME, VMWRITE, VMXOFF, VMXON
+- ENCLS, ENCLU
+- GETSEC
+- RSM
+- ENQCMD
+- RDMSR*, WRMSR*
+
+RDMSR/WRMSR Behavior
+--------------------
+
+MSR access behavior falls into three categories:
+
+- #GP generated
+- #VE generated
+- "Just works"
+
+In general, the #GP MSRs should not be used in guests.  Their use likely
+indicates a bug in the guest.  The guest may try to handle the #GP with a
+hypercall but it is unlikely to succeed.
+
+The #VE MSRs can typically be handled by the hypervisor.  Guests can make
+a hypercall to the hypervisor to handle the #VE.
+
+The "just works" MSRs do not need any special guest handling.  They might
+be implemented by directly passing through the MSR to the hardware or by
+trapping and handling in the TDX module.  Other than possibly being slow,
+these MSRs appear to function just as they would on bare metal.
+
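+Schematically, a guest can forward a #VE-generating MSR access to the
+hypervisor from its #VE handler. The sketch below is illustrative only;
+tdvmcall_msr_read() is a hypothetical helper, not the kernel's actual
+interface::
+
+   /* Called from the #VE handler for a RDMSR that raised the #VE. */
+   static int ve_emulate_rdmsr(unsigned int msr, u64 *val)
+   {
+           /*
+            * TDVMCALL<MSR_READ>: ask the (untrusted) hypervisor for the
+            * value. The caller must treat the result with suspicion.
+            */
+           return tdvmcall_msr_read(msr, val);
+   }
+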
+CPUID Behavior
+--------------
+
+For some CPUID leaves and sub-leaves, the virtualized bit fields of CPUID
+return values (in guest EAX/EBX/ECX/EDX) are configurable by the
+hypervisor. For such cases, the Intel TDX module architecture defines two
+virtualization types:
+
+- Bit fields for which the hypervisor controls the value seen by the guest
+  TD.
+
+- Bit fields for which the hypervisor configures the value such that the
+  guest TD either sees their native value or a value of 0.  For these bit
+  fields, the hypervisor can mask off the native values, but it cannot
+  turn values *on*.
+
+A #VE is generated for CPUID leaves and sub-leaves that the TDX module does
+not know how to handle. The guest kernel may ask the hypervisor for the
+value with a hypercall.
+
+#VE on Memory Accesses
+======================
+
+There are essentially two classes of TDX memory: private and shared.
+Private memory receives full TDX protections.  Its content is protected
+against access from the hypervisor.  Shared memory is expected to be
+shared between guest and hypervisor and does not receive full TDX
+protections.
+
+A TD guest is in control of whether its memory accesses are treated as
+private or shared.  It selects the behavior with a bit in its page table
+entries.  This helps ensure that a guest does not place sensitive
+information in shared memory, exposing it to the untrusted hypervisor.
+
+#VE on Shared Memory
+--------------------
+
+Access to shared mappings can cause a #VE.  The hypervisor ultimately
+controls whether a shared memory access causes a #VE, so the guest must be
+careful to only reference shared pages for which it can safely handle a
+#VE.  For instance, the guest should be careful not to access shared
+memory in the #VE handler before it reads the #VE info structure
+(TDG.VP.VEINFO.GET).
+
+Shared mapping content is entirely controlled by the hypervisor. The guest
+should only use shared mappings for communicating with the hypervisor.
+Shared mappings must never be used for sensitive memory content like kernel
+stacks.  A good rule of thumb is that hypervisor-shared memory should be
+treated the same as memory mapped to userspace.  Both the hypervisor and
+userspace are completely untrusted.
+
+MMIO for virtual devices is implemented as shared memory.  The guest must
+be careful not to access device MMIO regions unless it is also prepared to
+handle a #VE.
+
+#VE on Private Pages
+--------------------
+
+An access to a private mapping can also cause a #VE.  Since all kernel
+memory is also private memory, the kernel might theoretically need to
+handle a #VE on arbitrary kernel memory accesses.  This is not feasible, so
+TDX guests ensure that all guest memory has been "accepted" before memory
+is used by the kernel.
+
+A modest amount of memory (typically 512M) is pre-accepted by the firmware
+before the kernel runs to ensure that the kernel can start up without
+being subjected to a #VE.
+
+The hypervisor is permitted to unilaterally move accepted pages to a
+"blocked" state. However, if it does this, page access will not generate a
+#VE.  It will, instead, cause a "TD Exit" where the hypervisor is required
+to handle the exception.
+
+Linux #VE handler
+=================
+
+Just like page faults or #GPs, #VE exceptions can either be handled or be
+fatal.  Typically, an unhandled userspace #VE results in a SIGSEGV.
+An unhandled kernel #VE results in an oops.
+
+Handling nested exceptions on x86 is typically nasty business.  A #VE
+could be interrupted by an NMI that triggers another #VE, and hilarity
+ensues.  The TDX #VE architecture anticipated this scenario and includes a
+feature to make it slightly less nasty.
+
+During #VE handling, the TDX module ensures that all interrupts (including
+NMIs) are blocked.  The block remains in place until the guest makes a
+TDG.VP.VEINFO.GET TDCALL.  This allows the guest to control when interrupts
+or a new #VE can be delivered.
+
+However, the guest kernel must still be careful to avoid potential
+#VE-triggering actions (discussed above) while this block is in place,
+since any #VE delivered during the block is elevated to a double fault
+(#DF), which is not recoverable.
+
+MMIO handling
+=============
+
+In non-TDX VMs, MMIO is usually implemented by giving a guest access to a
+mapping which will cause a VMEXIT on access, and then the hypervisor
+emulates the access.  That is not possible in TDX guests because VMEXIT
+will expose the register state to the host. TDX guests don't trust the host
+and can't have their state exposed to the host.
+
+In TDX, MMIO regions typically trigger a #VE exception in the guest.  The
+guest #VE handler then emulates the MMIO instruction inside the guest and
+converts it into a controlled TDCALL to the host, rather than exposing
+guest state to the host.
+
+MMIO addresses on x86 are just special physical addresses. They can
+theoretically be accessed with any instruction that accesses memory.
+However, the kernel instruction decoding method is limited. It is only
+designed to decode instructions like those generated by io.h macros.
+
+MMIO access via other means (like structure overlays) may result in an
+oops.
+
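+For example, an access made through the io.h accessors can be decoded and
+emulated, while a plain structure-overlay load may not be. A minimal sketch
+(phys_addr, size and struct dev_regs are hypothetical)::
+
+   void __iomem *regs = ioremap(phys_addr, size); /* shared MMIO mapping */
+   u32 status;
+
+   /* Decodable: faults with #VE and is emulated via a TDCALL. */
+   status = readl(regs + 0x04);
+
+   /* Risky: the instruction decoder may not handle this load -> oops. */
+   struct dev_regs *d = (struct dev_regs __force *)regs;
+   status = d->status;
+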
+Shared Memory Conversions
+=========================
+
+All TDX guest memory starts out as private at boot.  This memory cannot
+be accessed by the hypervisor.  However, some kernel users like device
+drivers might have a need to share data with the hypervisor.  To do this,
+memory must be converted between shared and private.  This can be
+accomplished using some existing memory encryption helpers:
+
+ * set_memory_decrypted() converts a range of pages to shared.
+ * set_memory_encrypted() converts memory back to private.
+
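+For instance, a driver that wants to share a buffer with the hypervisor
+could do the following (a minimal sketch; error paths are trimmed and
+'order' is a placeholder)::
+
+   /* Allocate page-aligned memory, then convert it to shared. */
+   struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+   unsigned long vaddr = (unsigned long)page_address(page);
+
+   if (set_memory_decrypted(vaddr, 1 << order))
+           return -EIO;            /* conversion failed; do not use pages */
+
+   /* ... exchange data with the hypervisor through this buffer ... */
+
+   /* Convert back to private before freeing; leak the pages on failure. */
+   if (!set_memory_encrypted(vaddr, 1 << order))
+           __free_pages(page, order);
+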
+Device drivers are the primary user of shared memory, but there's no need
+to touch every driver. DMA buffers and ioremap() do the conversions
+automatically.
+
+TDX uses SWIOTLB for most DMA allocations. The SWIOTLB buffer is
+converted to shared on boot.
+
+For coherent DMA allocations, the DMA buffer gets converted at allocation
+time.  Check force_dma_unencrypted() for details.
+
+References
+==========
+
+TDX reference material is collected here:
+
+https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html
index 07aa0007f346e86085def5a0a70311de051c3e02..03ec9cf011812a4c31c08ae5dd327a600bf6d633 100644 (file)
@@ -157,15 +157,6 @@ Rebooting
      newer BIOS, or newer board) using this option will ignore the built-in
      quirk table, and use the generic default reboot actions.
 
-Non Executable Mappings
-=======================
-
-  noexec=on|off
-    on
-      Enable(default)
-    off
-      Disable
-
 NUMA
 ====
 
@@ -310,3 +301,17 @@ Miscellaneous
     Do not use GB pages for kernel direct mappings.
   gbpages
     Use GB pages for kernel direct mappings.
+
+
+AMD SEV (Secure Encrypted Virtualization)
+=========================================
+Options relating to AMD SEV, specified via the following format:
+
+::
+
+   sev=option1[,option2]
+
+The available options are:
+
+   debug
+     Enable debug messages.
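+
+For example, booting with the following enables these debug messages::
+
+   sev=debug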
index f088f58816668fa455a0317b068e3200b471f68f..45aa9cceb4f1940eb9cda3a7176805986008c8ce 100644 (file)
@@ -19,6 +19,7 @@ Offset/Size   Proto   Name                    Meaning
 058/008                ALL     tboot_addr              Physical address of tboot shared page
 060/010                ALL     ist_info                Intel SpeedStep (IST) BIOS support information
                                                (struct ist_info)
+070/008                ALL     acpi_rsdp_addr          Physical address of ACPI RSDP table
 080/010                ALL     hd0_info                hd0 disk parameter, OBSOLETE!!
 090/010                ALL     hd1_info                hd1 disk parameter, OBSOLETE!!
 0A0/010                ALL     sys_desc_table          System description table (struct sys_desc_table),
@@ -27,6 +28,7 @@ Offset/Size   Proto   Name                    Meaning
 0C0/004                ALL     ext_ramdisk_image       ramdisk_image high 32bits
 0C4/004                ALL     ext_ramdisk_size        ramdisk_size high 32bits
 0C8/004                ALL     ext_cmd_line_ptr        cmd_line_ptr high 32bits
+13C/004                ALL     cc_blob_address         Physical address of Confidential Computing blob
 140/080                ALL     edid_info               Video mode setup (struct edid_info)
 1C0/020                ALL     efi_info                EFI 32 information (struct efi_info)
 1E0/004                ALL     alt_mem_k               Alternative mem check, in KB
index e8c52d0192a6144527b62cb5263d9e3c41a27b40..3ad3328b8a461a451884f0c95af3af7e95a9e3c1 100644 (file)
@@ -1044,7 +1044,6 @@ F:        arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi
 F:     drivers/net/ethernet/amd/xgbe/
 
 AMD SENSOR FUSION HUB DRIVER
-M:     Nehal Shah <nehal-bakulchandra.shah@amd.com>
 M:     Basavaraj Natikar <basavaraj.natikar@amd.com>
 L:     linux-input@vger.kernel.org
 S:     Maintained
@@ -1447,6 +1446,7 @@ F:        drivers/media/i2c/aptina-pll.*
 
 AQUACOMPUTER D5 NEXT PUMP SENSOR DRIVER
 M:     Aleksa Savic <savicaleksa83@gmail.com>
+M:     Jack Doan <me@jackdoan.com>
 L:     linux-hwmon@vger.kernel.org
 S:     Maintained
 F:     Documentation/hwmon/aquacomputer_d5next.rst
@@ -3101,6 +3101,16 @@ S:       Maintained
 F:     Documentation/devicetree/bindings/mmc/aspeed,sdhci.yaml
 F:     drivers/mmc/host/sdhci-of-aspeed*
 
+ASPEED SMC SPI DRIVER
+M:     Chin-Ting Kuo <chin-ting_kuo@aspeedtech.com>
+M:     Cédric Le Goater <clg@kaod.org>
+L:     linux-aspeed@lists.ozlabs.org (moderated for non-subscribers)
+L:     openbmc@lists.ozlabs.org (moderated for non-subscribers)
+L:     linux-spi@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml
+F:     drivers/spi/spi-aspeed-smc.c
+
 ASPEED VIDEO ENGINE DRIVER
 M:     Eddie James <eajames@linux.ibm.com>
 L:     linux-media@vger.kernel.org
@@ -3571,8 +3581,9 @@ M:        Andy Gospodarek <andy@greyhouse.net>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://sourceforge.net/projects/bonding/
+F:     Documentation/networking/bonding.rst
 F:     drivers/net/bonding/
-F:     include/net/bonding.h
+F:     include/net/bond*
 F:     include/uapi/linux/if_bonding.h
 
 BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER
@@ -4574,7 +4585,9 @@ L:        keyrings@vger.kernel.org
 S:     Maintained
 F:     Documentation/admin-guide/module-signing.rst
 F:     certs/
+F:     scripts/check-blacklist-hashes.awk
 F:     scripts/sign-file.c
+F:     tools/certs/
 
 CFAG12864B LCD DRIVER
 M:     Miguel Ojeda <ojeda@kernel.org>
@@ -5439,6 +5452,7 @@ F:        net/ax25/sysctl_net_ax25.c
 
 DATA ACCESS MONITOR
 M:     SeongJae Park <sj@kernel.org>
+L:     damon@lists.linux.dev
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -7383,7 +7397,6 @@ L:        linux-mm@kvack.org
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
 F:     arch/alpha/kernel/binfmt_loader.c
-F:     arch/x86/ia32/ia32_aout.c
 F:     fs/*binfmt_*.c
 F:     fs/exec.c
 F:     include/linux/binfmts.h
@@ -8752,6 +8765,14 @@ F:       drivers/hid/hid-sensor-*
 F:     drivers/iio/*/hid-*
 F:     include/linux/hid-sensor-*
 
+HID WACOM DRIVER
+M:     Ping Cheng <ping.cheng@wacom.com>
+M:     Jason Gerecke  <jason.gerecke@wacom.com>
+L:     linux-input@vger.kernel.org
+S:     Maintained
+F:     drivers/hid/wacom.h
+F:     drivers/hid/wacom_*
+
 HIGH-RESOLUTION TIMERS, CLOCKEVENTS
 M:     Thomas Gleixner <tglx@linutronix.de>
 L:     linux-kernel@vger.kernel.org
@@ -9862,6 +9883,14 @@ B:       https://bugzilla.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git
 F:     drivers/idle/intel_idle.c
 
+INTEL IN FIELD SCAN (IFS) DEVICE
+M:     Jithu Joseph <jithu.joseph@intel.com>
+R:     Ashok Raj <ashok.raj@intel.com>
+R:     Tony Luck <tony.luck@intel.com>
+S:     Maintained
+F:     drivers/platform/x86/intel/ifs
+F:     include/trace/events/intel_ifs.h
+
 INTEL INTEGRATED SENSOR HUB DRIVER
 M:     Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
 M:     Jiri Kosina <jikos@kernel.org>
@@ -10131,7 +10160,7 @@ S:      Supported
 F:     drivers/net/wireless/intel/iwlegacy/
 
 INTEL WIRELESS WIFI LINK (iwlwifi)
-M:     Luca Coelho <luciano.coelho@intel.com>
+M:     Gregory Greenman <gregory.greenman@intel.com>
 L:     linux-wireless@vger.kernel.org
 S:     Supported
 W:     https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi
@@ -10856,6 +10885,15 @@ S:     Supported
 F:     include/keys/trusted_tee.h
 F:     security/keys/trusted-keys/trusted_tee.c
 
+KEYS-TRUSTED-CAAM
+M:     Ahmad Fatoum <a.fatoum@pengutronix.de>
+R:     Pengutronix Kernel Team <kernel@pengutronix.de>
+L:     linux-integrity@vger.kernel.org
+L:     keyrings@vger.kernel.org
+S:     Maintained
+F:     include/keys/trusted_caam.h
+F:     security/keys/trusted-keys/trusted_caam.c
+
 KEYS/KEYRINGS
 M:     David Howells <dhowells@redhat.com>
 M:     Jarkko Sakkinen <jarkko@kernel.org>
@@ -11828,7 +11866,7 @@ MARVELL XENON MMC/SD/SDIO HOST CONTROLLER DRIVER
 M:     Hu Ziji <huziji@marvell.com>
 L:     linux-mmc@vger.kernel.org
 S:     Supported
-F:     Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt
+F:     Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml
 F:     drivers/mmc/host/sdhci-xenon*
 
 MATROX FRAMEBUFFER DRIVER
@@ -13534,12 +13572,21 @@ M:    Samuel Mendoza-Jonas <sam@mendozajonas.com>
 S:     Maintained
 F:     net/ncsi/
 
-NCT6775 HARDWARE MONITOR DRIVER
+NCT6775 HARDWARE MONITOR DRIVER - CORE & PLATFORM DRIVER
 M:     Guenter Roeck <linux@roeck-us.net>
 L:     linux-hwmon@vger.kernel.org
 S:     Maintained
 F:     Documentation/hwmon/nct6775.rst
-F:     drivers/hwmon/nct6775.c
+F:     drivers/hwmon/nct6775-core.c
+F:     drivers/hwmon/nct6775-platform.c
+F:     drivers/hwmon/nct6775.h
+
+NCT6775 HARDWARE MONITOR DRIVER - I2C DRIVER
+M:     Zev Weiss <zev@bewilderbeest.net>
+L:     linux-hwmon@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml
+F:     drivers/hwmon/nct6775-i2c.c
 
 NETDEVSIM
 M:     Jakub Kicinski <kuba@kernel.org>
@@ -14372,7 +14419,6 @@ F:      arch/arm/*omap*/*pm*
 F:     drivers/cpufreq/omap-cpufreq.c
 
 OMAP POWERDOMAIN SOC ADAPTATION LAYER SUPPORT
-M:     Rajendra Nayak <rnayak@codeaurora.org>
 M:     Paul Walmsley <paul@pwsan.com>
 L:     linux-omap@vger.kernel.org
 S:     Maintained
@@ -15475,7 +15521,8 @@ F:      tools/perf/
 PERFORMANCE EVENTS TOOLING ARM64
 R:     John Garry <john.garry@huawei.com>
 R:     Will Deacon <will@kernel.org>
-R:     Mathieu Poirier <mathieu.poirier@linaro.org>
+R:     James Clark <james.clark@arm.com>
+R:     Mike Leach <mike.leach@linaro.org>
 R:     Leo Yan <leo.yan@linaro.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
@@ -17515,6 +17562,7 @@ R:      Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
 R:     Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
 R:     Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
 R:     Daniel Bristot de Oliveira <bristot@redhat.com> (SCHED_DEADLINE)
+R:     Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core
@@ -19541,6 +19589,7 @@ F:      drivers/thermal/
 F:     include/linux/cpu_cooling.h
 F:     include/linux/thermal.h
 F:     include/uapi/linux/thermal.h
+F:     tools/lib/thermal/
 F:     tools/thermal/
 
 THERMAL DRIVER FOR AMLOGIC SOCS
@@ -19840,6 +19889,7 @@ F:      drivers/media/usb/tm6000/
 TMIO/SDHI MMC DRIVER
 M:     Wolfram Sang <wsa+renesas@sang-engineering.com>
 L:     linux-mmc@vger.kernel.org
+L:     linux-renesas-soc@vger.kernel.org
 S:     Supported
 F:     drivers/mmc/host/renesas_sdhi*
 F:     drivers/mmc/host/tmio_mmc*
@@ -19849,6 +19899,7 @@ TMP401 HARDWARE MONITOR DRIVER
 M:     Guenter Roeck <linux@roeck-us.net>
 L:     linux-hwmon@vger.kernel.org
 S:     Maintained
+F:     Documentation/devicetree/bindings/hwmon/ti,tmp401.yaml
 F:     Documentation/hwmon/tmp401.rst
 F:     drivers/hwmon/tmp401.c
 
index 2284d1ca250397f874c7cd85f005a4405e9982bc..bbcd3abf2f4dfad948b526436210682d269a87d7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 18
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION =
 NAME = Superb Owl
 
 # *DOCUMENTATION*
@@ -1011,6 +1011,7 @@ include-$(CONFIG_KASAN)           += scripts/Makefile.kasan
 include-$(CONFIG_KCSAN)                += scripts/Makefile.kcsan
 include-$(CONFIG_UBSAN)                += scripts/Makefile.ubsan
 include-$(CONFIG_KCOV)         += scripts/Makefile.kcov
+include-$(CONFIG_RANDSTRUCT)   += scripts/Makefile.randstruct
 include-$(CONFIG_GCC_PLUGINS)  += scripts/Makefile.gcc-plugins
 
 include $(addprefix $(srctree)/, $(include-y))
@@ -1302,7 +1303,7 @@ install: sub_make_done :=
 # ---------------------------------------------------------------------------
 # Tools
 
-ifdef CONFIG_STACK_VALIDATION
+ifdef CONFIG_OBJTOOL
 prepare: tools/objtool
 endif
 
index 31c4fdc4a4baaa7ad84063c98c30e01d77de349b..763b1b5e4f410cd386df61432a00cb2dd7c1b4c8 100644 (file)
@@ -24,6 +24,13 @@ config KEXEC_ELF
 config HAVE_IMA_KEXEC
        bool
 
+config ARCH_HAS_SUBPAGE_FAULTS
+       bool
+       help
+         Select if the architecture can check permissions at sub-page
+         granularity (e.g. arm64 MTE). The probe_user_*() functions
+         must be implemented.
+
 config HOTPLUG_SMT
        bool
 
@@ -35,6 +42,7 @@ config KPROBES
        depends on MODULES
        depends on HAVE_KPROBES
        select KALLSYMS
+       select TASKS_RCU if PREEMPTION
        help
          Kprobes allows you to trap at almost any kernel address and
          execute a callback function.  register_kprobe() establishes
@@ -46,6 +54,7 @@ config JUMP_LABEL
        bool "Optimize very unlikely/likely branches"
        depends on HAVE_ARCH_JUMP_LABEL
        depends on CC_HAS_ASM_GOTO
+       select OBJTOOL if HAVE_JUMP_LABEL_HACK
        help
         This option enables a transparent branch optimization that
         makes certain almost-always-true or almost-always-false branch
@@ -723,10 +732,7 @@ config ARCH_SUPPORTS_CFI_CLANG
 config CFI_CLANG
        bool "Use Clang's Control Flow Integrity (CFI)"
        depends on LTO_CLANG && ARCH_SUPPORTS_CFI_CLANG
-       # Clang >= 12:
-       # - https://bugs.llvm.org/show_bug.cgi?id=46258
-       # - https://bugs.llvm.org/show_bug.cgi?id=47479
-       depends on CLANG_VERSION >= 120000
+       depends on CLANG_VERSION >= 140000
        select KALLSYMS
        help
          This option enables Clang’s forward-edge Control Flow Integrity
@@ -1026,11 +1032,23 @@ config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
        depends on MMU
        select ARCH_HAS_ELF_RANDOMIZE
 
+config HAVE_OBJTOOL
+       bool
+
+config HAVE_JUMP_LABEL_HACK
+       bool
+
+config HAVE_NOINSTR_HACK
+       bool
+
+config HAVE_NOINSTR_VALIDATION
+       bool
+
 config HAVE_STACK_VALIDATION
        bool
        help
-         Architecture supports the 'objtool check' host tool command, which
-         performs compile-time stack metadata validation.
+         Architecture supports objtool compile-time frame pointer rule
+         validation.
 
 config HAVE_RELIABLE_STACKTRACE
        bool
@@ -1300,6 +1318,7 @@ config HAVE_STATIC_CALL
 config HAVE_STATIC_CALL_INLINE
        bool
        depends on HAVE_STATIC_CALL
+       select OBJTOOL
 
 config HAVE_PREEMPT_DYNAMIC
        bool
index b565cc6f408e95d04c0fe4a1c58df6c16b15a750..f89798da8a1470c4f9e183f2182b106fb78c77d0 100644 (file)
@@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void)
        __asm__ __volatile__ ("rpcc %0" : "=r"(ret));
        return ret;
 }
+#define get_cycles get_cycles
 
 #endif
index 2e8091e2d8a86d12d40c4d83fce2b2eb9ed43edf..0dcf88e7f9cf3b5fe33eea816f90bc5eccd5e248 100644 (file)
@@ -972,6 +972,17 @@ config ARM_ERRATA_764369
          relevant cache maintenance functions and sets a specific bit
          in the diagnostic control register of the SCU.
 
+config ARM_ERRATA_764319
+       bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction"
+       depends on CPU_V7
+       help
+         This option enables the workaround for the 764319 Cortex-A9 erratum.
+         CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an
+         unexpected Undefined Instruction exception when the DBGSWENABLE
+         external pin is set to 0, even when the CP14 accesses are performed
+         from a privileged mode. This workaround catches the exception in a
+         way that allows the kernel to continue execution.
+
 config ARM_ERRATA_775420
        bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock"
        depends on CPU_V7
index e71ccfd1df631209c17fe9c42da3dbbae09466d8..ff4c07c69af1cd291de0aff45ee2fd3ecaa6b1ac 100644 (file)
        lm25066@40 {
                compatible = "lm25066";
                reg = <0x40>;
+               shunt-resistor-micro-ohms = <1000>;
        };
 
        /* 12VSB PMIC */
        lm25066@41 {
                compatible = "lm25066";
                reg = <0x41>;
+               shunt-resistor-micro-ohms = <10000>;
        };
 };
 
        gpio-line-names =
                /*  A */ "LOCATORLED_STATUS_N", "BMC_MAC2_INTB", "NMI_BTN_N", "BMC_NMI",
                        "", "", "", "",
-               /*  B */ "DDR_MEM_TEMP", "", "", "", "", "", "", "",
+               /*  B */ "POST_COMPLETE_N", "", "", "", "", "", "", "",
                /*  C */ "", "", "", "", "PCIE_HP_SEL_N", "PCIE_SATA_SEL_N", "LOCATORBTN", "",
                /*  D */ "BMC_PSIN", "BMC_PSOUT", "BMC_RESETCON", "RESETCON",
                        "", "", "", "PSU_FAN_FAIL_N",
index e4775bbceecc6143927b3954e3c99d44018a55c4..7cd4f075e32501158f373e2057c76e1126f69d05 100644 (file)
                groups = "FWSPID";
        };
 
-       pinctrl_fwqspid_default: fwqspid_default {
-               function = "FWSPID";
-               groups = "FWQSPID";
+       pinctrl_fwqspi_default: fwqspi_default {
+               function = "FWQSPI";
+               groups = "FWQSPI";
        };
 
        pinctrl_fwspiwp_default: fwspiwp_default {
        };
 
        pinctrl_qspi1_default: qspi1_default {
-               function = "QSPI1";
+               function = "SPI1";
                groups = "QSPI1";
        };
 
        pinctrl_qspi2_default: qspi2_default {
-               function = "QSPI2";
+               function = "SPI2";
                groups = "QSPI2";
        };
 
index 3d5ce9da42c3c2bc282f1df808ae6161e086cba0..9d2a0ce4ca061d7ffd47e501ba11623a43101f71 100644 (file)
                                reg = <0x1e6f2000 0x1000>;
                        };
 
+                       video: video@1e700000 {
+                               compatible = "aspeed,ast2600-video-engine";
+                               reg = <0x1e700000 0x1000>;
+                               clocks = <&syscon ASPEED_CLK_GATE_VCLK>,
+                                        <&syscon ASPEED_CLK_GATE_ECLK>;
+                               clock-names = "vclk", "eclk";
+                               interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+                               status = "disabled";
+                       };
+
                        gpio0: gpio@1e780000 {
                                #gpio-cells = <2>;
                                gpio-controller;
index be882ea0eee4662ecb631648edb2466e06de3dfc..688c9849eec8d937ca2a696084b4b6b8633a9092 100644 (file)
@@ -30,7 +30,6 @@ CONFIG_ARM_APPENDED_DTB=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 # CONFIG_COREDUMP is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
index 89f4a6ff30bd1c530f300ccad4e27f558acac0e2..c1e98e33a34878ddd8fc8316d12d5a36f4ac5bec 100644 (file)
@@ -23,7 +23,6 @@ CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_BINFMT_FLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 # CONFIG_COREDUMP is not set
 # CONFIG_SUSPEND is not set
 CONFIG_NET=y
index 551db328009dd697194718dd349d6223febb0122..71d6bfcf455191696e3b94ef93d1ab6a8c684d86 100644 (file)
@@ -28,7 +28,6 @@ CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_XIP_KERNEL=y
 CONFIG_XIP_PHYS_ADDR=0x08008000
 CONFIG_BINFMT_FLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 # CONFIG_COREDUMP is not set
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
index a89f035c3b01b16ea96c0df1e99825b35c64da96..70fdbfd83484b79e73d04912a2cf7ac819298a1b 100644 (file)
@@ -18,7 +18,6 @@ CONFIG_XIP_KERNEL=y
 CONFIG_XIP_PHYS_ADDR=0x0f000080
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 # CONFIG_SUSPEND is not set
 # CONFIG_UEVENT_HELPER is not set
 # CONFIG_STANDALONE is not set
index 413abfb42989e9f3edc846aefc1ecef53a02daa1..f82a819eb0dbb49b54e64bd4b3cb3d506316212b 100644 (file)
@@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void)          \
        return read_sysreg(a32);                \
 }                                              \
 
+CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1)
 CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
 CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
 CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
@@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)
 
 /* Low-level accessors */
 
-static inline void gic_write_eoir(u32 irq)
-{
-       write_sysreg(irq, ICC_EOIR1);
-       isb();
-}
-
 static inline void gic_write_dir(u32 val)
 {
        write_sysreg(val, ICC_DIR);
index 34fe8d2dd5d11c7f4451dcc0034e4a20c2c18d79..90fbe4a3f9c8472fe756bebefb7c779a063f0f2c 100644 (file)
@@ -666,12 +666,11 @@ THUMB(    orr     \reg , \reg , #PSR_T_BIT        )
        __adldst_l      str, \src, \sym, \tmp, \cond
        .endm
 
-       .macro          __ldst_va, op, reg, tmp, sym, cond
+       .macro          __ldst_va, op, reg, tmp, sym, cond, offset
 #if __LINUX_ARM_ARCH__ >= 7 || \
     !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
     (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
        mov_l           \tmp, \sym, \cond
-       \op\cond        \reg, [\tmp]
 #else
        /*
         * Avoid a literal load, by emitting a sequence of ADD/LDR instructions
@@ -683,24 +682,29 @@ THUMB(    orr     \reg , \reg , #PSR_T_BIT        )
        .reloc          .L0_\@, R_ARM_ALU_PC_G0_NC, \sym
        .reloc          .L1_\@, R_ARM_ALU_PC_G1_NC, \sym
        .reloc          .L2_\@, R_ARM_LDR_PC_G2, \sym
-.L0_\@: sub\cond       \tmp, pc, #8
-.L1_\@: sub\cond       \tmp, \tmp, #4
-.L2_\@: \op\cond       \reg, [\tmp, #0]
+.L0_\@: sub\cond       \tmp, pc, #8 - \offset
+.L1_\@: sub\cond       \tmp, \tmp, #4 - \offset
+.L2_\@:
 #endif
+       \op\cond        \reg, [\tmp, #\offset]
        .endm
 
        /*
         * ldr_va - load a 32-bit word from the virtual address of \sym
         */
-       .macro          ldr_va, rd:req, sym:req, cond
-       __ldst_va       ldr, \rd, \rd, \sym, \cond
+       .macro          ldr_va, rd:req, sym:req, cond, tmp, offset=0
+       .ifnb           \tmp
+       __ldst_va       ldr, \rd, \tmp, \sym, \cond, \offset
+       .else
+       __ldst_va       ldr, \rd, \rd, \sym, \cond, \offset
+       .endif
        .endm
 
        /*
         * str_va - store a 32-bit word to the virtual address of \sym
         */
        .macro          str_va, rn:req, sym:req, tmp:req, cond
-       __ldst_va       str, \rn, \tmp, \sym, \cond
+       __ldst_va       str, \rn, \tmp, \sym, \cond, 0
        .endm
 
        /*
@@ -727,9 +731,11 @@ THUMB(     orr     \reg , \reg , #PSR_T_BIT        )
         *                are permitted to overlap with 'rd' if != sp
         */
        .macro          ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
-#if __LINUX_ARM_ARCH__ >= 7 || \
-    !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
-    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
+#ifndef CONFIG_SMP
+       ldr_va          \rd, \sym, tmp=\t1
+#elif __LINUX_ARM_ARCH__ >= 7 || \
+      !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
+      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
        this_cpu_offset \t1
        mov_l           \t2, \sym
        ldr             \rd, [\t1, \t2]
index 0c70eb688a00cb82484e2f474fcfa0e1ca796ce6..2a0739a2350bef8e2590f7b2d7464e9d60717568 100644 (file)
@@ -440,6 +440,9 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
 extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
 extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+                                       unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
 #endif
 
 /*
index cfffae67c04ee0d0601f7727a270ffdccabc103b..5546c9751478c348fb41344073812600afecebc0 100644 (file)
@@ -3,20 +3,10 @@
 #define _ASM_ARM_MODULE_H
 
 #include <asm-generic/module.h>
-
-struct unwind_table;
+#include <asm/unwind.h>
 
 #ifdef CONFIG_ARM_UNWIND
-enum {
-       ARM_SEC_INIT,
-       ARM_SEC_DEVINIT,
-       ARM_SEC_CORE,
-       ARM_SEC_EXIT,
-       ARM_SEC_DEVEXIT,
-       ARM_SEC_HOT,
-       ARM_SEC_UNLIKELY,
-       ARM_SEC_MAX,
-};
+#define ELF_SECTION_UNWIND 0x70000001
 #endif
 
 #define PLT_ENT_STRIDE         L1_CACHE_BYTES
@@ -36,7 +26,8 @@ struct mod_plt_sec {
 
 struct mod_arch_specific {
 #ifdef CONFIG_ARM_UNWIND
-       struct unwind_table *unwind[ARM_SEC_MAX];
+       struct list_head unwind_list;
+       struct unwind_table *init_table;
 #endif
 #ifdef CONFIG_ARM_MODULE_PLTS
        struct mod_plt_sec      core;
index 7c3b3671d6c25eba4be8cb5275b6f2e4c960921a..6d1337c169cd3c78fc1c227ea23d428ece28ab47 100644 (file)
@@ -11,5 +11,6 @@
 
 typedef unsigned long cycles_t;
 #define get_cycles()   ({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
 
 #endif
index 0f8a3439902d0613ac2b4ef98a3561d950ec6a43..b51f85417f58e185edd1ae6b2dd01caaac03b82c 100644 (file)
@@ -24,6 +24,7 @@ struct unwind_idx {
 
 struct unwind_table {
        struct list_head list;
+       struct list_head mod_list;
        const struct unwind_idx *start;
        const struct unwind_idx *origin;
        const struct unwind_idx *stop;
index 06508698abb853fdf05ca391e6fcd798edd52fb3..c39303e5c23470e6a37a91185193b8964794a628 100644 (file)
@@ -61,9 +61,8 @@
        .macro  pabt_helper
        @ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
 #ifdef MULTI_PABORT
-       ldr     ip, .LCprocfns
-       mov     lr, pc
-       ldr     pc, [ip, #PROCESSOR_PABT_FUNC]
+       ldr_va  ip, processor, offset=PROCESSOR_PABT_FUNC
+       bl_r    ip
 #else
        bl      CPU_PABORT_HANDLER
 #endif
@@ -82,9 +81,8 @@
        @ the fault status register in r1.  r9 must be preserved.
        @
 #ifdef MULTI_DABORT
-       ldr     ip, .LCprocfns
-       mov     lr, pc
-       ldr     pc, [ip, #PROCESSOR_DABT_FUNC]
+       ldr_va  ip, processor, offset=PROCESSOR_DABT_FUNC
+       bl_r    ip
 #else
        bl      CPU_DABORT_HANDLER
 #endif
@@ -302,16 +300,6 @@ __fiq_svc:
  UNWIND(.fnend         )
 ENDPROC(__fiq_svc)
 
-       .align  5
-.LCcralign:
-       .word   cr_alignment
-#ifdef MULTI_DABORT
-.LCprocfns:
-       .word   processor
-#endif
-.LCfp:
-       .word   fp_enter
-
 /*
  * Abort mode handlers
  */
@@ -370,7 +358,7 @@ ENDPROC(__fiq_abt)
  THUMB(        stmia   sp, {r0 - r12}  )
 
  ATRAP(        mrc     p15, 0, r7, c1, c0, 0)
- ATRAP(        ldr     r8, .LCcralign)
+ ATRAP(        ldr_va  r8, cr_alignment)
 
        ldmia   r0, {r3 - r5}
        add     r0, sp, #S_PC           @ here for interlock avoidance
@@ -379,8 +367,6 @@ ENDPROC(__fiq_abt)
        str     r3, [sp]                @ save the "real" r0 copied
                                        @ from the exception stack
 
- ATRAP(        ldr     r8, [r8, #0])
-
        @
        @ We are now ready to fill in the remaining blanks on the stack:
        @
@@ -505,9 +491,7 @@ __und_usr_thumb:
  */
 #if __LINUX_ARM_ARCH__ < 7
 /* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-#define NEED_CPU_ARCHITECTURE
-       ldr     r5, .LCcpu_architecture
-       ldr     r5, [r5]
+       ldr_va  r5, cpu_architecture
        cmp     r5, #CPU_ARCH_ARMv7
        blo     __und_usr_fault_16              @ 16bit undefined instruction
 /*
@@ -654,12 +638,6 @@ call_fpe:
        ret.w   lr                              @ CP#14 (Debug)
        ret.w   lr                              @ CP#15 (Control)
 
-#ifdef NEED_CPU_ARCHITECTURE
-       .align  2
-.LCcpu_architecture:
-       .word   __cpu_architecture
-#endif
-
 #ifdef CONFIG_NEON
        .align  6
 
@@ -685,9 +663,8 @@ call_fpe:
 #endif
 
 do_fpe:
-       ldr     r4, .LCfp
        add     r10, r10, #TI_FPSTATE           @ r10 = workspace
-       ldr     pc, [r4]                        @ Call FP module USR entry point
+       ldr_va  pc, fp_enter, tmp=r4            @ Call FP module USR entry point
 
 /*
  * The FP module is called with these registers set:
@@ -1101,6 +1078,12 @@ __kuser_helper_end:
  */
        .macro  vector_stub, name, mode, correction=0
        .align  5
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+vector_bhb_bpiall_\name:
+       mcr     p15, 0, r0, c7, c5, 6   @ BPIALL
+       @ isb not needed due to "movs pc, lr" in the vector stub
+       @ which gives a "context synchronisation".
+#endif
 
 vector_\name:
        .if \correction
@@ -1111,7 +1094,8 @@ vector_\name:
        stmia   sp, {r0, lr}            @ save r0, lr
 
        @ Save spsr_<exception> (parent CPSR)
-2:     mrs     lr, spsr
+.Lvec_\name:
+       mrs     lr, spsr
        str     lr, [sp, #8]            @ save spsr
 
        @
@@ -1145,28 +1129,14 @@ vector_bhb_loop8_\name:
 
        @ bhb workaround
        mov     r0, #8
-3:     b       . + 4
+3:     W(b)    . + 4
        subs    r0, r0, #1
        bne     3b
-       dsb
-       isb
-       b       2b
-ENDPROC(vector_bhb_loop8_\name)
-
-vector_bhb_bpiall_\name:
-       .if \correction
-       sub     lr, lr, #\correction
-       .endif
-
-       @ Save r0, lr_<exception> (parent PC)
-       stmia   sp, {r0, lr}
-
-       @ bhb workaround
-       mcr     p15, 0, r0, c7, c5, 6   @ BPIALL
+       dsb     nsh
        @ isb not needed due to "movs pc, lr" in the vector stub
        @ which gives a "context synchronisation".
-       b       2b
-ENDPROC(vector_bhb_bpiall_\name)
+       b       .Lvec_\name
+ENDPROC(vector_bhb_loop8_\name)
        .previous
 #endif
 
@@ -1176,10 +1146,15 @@ ENDPROC(vector_bhb_bpiall_\name)
        .endm
 
        .section .stubs, "ax", %progbits
-       @ This must be the first word
+       @ These need to remain at the start of the section so that
+       @ they are in range of the 'SWI' entries in the vector tables
+       @ located 4k down.
+.L__vector_swi:
        .word   vector_swi
 #ifdef CONFIG_HARDEN_BRANCH_HISTORY
+.L__vector_bhb_loop8_swi:
        .word   vector_bhb_loop8_swi
+.L__vector_bhb_bpiall_swi:
        .word   vector_bhb_bpiall_swi
 #endif
 
@@ -1322,10 +1297,11 @@ vector_addrexcptn:
        .globl  vector_fiq
 
        .section .vectors, "ax", %progbits
-.L__vectors_start:
        W(b)    vector_rst
        W(b)    vector_und
-       W(ldr)  pc, .L__vectors_start + 0x1000
+ARM(   .reloc  ., R_ARM_LDR_PC_G0, .L__vector_swi              )
+THUMB( .reloc  ., R_ARM_THM_PC12, .L__vector_swi               )
+       W(ldr)  pc, .
        W(b)    vector_pabt
        W(b)    vector_dabt
        W(b)    vector_addrexcptn
@@ -1334,10 +1310,11 @@ vector_addrexcptn:
 
 #ifdef CONFIG_HARDEN_BRANCH_HISTORY
        .section .vectors.bhb.loop8, "ax", %progbits
-.L__vectors_bhb_loop8_start:
        W(b)    vector_rst
        W(b)    vector_bhb_loop8_und
-       W(ldr)  pc, .L__vectors_bhb_loop8_start + 0x1004
+ARM(   .reloc  ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi    )
+THUMB( .reloc  ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi     )
+       W(ldr)  pc, .
        W(b)    vector_bhb_loop8_pabt
        W(b)    vector_bhb_loop8_dabt
        W(b)    vector_addrexcptn
@@ -1345,10 +1322,11 @@ vector_addrexcptn:
        W(b)    vector_bhb_loop8_fiq
 
        .section .vectors.bhb.bpiall, "ax", %progbits
-.L__vectors_bhb_bpiall_start:
        W(b)    vector_rst
        W(b)    vector_bhb_bpiall_und
-       W(ldr)  pc, .L__vectors_bhb_bpiall_start + 0x1008
+ARM(   .reloc  ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi   )
+THUMB( .reloc  ., R_ARM_THM_PC12, .L__vector_bhb_bpiall_swi    )
+       W(ldr)  pc, .
        W(b)    vector_bhb_bpiall_pabt
        W(b)    vector_bhb_bpiall_dabt
        W(b)    vector_addrexcptn
index 90d40f4d56cfd8b2a4d57ebb5a8f3c3e8e9dec74..7aa3ded4af9292d006e2346efa554187467aa393 100644 (file)
@@ -164,7 +164,7 @@ ENTRY(vector_bhb_loop8_swi)
 1:     b       2f
 2:     subs    r8, r8, #1
        bne     1b
-       dsb
+       dsb     nsh
        isb
        b       3f
 ENDPROC(vector_bhb_loop8_swi)
@@ -198,7 +198,7 @@ ENTRY(vector_swi)
 #endif
        reload_current r10, ip
        zero_fp
-       alignment_trap r10, ip, __cr_alignment
+       alignment_trap r10, ip, cr_alignment
        asm_trace_hardirqs_on save=0
        enable_irq_notrace
        ct_user_exit save=0
@@ -328,14 +328,6 @@ __sys_trace_return:
        bl      syscall_trace_exit
        b       ret_slow_syscall
 
-       .align  5
-#ifdef CONFIG_ALIGNMENT_TRAP
-       .type   __cr_alignment, #object
-__cr_alignment:
-       .word   cr_alignment
-#endif
-       .ltorg
-
        .macro  syscall_table_start, sym
        .equ    __sys_nr, 0
        .type   \sym, #object
index 9a1dc142f7825cae2697ad63c876c4208a70b423..5865621bf6912c701108643a9082ef0086a5a507 100644 (file)
@@ -48,8 +48,7 @@
        .macro  alignment_trap, rtmp1, rtmp2, label
 #ifdef CONFIG_ALIGNMENT_TRAP
        mrc     p15, 0, \rtmp2, c1, c0, 0
-       ldr     \rtmp1, \label
-       ldr     \rtmp1, [\rtmp1]
+       ldr_va  \rtmp1, \label
        teq     \rtmp1, \rtmp2
        mcrne   p15, 0, \rtmp1, c1, c0, 0
 #endif
index b1423fb130ea4a04d5192ec61beb8453b29b04af..054e9199f30db654e3fba1116b48ccf9ae7bafb6 100644 (file)
@@ -941,6 +941,23 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
        return ret;
 }
 
+#ifdef CONFIG_ARM_ERRATA_764319
+static int oslsr_fault;
+
+static int debug_oslsr_trap(struct pt_regs *regs, unsigned int instr)
+{
+       oslsr_fault = 1;
+       instruction_pointer(regs) += 4;
+       return 0;
+}
+
+static struct undef_hook debug_oslsr_hook = {
+       .instr_mask  = 0xffffffff,
+       .instr_val = 0xee115e91,
+       .fn = debug_oslsr_trap,
+};
+#endif
+
 /*
  * One-time initialisation.
  */
@@ -974,7 +991,16 @@ static bool core_has_os_save_restore(void)
        case ARM_DEBUG_ARCH_V7_1:
                return true;
        case ARM_DEBUG_ARCH_V7_ECP14:
+#ifdef CONFIG_ARM_ERRATA_764319
+               oslsr_fault = 0;
+               register_undef_hook(&debug_oslsr_hook);
                ARM_DBG_READ(c1, c1, 4, oslsr);
+               unregister_undef_hook(&debug_oslsr_hook);
+               if (oslsr_fault)
+                       return false;
+#else
+               ARM_DBG_READ(c1, c1, 4, oslsr);
+#endif
                if (oslsr & ARM_OSLSR_OSLM0)
                        return true;
                fallthrough;
index 549abcedf795178a6e17244ed086d64f26f8a890..d59c36dc0494bddf90d10950ab97d76a770a2730 100644 (file)
@@ -459,46 +459,40 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
 #ifdef CONFIG_ARM_UNWIND
        const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
        const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum;
-       struct mod_unwind_map maps[ARM_SEC_MAX];
-       int i;
+       struct list_head *unwind_list = &mod->arch.unwind_list;
 
-       memset(maps, 0, sizeof(maps));
+       INIT_LIST_HEAD(unwind_list);
+       mod->arch.init_table = NULL;
 
        for (s = sechdrs; s < sechdrs_end; s++) {
                const char *secname = secstrs + s->sh_name;
+               const char *txtname;
+               const Elf_Shdr *txt_sec;
 
-               if (!(s->sh_flags & SHF_ALLOC))
+               if (!(s->sh_flags & SHF_ALLOC) ||
+                   s->sh_type != ELF_SECTION_UNWIND)
                        continue;
 
-               if (strcmp(".ARM.exidx.init.text", secname) == 0)
-                       maps[ARM_SEC_INIT].unw_sec = s;
-               else if (strcmp(".ARM.exidx", secname) == 0)
-                       maps[ARM_SEC_CORE].unw_sec = s;
-               else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
-                       maps[ARM_SEC_EXIT].unw_sec = s;
-               else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0)
-                       maps[ARM_SEC_UNLIKELY].unw_sec = s;
-               else if (strcmp(".ARM.exidx.text.hot", secname) == 0)
-                       maps[ARM_SEC_HOT].unw_sec = s;
-               else if (strcmp(".init.text", secname) == 0)
-                       maps[ARM_SEC_INIT].txt_sec = s;
-               else if (strcmp(".text", secname) == 0)
-                       maps[ARM_SEC_CORE].txt_sec = s;
-               else if (strcmp(".exit.text", secname) == 0)
-                       maps[ARM_SEC_EXIT].txt_sec = s;
-               else if (strcmp(".text.unlikely", secname) == 0)
-                       maps[ARM_SEC_UNLIKELY].txt_sec = s;
-               else if (strcmp(".text.hot", secname) == 0)
-                       maps[ARM_SEC_HOT].txt_sec = s;
-       }
+               if (!strcmp(".ARM.exidx", secname))
+                       txtname = ".text";
+               else
+                       txtname = secname + strlen(".ARM.exidx");
+               txt_sec = find_mod_section(hdr, sechdrs, txtname);
+
+               if (txt_sec) {
+                       struct unwind_table *table =
+                               unwind_table_add(s->sh_addr,
+                                               s->sh_size,
+                                               txt_sec->sh_addr,
+                                               txt_sec->sh_size);
 
-       for (i = 0; i < ARM_SEC_MAX; i++)
-               if (maps[i].unw_sec && maps[i].txt_sec)
-                       mod->arch.unwind[i] =
-                               unwind_table_add(maps[i].unw_sec->sh_addr,
-                                                maps[i].unw_sec->sh_size,
-                                                maps[i].txt_sec->sh_addr,
-                                                maps[i].txt_sec->sh_size);
+                       list_add(&table->mod_list, unwind_list);
+
+                       /* save init table for module_arch_freeing_init */
+                       if (strcmp(".ARM.exidx.init.text", secname) == 0)
+                               mod->arch.init_table = table;
+               }
+       }
 #endif
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
        s = find_mod_section(hdr, sechdrs, ".pv_table");
@@ -519,19 +513,27 @@ void
 module_arch_cleanup(struct module *mod)
 {
 #ifdef CONFIG_ARM_UNWIND
-       int i;
+       struct unwind_table *tmp;
+       struct unwind_table *n;
 
-       for (i = 0; i < ARM_SEC_MAX; i++) {
-               unwind_table_del(mod->arch.unwind[i]);
-               mod->arch.unwind[i] = NULL;
+       list_for_each_entry_safe(tmp, n,
+                       &mod->arch.unwind_list, mod_list) {
+               list_del(&tmp->mod_list);
+               unwind_table_del(tmp);
        }
+       mod->arch.init_table = NULL;
 #endif
 }
 
 void __weak module_arch_freeing_init(struct module *mod)
 {
 #ifdef CONFIG_ARM_UNWIND
-       unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]);
-       mod->arch.unwind[ARM_SEC_INIT] = NULL;
+       struct unwind_table *init = mod->arch.init_table;
+
+       if (init) {
+               mod->arch.init_table = NULL;
+               list_del(&init->mod_list);
+               unwind_table_del(init);
+       }
 #endif
 }
index 459abc5d1819537bad3f92696681074839254aaa..ea128e32e8ca8f28ae7ea8805bb986e78c7d6074 100644 (file)
@@ -708,6 +708,7 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x18);
 static_assert(offsetof(siginfo_t, si_pkey)     == 0x14);
 static_assert(offsetof(siginfo_t, si_perf_data)        == 0x10);
 static_assert(offsetof(siginfo_t, si_perf_type)        == 0x14);
+static_assert(offsetof(siginfo_t, si_perf_flags) == 0x18);
 static_assert(offsetof(siginfo_t, si_band)     == 0x0c);
 static_assert(offsetof(siginfo_t, si_fd)       == 0x10);
 static_assert(offsetof(siginfo_t, si_call_addr)        == 0x0c);
index e5c2fce281cd66f0c6cb13bde87a6ac415b262dd..abdb99fe1e972c44adb427000c9ad2860e9d72fd 100644 (file)
@@ -4,10 +4,7 @@ menuconfig ARCH_SUNXI
        depends on ARCH_MULTI_V5 || ARCH_MULTI_V7
        select ARCH_HAS_RESET_CONTROLLER
        select CLKSRC_MMIO
-       select GENERIC_IRQ_CHIP
        select GPIOLIB
-       select IRQ_DOMAIN_HIERARCHY
-       select IRQ_FASTEOI_HIERARCHY_HANDLERS
        select PINCTRL
        select PM_OPP
        select SUN4I_TIMER
@@ -22,10 +19,12 @@ if ARCH_MULTI_V7
 config MACH_SUN4I
        bool "Allwinner A10 (sun4i) SoCs support"
        default ARCH_SUNXI
+       select SUN4I_INTC
 
 config MACH_SUN5I
        bool "Allwinner A10s / A13 (sun5i) SoCs support"
        default ARCH_SUNXI
+       select SUN4I_INTC
        select SUN5I_HSTIMER
 
 config MACH_SUN6I
@@ -34,6 +33,8 @@ config MACH_SUN6I
        select ARM_GIC
        select MFD_SUN6I_PRCM
        select SUN5I_HSTIMER
+       select SUN6I_R_INTC
+       select SUNXI_NMI_INTC
 
 config MACH_SUN7I
        bool "Allwinner A20 (sun7i) SoCs support"
@@ -43,17 +44,21 @@ config MACH_SUN7I
        select ARCH_SUPPORTS_BIG_ENDIAN
        select HAVE_ARM_ARCH_TIMER
        select SUN5I_HSTIMER
+       select SUNXI_NMI_INTC
 
 config MACH_SUN8I
        bool "Allwinner sun8i Family SoCs support"
        default ARCH_SUNXI
        select ARM_GIC
        select MFD_SUN6I_PRCM
+       select SUN6I_R_INTC
+       select SUNXI_NMI_INTC
 
 config MACH_SUN9I
        bool "Allwinner (sun9i) SoCs support"
        default ARCH_SUNXI
        select ARM_GIC
+       select SUNXI_NMI_INTC
 
 config ARCH_SUNXI_MC_SMP
        bool
@@ -69,6 +74,7 @@ if ARCH_MULTI_V5
 config MACH_SUNIV
        bool "Allwinner ARMv5 F-series (suniv) SoCs support"
        default ARCH_SUNXI
+       select SUN4I_INTC
        help
          Support for Allwinner suniv ARMv5 SoCs.
          (F1C100A, F1C100s, F1C200s, F1C500, F1C600)
index aa08bcb72db935f6e0b8012366936472ab08b10e..290702328a33704f1f1ee552b599231a2c58f122 100644 (file)
@@ -493,3 +493,11 @@ void __init early_ioremap_init(void)
 {
        early_ioremap_setup();
 }
+
+bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+                                unsigned long flags)
+{
+       unsigned long pfn = PHYS_PFN(offset);
+
+       return memblock_is_map_memory(pfn);
+}
index 06dbfb968182de7c585e41308da09eec15360600..fb9f3eb6bf483d22041a06fc9eb17bdf87866558 100644 (file)
@@ -288,6 +288,7 @@ void cpu_v7_ca15_ibe(void)
 {
        if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
                cpu_v7_spectre_v2_init();
+       cpu_v7_spectre_bhb_init();
 }
 
 void cpu_v7_bugs_init(void)
index ec52b776f9267d1f5c1c3826642db4a991148ce8..8ca1c9f262a22029d7ab908bc26a86b6d517f4ed 100644 (file)
@@ -28,7 +28,7 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 CFLAGS_REMOVE_vdso.o = -pg
 
 # Force -O2 to avoid libgcc dependencies
-CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(GCC_PLUGINS_CFLAGS)
+CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS)
 ifeq ($(c-gettimeofday-y),)
 CFLAGS_vgettimeofday.o = -O2
 else
index 20ea89d9ac2fa7cc1564f43ed518a6b18e52270b..d550f5acfaf3f15148f5b51a9f8d360cf8af0d1a 100644 (file)
@@ -262,31 +262,31 @@ config ARM64_CONT_PMD_SHIFT
        default 4
 
 config ARCH_MMAP_RND_BITS_MIN
-       default 14 if ARM64_64K_PAGES
-       default 16 if ARM64_16K_PAGES
-       default 18
+       default 14 if ARM64_64K_PAGES
+       default 16 if ARM64_16K_PAGES
+       default 18
 
 # max bits determined by the following formula:
 #  VA_BITS - PAGE_SHIFT - 3
 config ARCH_MMAP_RND_BITS_MAX
-       default 19 if ARM64_VA_BITS=36
-       default 24 if ARM64_VA_BITS=39
-       default 27 if ARM64_VA_BITS=42
-       default 30 if ARM64_VA_BITS=47
-       default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
-       default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
-       default 33 if ARM64_VA_BITS=48
-       default 14 if ARM64_64K_PAGES
-       default 16 if ARM64_16K_PAGES
-       default 18
+       default 19 if ARM64_VA_BITS=36
+       default 24 if ARM64_VA_BITS=39
+       default 27 if ARM64_VA_BITS=42
+       default 30 if ARM64_VA_BITS=47
+       default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
+       default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
+       default 33 if ARM64_VA_BITS=48
+       default 14 if ARM64_64K_PAGES
+       default 16 if ARM64_16K_PAGES
+       default 18
 
 config ARCH_MMAP_RND_COMPAT_BITS_MIN
-       default 7 if ARM64_64K_PAGES
-       default 9 if ARM64_16K_PAGES
-       default 11
+       default 7 if ARM64_64K_PAGES
+       default 9 if ARM64_16K_PAGES
+       default 11
 
 config ARCH_MMAP_RND_COMPAT_BITS_MAX
-       default 16
+       default 16
 
 config NO_IOPORT_MAP
        def_bool y if !PCI
@@ -313,7 +313,7 @@ config GENERIC_HWEIGHT
        def_bool y
 
 config GENERIC_CSUM
-        def_bool y
+       def_bool y
 
 config GENERIC_CALIBRATE_DELAY
        def_bool y
@@ -1046,8 +1046,7 @@ config SOCIONEXT_SYNQUACER_PREITS
 
          If unsure, say Y.
 
-endmenu
-
+endmenu # "ARM errata workarounds via the alternatives framework"
 
 choice
        prompt "Page size"
@@ -1575,9 +1574,9 @@ config SETEND_EMULATION
          be unexpected results in the applications.
 
          If unsure, say Y
-endif
+endif # ARMV8_DEPRECATED
 
-endif
+endif # COMPAT
 
 menu "ARMv8.1 architectural features"
 
@@ -1602,15 +1601,15 @@ config ARM64_PAN
        bool "Enable support for Privileged Access Never (PAN)"
        default y
        help
-        Privileged Access Never (PAN; part of the ARMv8.1 Extensions)
-        prevents the kernel or hypervisor from accessing user-space (EL0)
-        memory directly.
+         Privileged Access Never (PAN; part of the ARMv8.1 Extensions)
+         prevents the kernel or hypervisor from accessing user-space (EL0)
+         memory directly.
 
-        Choosing this option will cause any unprotected (not using
-        copy_to_user et al) memory access to fail with a permission fault.
+         Choosing this option will cause any unprotected (not using
+         copy_to_user et al) memory access to fail with a permission fault.
 
-        The feature is detected at runtime, and will remain as a 'nop'
-        instruction if the cpu does not implement the feature.
+         The feature is detected at runtime, and will remain as a 'nop'
+         instruction if the cpu does not implement the feature.
 
 config AS_HAS_LDAPR
        def_bool $(as-instr,.arch_extension rcpc)
@@ -1638,15 +1637,15 @@ config ARM64_USE_LSE_ATOMICS
          built with binutils >= 2.25 in order for the new instructions
          to be used.
 
-endmenu
+endmenu # "ARMv8.1 architectural features"
 
 menu "ARMv8.2 architectural features"
 
 config AS_HAS_ARMV8_2
-       def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
+       def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
 
 config AS_HAS_SHA3
-       def_bool $(as-instr,.arch armv8.2-a+sha3)
+       def_bool $(as-instr,.arch armv8.2-a+sha3)
 
 config ARM64_PMEM
        bool "Enable support for persistent memory"
@@ -1690,7 +1689,7 @@ config ARM64_CNP
          at runtime, and does not affect PEs that do not implement
          this feature.
 
-endmenu
+endmenu # "ARMv8.2 architectural features"
 
 menu "ARMv8.3 architectural features"
 
@@ -1753,7 +1752,7 @@ config AS_HAS_PAC
 config AS_HAS_CFI_NEGATE_RA_STATE
        def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n)
 
-endmenu
+endmenu # "ARMv8.3 architectural features"
 
 menu "ARMv8.4 architectural features"
 
@@ -1794,7 +1793,7 @@ config ARM64_TLB_RANGE
          The feature introduces new assembly instructions, and they were
          support when binutils >= 2.30.
 
-endmenu
+endmenu # "ARMv8.4 architectural features"
 
 menu "ARMv8.5 architectural features"
 
@@ -1880,6 +1879,7 @@ config ARM64_MTE
        depends on AS_HAS_LSE_ATOMICS
        # Required for tag checking in the uaccess routines
        depends on ARM64_PAN
+       select ARCH_HAS_SUBPAGE_FAULTS
        select ARCH_USES_HIGH_VMA_FLAGS
        help
          Memory Tagging (part of the ARMv8.5 Extensions) provides
@@ -1901,7 +1901,7 @@ config ARM64_MTE
 
          Documentation/arm64/memory-tagging-extension.rst.
 
-endmenu
+endmenu # "ARMv8.5 architectural features"
 
 menu "ARMv8.7 architectural features"
 
@@ -1910,12 +1910,12 @@ config ARM64_EPAN
        default y
        depends on ARM64_PAN
        help
-        Enhanced Privileged Access Never (EPAN) allows Privileged
-        Access Never to be used with Execute-only mappings.
+         Enhanced Privileged Access Never (EPAN) allows Privileged
+         Access Never to be used with Execute-only mappings.
 
-        The feature is detected at runtime, and will remain disabled
-        if the cpu does not implement the feature.
-endmenu
+         The feature is detected at runtime, and will remain disabled
+         if the cpu does not implement the feature.
+endmenu # "ARMv8.7 architectural features"
 
 config ARM64_SVE
        bool "ARM Scalable Vector Extension support"
@@ -1948,6 +1948,17 @@ config ARM64_SVE
          booting the kernel.  If unsure and you are not observing these
          symptoms, you should assume that it is safe to say Y.
 
+config ARM64_SME
+       bool "ARM Scalable Matrix Extension support"
+       default y
+       depends on ARM64_SVE
+       help
+         The Scalable Matrix Extension (SME) is an extension to the AArch64
+         execution state which utilises a substantial subset of the SVE
+         instruction set, together with the addition of new architectural
+         register state capable of holding two dimensional matrix tiles to
+         enable various matrix operations.
+
 config ARM64_MODULE_PLTS
        bool "Use PLTs to allow module memory to spill over into vmalloc area"
        depends on MODULES
@@ -1991,7 +2002,7 @@ config ARM64_DEBUG_PRIORITY_MASKING
          the validity of ICC_PMR_EL1 when calling concerned functions.
 
          If unsure, say N
-endif
+endif # ARM64_PSEUDO_NMI
 
 config RELOCATABLE
        bool "Build a relocatable kernel image" if EXPERT
@@ -2050,7 +2061,19 @@ config STACKPROTECTOR_PER_TASK
        def_bool y
        depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG
 
-endmenu
+# The GPIO defaults here must be sorted in descending order. In case of
+# a multiplatform kernel, we just want the highest value required by the
+# selected platforms.
+config ARCH_NR_GPIO
+        int
+        default 2048 if ARCH_APPLE
+        default 0
+        help
+          Maximum number of GPIOs in the system.
+
+          If unsure, leave the default value.
+
+endmenu # "Kernel Features"
 
 menu "Boot options"
 
@@ -2114,7 +2137,7 @@ config EFI
        help
          This option provides support for runtime services provided
          by UEFI firmware (such as non-volatile variables, realtime
-          clock, and platform reset). A UEFI stub is also provided to
+         clock, and platform reset). A UEFI stub is also provided to
          allow the kernel to be booted as an EFI application. This
          is only useful on systems that have UEFI firmware.
 
@@ -2129,7 +2152,7 @@ config DMI
          However, even with this option, the resultant kernel should
          continue to boot on existing non-UEFI platforms.
 
-endmenu
+endmenu # "Boot options"
 
 config SYSVIPC_COMPAT
        def_bool y
@@ -2150,7 +2173,7 @@ config ARCH_HIBERNATION_HEADER
 config ARCH_SUSPEND_POSSIBLE
        def_bool y
 
-endmenu
+endmenu # "Power management options"
 
 menu "CPU Power Management"
 
@@ -2158,7 +2181,7 @@ source "drivers/cpuidle/Kconfig"
 
 source "drivers/cpufreq/Kconfig"
 
-endmenu
+endmenu # "CPU Power Management"
 
 source "drivers/acpi/Kconfig"
 
@@ -2166,4 +2189,4 @@ source "arch/arm64/kvm/Kconfig"
 
 if CRYPTO
 source "arch/arm64/crypto/Kconfig"
-endif
+endif # CRYPTO
index 30b123cde02c506f5fe8a4be98a7191293292fd5..4e6d635a1731ee157c57dd25f3ec7787ab47b9a0 100644
@@ -11,12 +11,11 @@ config ARCH_ACTIONS
 config ARCH_SUNXI
        bool "Allwinner sunxi 64-bit SoC Family"
        select ARCH_HAS_RESET_CONTROLLER
-       select GENERIC_IRQ_CHIP
-       select IRQ_DOMAIN_HIERARCHY
-       select IRQ_FASTEOI_HIERARCHY_HANDLERS
        select PINCTRL
        select RESET_CONTROLLER
        select SUN4I_TIMER
+       select SUN6I_R_INTC
+       select SUNXI_NMI_INTC
        help
          This enables support for Allwinner sunxi based SoCs like the A64.
 
@@ -253,6 +252,7 @@ config ARCH_INTEL_SOCFPGA
 
 config ARCH_SYNQUACER
        bool "Socionext SynQuacer SoC Family"
+       select IRQ_FASTEOI_HIERARCHY_HANDLERS
 
 config ARCH_TEGRA
        bool "NVIDIA Tegra SoC Family"
@@ -325,4 +325,4 @@ config ARCH_ZYNQMP
        help
          This enables support for Xilinx ZynqMP Family
 
-endmenu
+endmenu # "Platform selection"
index fb99cc2827c76133fdfd71b97177f518b9386916..7ab3627cc347d6a15c6fd9b8efad50a0f62468ca 100644
        status = "okay";
 };
 
+&rxmacro {
+       status = "okay";
+};
+
 &slpi {
        status = "okay";
        firmware-name = "qcom/sm8250/slpi.mbn";
 };
 
 &swr1 {
+       status = "okay";
+
        wcd_rx: wcd9380-rx@0,4 {
                compatible = "sdw20217010d00";
                reg = <0 4>;
 };
 
 &swr2 {
+       status = "okay";
+
        wcd_tx: wcd9380-tx@0,3 {
                compatible = "sdw20217010d00";
                reg = <0 3>;
        };
 };
 
+&txmacro {
+       status = "okay";
+};
+
 &uart12 {
        status = "okay";
 };
index af8f226364361bcd4e0f00b2f073a389ad3f64ea..1304b86af1a00772ac0478d607f42950b5e318e6 100644
                        pinctrl-0 = <&rx_swr_active>;
                        compatible = "qcom,sm8250-lpass-rx-macro";
                        reg = <0 0x3200000 0 0x1000>;
+                       status = "disabled";
 
                        clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
                                <&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK  LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
                swr1: soundwire-controller@3210000 {
                        reg = <0 0x3210000 0 0x2000>;
                        compatible = "qcom,soundwire-v1.5.1";
+                       status = "disabled";
                        interrupts = <GIC_SPI 298 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&rxmacro>;
                        clock-names = "iface";
                        pinctrl-0 = <&tx_swr_active>;
                        compatible = "qcom,sm8250-lpass-tx-macro";
                        reg = <0 0x3220000 0 0x1000>;
+                       status = "disabled";
 
                        clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
                                 <&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK  LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
                        compatible = "qcom,soundwire-v1.5.1";
                        interrupts-extended = <&intc GIC_SPI 297 IRQ_TYPE_LEVEL_HIGH>;
                        interrupt-names = "core";
+                       status = "disabled";
 
                        clocks = <&txmacro>;
                        clock-names = "iface";
index a01886b467edab74cbfcde84a633424deb8a3b95..067fe4a6b178c30e0895721f00fef271bf7ca924 100644
@@ -16,6 +16,7 @@
 
        aliases {
                ethernet0 = &gmac0;
+               ethernet1 = &gmac1;
                mmc0 = &sdmmc0;
                mmc1 = &sdhci;
        };
@@ -78,7 +79,6 @@
        assigned-clocks = <&cru SCLK_GMAC0_RX_TX>, <&cru SCLK_GMAC0>;
        assigned-clock-parents = <&cru SCLK_GMAC0_RGMII_SPEED>, <&cru CLK_MAC0_2TOP>;
        clock_in_out = "input";
-       phy-handle = <&rgmii_phy0>;
        phy-mode = "rgmii";
        pinctrl-names = "default";
        pinctrl-0 = <&gmac0_miim
        snps,reset-active-low;
        /* Reset time is 20ms, 100ms for rtl8211f */
        snps,reset-delays-us = <0 20000 100000>;
+       tx_delay = <0x4f>;
+       rx_delay = <0x0f>;
+       status = "okay";
+
+       fixed-link {
+               speed = <1000>;
+               full-duplex;
+               pause;
+       };
+};
+
+&gmac1 {
+       assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1>;
+       assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru CLK_MAC1_2TOP>;
+       clock_in_out = "output";
+       phy-handle = <&rgmii_phy1>;
+       phy-mode = "rgmii";
+       pinctrl-names = "default";
+       pinctrl-0 = <&gmac1m1_miim
+                    &gmac1m1_tx_bus2
+                    &gmac1m1_rx_bus2
+                    &gmac1m1_rgmii_clk
+                    &gmac1m1_rgmii_bus>;
+
+       snps,reset-gpio = <&gpio3 RK_PB0 GPIO_ACTIVE_LOW>;
+       snps,reset-active-low;
+       /* Reset time is 20ms, 100ms for rtl8211f */
+       snps,reset-delays-us = <0 20000 100000>;
+
        tx_delay = <0x3c>;
        rx_delay = <0x2f>;
+
        status = "okay";
 };
 
        status = "disabled";
 };
 
-&mdio0 {
-       rgmii_phy0: ethernet-phy@0 {
+&mdio1 {
+       rgmii_phy1: ethernet-phy@0 {
                compatible = "ethernet-phy-ieee802.3-c22";
                reg = <0x0>;
        };
        pmuio2-supply = <&vcc3v3_pmu>;
        vccio1-supply = <&vccio_acodec>;
        vccio3-supply = <&vccio_sd>;
-       vccio4-supply = <&vcc_1v8>;
+       vccio4-supply = <&vcc_3v3>;
        vccio5-supply = <&vcc_3v3>;
-       vccio6-supply = <&vcc_3v3>;
+       vccio6-supply = <&vcc_1v8>;
        vccio7-supply = <&vcc_3v3>;
        status = "okay";
 };
index 345fe98605ba6cf6d4437937f38cd17c2d1d8d2b..5c8ee5a541d2047c10c8440e3691d6b1b9f97c25 100644
@@ -7,3 +7,4 @@ generic-y += parport.h
 generic-y += user.h
 
 generated-y += cpucaps.h
+generated-y += sysreg-defs.h
index 8bd5afc7b692ea16df704f6d51cf7cca2f977d6c..48d4473e8eee2cbddd32b999315f0fed267378b9 100644
  * sets the GP register's most significant bits to 0 with an explicit cast.
  */
 
-static inline void gic_write_eoir(u32 irq)
-{
-       write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
-       isb();
-}
-
 static __always_inline void gic_write_dir(u32 irq)
 {
        write_sysreg_s(irq, SYS_ICC_DIR_EL1);
index d1bb5e71df256ab679b0e275e2ad8e5aa56ae339..3a6b6d38c5b812edcafd3e68922ef153ae4a57ea 100644
@@ -142,7 +142,7 @@ static inline bool __init __early_cpu_has_rndr(void)
 {
        /* Open code as we run prior to the first call to cpufeature. */
        unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1);
-       return (ftr >> ID_AA64ISAR0_RNDR_SHIFT) & 0xf;
+       return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
 }
 
 static inline bool __init __must_check
index 03f52f84a4f3f8a23d8e825ee1b64b321fdf59f7..c762038ba40093c9690d1eae8e3d6ab38d71d5d7 100644
@@ -14,7 +14,7 @@
        14472:  .string file;                                   \
                .popsection;                                    \
                                                                \
-               .long 14472b - 14470b;                          \
+               .long 14472b - .;                               \
                .short line;
 #else
 #define _BUGVERBOSE_LOCATION(file, line)
@@ -25,7 +25,7 @@
 #define __BUG_ENTRY(flags)                             \
                .pushsection __bug_table,"aw";          \
                .align 2;                               \
-       14470:  .long 14471f - 14470b;                  \
+       14470:  .long 14471f - .;                       \
 _BUGVERBOSE_LOCATION(__FILE__, __LINE__)               \
                .short flags;                           \
                .popsection;                            \
index dc3ea4080e2eb62962a1d1d9eca8ab3e67ebd7e2..6fb2e6bcc392fe8b0f5e7ebee1a1893fdc4c548d 100644
 #define __builtin_return_address(val)                                  \
        (void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val)))
 
-#ifdef CONFIG_CFI_CLANG
-/*
- * With CONFIG_CFI_CLANG, the compiler replaces function address
- * references with the address of the function's CFI jump table
- * entry. The function_nocfi macro always returns the address of the
- * actual function instead.
- */
-#define function_nocfi(x) ({                                           \
-       void *addr;                                                     \
-       asm("adrp %0, " __stringify(x) "\n\t"                           \
-           "add  %0, %0, :lo12:" __stringify(x)                        \
-           : "=r" (addr));                                             \
-       addr;                                                           \
-})
-#endif
-
 #endif /* __ASM_COMPILER_H */
index a58e366f0b074c93c28e8464421684c9b06b5ab5..115cdec1ae878202a1ed668555c38b559746d661 100644
@@ -58,11 +58,15 @@ struct cpuinfo_arm64 {
        u64             reg_id_aa64pfr0;
        u64             reg_id_aa64pfr1;
        u64             reg_id_aa64zfr0;
+       u64             reg_id_aa64smfr0;
 
        struct cpuinfo_32bit    aarch32;
 
        /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
        u64             reg_zcr;
+
+       /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
+       u64             reg_smcr;
 };
 
 DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
index c62e7e5e2f0c636fa9255bf47a00df765fe15142..14a8f3d93addf76d67bd4d13814b5202b7437fae 100644
@@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
        return val > 0;
 }
 
+static inline bool id_aa64pfr1_sme(u64 pfr1)
+{
+       u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT);
+
+       return val > 0;
+}
+
 static inline bool id_aa64pfr1_mte(u64 pfr1)
 {
        u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
@@ -759,6 +766,23 @@ static __always_inline bool system_supports_sve(void)
                cpus_have_const_cap(ARM64_SVE);
 }
 
+static __always_inline bool system_supports_sme(void)
+{
+       return IS_ENABLED(CONFIG_ARM64_SME) &&
+               cpus_have_const_cap(ARM64_SME);
+}
+
+static __always_inline bool system_supports_fa64(void)
+{
+       return IS_ENABLED(CONFIG_ARM64_SME) &&
+               cpus_have_const_cap(ARM64_SME_FA64);
+}
+
+static __always_inline bool system_supports_tpidr2(void)
+{
+       return system_supports_sme();
+}
+
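+/*
+ * Sketch (not part of this patch): the intended calling pattern for the
+ * helpers above. The IS_ENABLED() check lets the compiler drop SME-only
+ * paths entirely when CONFIG_ARM64_SME=n; sme_smstop() is declared in
+ * <asm/fpsimd.h>. For example:
+ *
+ *	if (system_supports_sme())
+ *		sme_smstop();	// exit streaming mode and clear ZA
+ */
+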
 static __always_inline bool system_supports_cnp(void)
 {
        return IS_ENABLED(CONFIG_ARM64_CNP) &&
index ff8f4511df71f73d671bbb57449e57ed94f3e7d9..92331c07c2d12fb64bb8acd80e7062636bc7564d 100644
@@ -36,7 +36,7 @@
 #define MIDR_VARIANT(midr)     \
        (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
 #define MIDR_IMPLEMENTOR_SHIFT 24
-#define MIDR_IMPLEMENTOR_MASK  (0xff << MIDR_IMPLEMENTOR_SHIFT)
+#define MIDR_IMPLEMENTOR_MASK  (0xffU << MIDR_IMPLEMENTOR_SHIFT)
 #define MIDR_IMPLEMENTOR(midr) \
        (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
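 
 The 0xff -> 0xffU change above matters because MIDR_IMPLEMENTOR_SHIFT is 24:
 shifting a plain int 0xff left by 24 lands in the sign bit, which is undefined
 behaviour in C. A self-contained sketch of the hazard, with an invented MIDR
 value:
 
     #include <stdint.h>
     #include <stdio.h>
 
     int main(void)
     {
             uint32_t midr = 0xff00abcd;     /* hypothetical MIDR, implementer 0xff */
 
             /* 0xff << 24 would overflow a signed int; 0xffU keeps it unsigned. */
             uint32_t mask = 0xffU << 24;
 
             printf("implementer = 0x%x\n", (midr & mask) >> 24);    /* 0xff */
             return 0;
     }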
 
index 00c291067e57d0e06a7431a3c3af8e905aa49c00..7b7e05c02691c7a8796aff5df79b4524da834614 100644
@@ -64,7 +64,7 @@ struct task_struct;
 
 struct step_hook {
        struct list_head node;
-       int (*fn)(struct pt_regs *regs, unsigned int esr);
+       int (*fn)(struct pt_regs *regs, unsigned long esr);
 };
 
 void register_user_step_hook(struct step_hook *hook);
@@ -75,7 +75,7 @@ void unregister_kernel_step_hook(struct step_hook *hook);
 
 struct break_hook {
        struct list_head node;
-       int (*fn)(struct pt_regs *regs, unsigned int esr);
+       int (*fn)(struct pt_regs *regs, unsigned long esr);
        u16 imm;
        u16 mask; /* These bits are ignored when comparing with imm */
 };
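 
 With this change, hook implementations receive the full 64-bit syndrome. A
 sketch of a hook under the new prototype (the handler body and the BRK
 immediate are placeholders, not from this patch):
 
     static int demo_break_fn(struct pt_regs *regs, unsigned long esr)
     {
             /* inspect esr, fix up regs->pc, ... */
             return 0;
     }
 
     static struct break_hook demo_break_hook = {
             .fn  = demo_break_fn,
             .imm = 0x123,           /* hypothetical BRK immediate */
     };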
index c31be7eda9df413aaef23c8a59a883efccf0047a..34ceff08cac46b6dbdfaa41662b47a9f91c481f4 100644
 .Lskip_sve_\@:
 .endm
 
+/* SME register access and priority mapping */
+.macro __init_el2_nvhe_sme
+       mrs     x1, id_aa64pfr1_el1
+       ubfx    x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+       cbz     x1, .Lskip_sme_\@
+
+       bic     x0, x0, #CPTR_EL2_TSM           // Also disable SME traps
+       msr     cptr_el2, x0                    // Disable copro. traps to EL2
+       isb
+
+       mrs     x1, sctlr_el2
+       orr     x1, x1, #SCTLR_ELx_ENTP2        // Disable TPIDR2 traps
+       msr     sctlr_el2, x1
+       isb
+
+       mov     x1, #0                          // SMCR controls
+
+       mrs_s   x2, SYS_ID_AA64SMFR0_EL1
+       ubfx    x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM?
+       cbz     x2, .Lskip_sme_fa64_\@
+
+       orr     x1, x1, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64_\@:
+
+       orr     x1, x1, #SMCR_ELx_LEN_MASK      // Enable full SME vector
+       msr_s   SYS_SMCR_EL2, x1                // length for EL1.
+
+       mrs_s   x1, SYS_SMIDR_EL1               // Priority mapping supported?
+       ubfx    x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+       cbz     x1, .Lskip_sme_\@
+
+       msr_s   SYS_SMPRIMAP_EL2, xzr           // Make all priorities equal
+
+       mrs     x1, id_aa64mmfr1_el1            // HCRX_EL2 present?
+       ubfx    x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
+       cbz     x1, .Lskip_sme_\@
+
+       mrs_s   x1, SYS_HCRX_EL2
+       orr     x1, x1, #HCRX_EL2_SMPME_MASK    // Enable priority mapping
+       msr_s   SYS_HCRX_EL2, x1
+
+.Lskip_sme_\@:
+.endm
+
 /* Disable any fine grained traps */
 .macro __init_el2_fgt
        mrs     x1, id_aa64mmfr0_el1
        mrs     x1, id_aa64dfr0_el1
        ubfx    x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
        cmp     x1, #3
-       b.lt    .Lset_fgt_\@
+       b.lt    .Lset_debug_fgt_\@
        /* Disable PMSNEVFR_EL1 read and write traps */
        orr     x0, x0, #(1 << 62)
 
-.Lset_fgt_\@:
+.Lset_debug_fgt_\@:
        msr_s   SYS_HDFGRTR_EL2, x0
        msr_s   SYS_HDFGWTR_EL2, x0
-       msr_s   SYS_HFGRTR_EL2, xzr
-       msr_s   SYS_HFGWTR_EL2, xzr
+
+       mov     x0, xzr
+       mrs     x1, id_aa64pfr1_el1
+       ubfx    x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+       cbz     x1, .Lset_fgt_\@
+
+       /* Disable nVHE traps of TPIDR2 and SMPRI */
+       orr     x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
+       orr     x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
+
+.Lset_fgt_\@:
+       msr_s   SYS_HFGRTR_EL2, x0
+       msr_s   SYS_HFGWTR_EL2, x0
        msr_s   SYS_HFGITR_EL2, xzr
 
        mrs     x1, id_aa64pfr0_el1             // AMU traps UNDEF without AMU
        __init_el2_nvhe_idregs
        __init_el2_nvhe_cptr
        __init_el2_nvhe_sve
+       __init_el2_nvhe_sme
        __init_el2_fgt
        __init_el2_nvhe_prepare_eret
 .endm
index d52a0b269ee80e7604f6b1b72c36fea52c0152b8..8f236de7359c76c6d6f17e01af633bf4fb6bef3b 100644
@@ -37,7 +37,8 @@
 #define ESR_ELx_EC_ERET                (0x1a)  /* EL2 only */
 /* Unallocated EC: 0x1B */
 #define ESR_ELx_EC_FPAC                (0x1C)  /* EL1 and above */
-/* Unallocated EC: 0x1D - 0x1E */
+#define ESR_ELx_EC_SME         (0x1D)
+/* Unallocated EC: 0x1E */
 #define ESR_ELx_EC_IMP_DEF     (0x1f)  /* EL3 only */
 #define ESR_ELx_EC_IABT_LOW    (0x20)
 #define ESR_ELx_EC_IABT_CUR    (0x21)
@@ -75,6 +76,7 @@
 #define ESR_ELx_IL_SHIFT       (25)
 #define ESR_ELx_IL             (UL(1) << ESR_ELx_IL_SHIFT)
 #define ESR_ELx_ISS_MASK       (ESR_ELx_IL - 1)
+#define ESR_ELx_ISS(esr)       ((esr) & ESR_ELx_ISS_MASK)
 
 /* ISS field definitions shared by different classes */
 #define ESR_ELx_WNR_SHIFT      (6)
 #define ESR_ELx_WFx_ISS_TI     (UL(1) << 0)
 #define ESR_ELx_WFx_ISS_WFI    (UL(0) << 0)
 #define ESR_ELx_WFx_ISS_WFE    (UL(1) << 0)
-#define ESR_ELx_xVC_IMM_MASK   ((1UL << 16) - 1)
+#define ESR_ELx_xVC_IMM_MASK   ((UL(1) << 16) - 1)
 
 #define DISR_EL1_IDS           (UL(1) << 24)
 /*
 #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\
                                         ESR_ELx_CP15_32_ISS_DIR_READ)
 
+/*
+ * ISS values for SME traps
+ */
+
+#define ESR_ELx_SME_ISS_SME_DISABLED   0
+#define ESR_ELx_SME_ISS_ILL            1
+#define ESR_ELx_SME_ISS_SM_DISABLED    2
+#define ESR_ELx_SME_ISS_ZA_DISABLED    3
+
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 
-static inline bool esr_is_data_abort(u32 esr)
+static inline bool esr_is_data_abort(unsigned long esr)
 {
-       const u32 ec = ESR_ELx_EC(esr);
+       const unsigned long ec = ESR_ELx_EC(esr);
 
        return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
 }
 
-const char *esr_get_class_string(u32 esr);
+const char *esr_get_class_string(unsigned long esr);
 #endif /* __ASSEMBLY */
 
 #endif /* __ASM_ESR_H */
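 
 Taken together with the new ESR_ELx_ISS() helper and the SME ISS values above,
 a trap handler can classify an SME exception along these lines (a sketch, not
 code from this patch):
 
     static void describe_sme_trap(unsigned long esr)
     {
             unsigned long ec  = ESR_ELx_EC(esr);    /* exception class */
             unsigned long iss = ESR_ELx_ISS(esr);   /* bits below IL */
 
             if (ec == ESR_ELx_EC_SME &&
                 iss == ESR_ELx_SME_ISS_SME_DISABLED)
                     pr_info("SME trap taken with SME disabled\n");
     }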
index 339477dca55138dce520ed9f830f1d20bc7bcb13..d94aecff969029c90154c04f4e3333a952f7a569 100644
@@ -19,9 +19,9 @@
 #define __exception_irq_entry  __kprobes
 #endif
 
-static inline u32 disr_to_esr(u64 disr)
+static inline unsigned long disr_to_esr(u64 disr)
 {
-       unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
+       unsigned long esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
 
        if ((disr & DISR_EL1_IDS) == 0)
                esr |= (disr & DISR_EL1_ESR_MASK);
@@ -57,23 +57,24 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs,
                                  void (*func)(struct pt_regs *));
 asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);
 
-void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
+void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
                        struct pt_regs *regs);
-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
-void do_sve_acc(unsigned int esr, struct pt_regs *regs);
-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
-void do_sysinstr(unsigned int esr, struct pt_regs *regs);
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
-void do_cp15instr(unsigned int esr, struct pt_regs *regs);
+void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
+void do_sve_acc(unsigned long esr, struct pt_regs *regs);
+void do_sme_acc(unsigned long esr, struct pt_regs *regs);
+void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs);
+void do_sysinstr(unsigned long esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
+void do_cp15instr(unsigned long esr, struct pt_regs *regs);
 void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
-void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
-void do_serror(struct pt_regs *regs, unsigned int esr);
+void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr);
+void do_serror(struct pt_regs *regs, unsigned long esr);
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
 
-void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far);
+void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
 #endif /* __ASM_EXCEPTION_H */
index cb24385e3632ad4a3d062d01a19d4849bbe6e6c0..9bb1873f529515a9ec9e1ee487a4e9e8a02e1f3b 100644
 #define VFP_STATE_SIZE         ((32 * 8) + 4)
 #endif
 
+/*
+ * When we defined the maximum SVE vector length we defined the ABI so
+ * that the maximum vector length included all the bits reserved in ZCR
+ * for future expansion, rather than just those currently defined by
+ * the architecture. While SME follows a similar pattern, the fact that
+ * it includes a square matrix means that any allocations that attempt
+ * to cover the maximum potential vector length (such as happen with
+ * the regset used for ptrace) end up being extremely large. Define
+ * the much lower actual limit for use in such situations.
+ */
+#define SME_VQ_MAX     16
+
 struct task_struct;
 
 extern void fpsimd_save_state(struct user_fpsimd_state *state);
@@ -46,11 +58,23 @@ extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
 
 extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
-                                    void *sve_state, unsigned int sve_vl);
+                                    void *sve_state, unsigned int sve_vl,
+                                    void *za_state, unsigned int sme_vl,
+                                    u64 *svcr);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
 extern void fpsimd_save_and_flush_cpu_state(void);
 
+static inline bool thread_sm_enabled(struct thread_struct *thread)
+{
+       return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
+}
+
+static inline bool thread_za_enabled(struct thread_struct *thread)
+{
+       return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
+}
+
 /* Maximum VL that SVE/SME VL-agnostic software can transparently support */
 #define VL_ARCH_MAX 0x100
 
@@ -62,7 +86,14 @@ static inline size_t sve_ffr_offset(int vl)
 
 static inline void *sve_pffr(struct thread_struct *thread)
 {
-       return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread));
+       unsigned int vl;
+
+       if (system_supports_sme() && thread_sm_enabled(thread))
+               vl = thread_get_sme_vl(thread);
+       else
+               vl = thread_get_sve_vl(thread);
+
+       return (char *)thread->sve_state + sve_ffr_offset(vl);
 }
 
 extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
@@ -71,11 +102,17 @@ extern void sve_load_state(void const *state, u32 const *pfpsr,
 extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
 extern void sve_set_vq(unsigned long vq_minus_1);
+extern void sme_set_vq(unsigned long vq_minus_1);
+extern void za_save_state(void *state);
+extern void za_load_state(void const *state);
 
 struct arm64_cpu_capabilities;
 extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 
 extern u64 read_zcr_features(void);
+extern u64 read_smcr_features(void);
 
 /*
  * Helpers to translate bit indices in sve_vq_map to VQ values (and
@@ -119,6 +156,7 @@ struct vl_info {
 extern void sve_alloc(struct task_struct *task);
 extern void fpsimd_release_task(struct task_struct *task);
 extern void fpsimd_sync_to_sve(struct task_struct *task);
+extern void fpsimd_force_sync_to_sve(struct task_struct *task);
 extern void sve_sync_to_fpsimd(struct task_struct *task);
 extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
 
@@ -170,6 +208,12 @@ static inline void write_vl(enum vec_type type, u64 val)
                tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
                write_sysreg_s(tmp | val, SYS_ZCR_EL1);
                break;
+#endif
+#ifdef CONFIG_ARM64_SME
+       case ARM64_VEC_SME:
+               tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
+               write_sysreg_s(tmp | val, SYS_SMCR_EL1);
+               break;
 #endif
        default:
                WARN_ON_ONCE(1);
@@ -208,6 +252,8 @@ static inline bool sve_vq_available(unsigned int vq)
        return vq_available(ARM64_VEC_SVE, vq);
 }
 
+size_t sve_state_size(struct task_struct const *task);
+
 #else /* ! CONFIG_ARM64_SVE */
 
 static inline void sve_alloc(struct task_struct *task) { }
@@ -247,8 +293,93 @@ static inline void vec_update_vq_map(enum vec_type t) { }
 static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
 static inline void sve_setup(void) { }
 
+static inline size_t sve_state_size(struct task_struct const *task)
+{
+       return 0;
+}
+
 #endif /* ! CONFIG_ARM64_SVE */
 
+#ifdef CONFIG_ARM64_SME
+
+static inline void sme_user_disable(void)
+{
+       sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
+}
+
+static inline void sme_user_enable(void)
+{
+       sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
+}
+
+static inline void sme_smstart_sm(void)
+{
+       asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop_sm(void)
+{
+       asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop(void)
+{
+       asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
+}
+
+extern void __init sme_setup(void);
+
+static inline int sme_max_vl(void)
+{
+       return vec_max_vl(ARM64_VEC_SME);
+}
+
+static inline int sme_max_virtualisable_vl(void)
+{
+       return vec_max_virtualisable_vl(ARM64_VEC_SME);
+}
+
+extern void sme_alloc(struct task_struct *task);
+extern unsigned int sme_get_vl(void);
+extern int sme_set_current_vl(unsigned long arg);
+extern int sme_get_current_vl(void);
+
+/*
+ * Return how many bytes of memory are required to store the full SME
+ * specific state (currently just ZA) for task, given task's currently
+ * configured vector length.
+ */
+static inline size_t za_state_size(struct task_struct const *task)
+{
+       unsigned int vl = task_get_sme_vl(task);
+
+       return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+}
+
+#else
+
+static inline void sme_user_disable(void) { BUILD_BUG(); }
+static inline void sme_user_enable(void) { BUILD_BUG(); }
+
+static inline void sme_smstart_sm(void) { }
+static inline void sme_smstop_sm(void) { }
+static inline void sme_smstop(void) { }
+
+static inline void sme_alloc(struct task_struct *task) { }
+static inline void sme_setup(void) { }
+static inline unsigned int sme_get_vl(void) { return 0; }
+static inline int sme_max_vl(void) { return 0; }
+static inline int sme_max_virtualisable_vl(void) { return 0; }
+static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
+static inline int sme_get_current_vl(void) { return -EINVAL; }
+
+static inline size_t za_state_size(struct task_struct const *task)
+{
+       return 0;
+}
+
+#endif /* ! CONFIG_ARM64_SME */
+
 /* For use by EFI runtime services calls only */
 extern void __efi_fpsimd_begin(void);
 extern void __efi_fpsimd_end(void);
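 
 For a feel for the sizes involved: ZA is roughly a VL-by-VL byte matrix, so
 the per-task allocation grows quadratically with the streaming vector length.
 Illustrative arithmetic only:
 
     unsigned int vl = 64;                   /* 512-bit streaming VL, in bytes */
     size_t za_bytes = (size_t)vl * vl;      /* 4096 bytes of ZA state */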
index 2509d7dde55a78a08a41433cdf51730f24313573..5e0910cf483216774bed55087db0462bacdcaa5a 100644
        .endif
 .endm
 
+.macro _sme_check_wv v
+       .if (\v) < 12 || (\v) > 15
+               .error "Bad vector select register \v."
+       .endif
+.endm
+
 /* SVE instruction encodings for non-SVE-capable assemblers */
 /* (pre binutils 2.28, all kernel capable clang versions support SVE) */
 
                | (\np)
 .endm
 
+/* SME instruction encodings for non-SME-capable assemblers */
+/* (pre binutils 2.38/LLVM 13) */
+
+/* RDSVL X\nx, #\imm */
+.macro _sme_rdsvl nx, imm
+       _check_general_reg \nx
+       _check_num (\imm), -0x20, 0x1f
+       .inst   0x04bf5800                      \
+               | (\nx)                         \
+               | (((\imm) & 0x3f) << 5)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ *     STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_str_zav nw, nxbase, offset=0
+       _sme_check_wv \nw
+       _check_general_reg \nxbase
+       _check_num (\offset), -0x100, 0xff
+       .inst   0xe1200000                      \
+               | (((\nw) & 3) << 13)           \
+               | ((\nxbase) << 5)              \
+               | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ *     LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_ldr_zav nw, nxbase, offset=0
+       _sme_check_wv \nw
+       _check_general_reg \nxbase
+       _check_num (\offset), -0x100, 0xff
+       .inst   0xe1000000                      \
+               | (((\nw) & 3) << 13)           \
+               | ((\nxbase) << 5)              \
+               | ((\offset) & 7)
+.endm
+
+/*
+ * Zero the entire ZA array
+ *     ZERO ZA
+ */
+.macro zero_za
+       .inst 0xc00800ff
+.endm
+
 .macro __for from:req, to:req
        .if (\from) == (\to)
                _for__body %\from
 921:
 .endm
 
+/* Update SMCR_EL1.LEN with the new VQ */
+.macro sme_load_vq xvqminus1, xtmp, xtmp2
+               mrs_s           \xtmp, SYS_SMCR_EL1
+               bic             \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
+               orr             \xtmp2, \xtmp2, \xvqminus1
+               cmp             \xtmp2, \xtmp
+               b.eq            921f
+               msr_s           SYS_SMCR_EL1, \xtmp2    //self-synchronising
+921:
+.endm
+
 /* Preserve the first 128-bits of Znz and zero the rest. */
 .macro _sve_flush_z nz
        _sve_check_zreg \nz
                ldr             w\nxtmp, [\xpfpsr, #4]
                msr             fpcr, x\nxtmp
 .endm
+
+.macro sme_save_za nxbase, xvl, nw
+       mov     w\nw, #0
+
+423:
+       _sme_str_zav \nw, \nxbase
+       add     x\nxbase, x\nxbase, \xvl
+       add     x\nw, x\nw, #1
+       cmp     \xvl, x\nw
+       bne     423b
+.endm
+
+.macro sme_load_za nxbase, xvl, nw
+       mov     w\nw, #0
+
+423:
+       _sme_ldr_zav \nw, \nxbase
+       add     x\nxbase, x\nxbase, \xvl
+       add     x\nw, x\nw, #1
+       cmp     \xvl, x\nw
+       bne     423b
+.endm
index 1494cfa8639bec0ca1bcb0f2ee0fde30d2910858..dbc45a4157fa7b955df2cdfb6c22446d8ade36ae 100644
@@ -80,8 +80,15 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 struct dyn_ftrace;
+struct ftrace_ops;
+struct ftrace_regs;
+
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
 #define ftrace_init_nop ftrace_init_nop
+
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+                      struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
 #endif
 
 #define ftrace_return_address(n) return_address(n)
index 1242f71937f80ead96fe32e0cd30c6c30f923c42..d656822b13f17b15a6e5675e6c33851dfa04a819 100644
@@ -44,6 +44,8 @@ extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
 #define __HAVE_ARCH_HUGE_PTE_CLEAR
 extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, unsigned long sz);
+#define __HAVE_ARCH_HUGE_PTEP_GET
+extern pte_t huge_ptep_get(pte_t *ptep);
 extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
                                 pte_t *ptep, pte_t pte, unsigned long sz);
 #define set_huge_swap_pte_at set_huge_swap_pte_at
index 8db5ec0089dbd3d476da432faa316102860b1bde..9f0ce004fdbc04766851d94cca09471449420b42 100644
 #define KERNEL_HWCAP_AFP               __khwcap2_feature(AFP)
 #define KERNEL_HWCAP_RPRES             __khwcap2_feature(RPRES)
 #define KERNEL_HWCAP_MTE3              __khwcap2_feature(MTE3)
+#define KERNEL_HWCAP_SME               __khwcap2_feature(SME)
+#define KERNEL_HWCAP_SME_I16I64                __khwcap2_feature(SME_I16I64)
+#define KERNEL_HWCAP_SME_F64F64                __khwcap2_feature(SME_F64F64)
+#define KERNEL_HWCAP_SME_I8I32         __khwcap2_feature(SME_I8I32)
+#define KERNEL_HWCAP_SME_F16F32                __khwcap2_feature(SME_F16F32)
+#define KERNEL_HWCAP_SME_B16F32                __khwcap2_feature(SME_B16F32)
+#define KERNEL_HWCAP_SME_F32F32                __khwcap2_feature(SME_F32F32)
+#define KERNEL_HWCAP_SME_FA64          __khwcap2_feature(SME_FA64)
 
 /*
  * This yields a mask that user programs can use to figure out what
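 
 The new hwcaps surface to userspace through AT_HWCAP2. A sketch of a probe,
 assuming the arm64 uapi header exports the matching HWCAP2_SME bit:
 
     #include <stdio.h>
     #include <sys/auxv.h>
     #include <asm/hwcap.h>          /* HWCAP2_SME, on an arm64 toolchain */
 
     int main(void)
     {
             unsigned long caps = getauxval(AT_HWCAP2);
 
             printf("SME %ssupported\n", (caps & HWCAP2_SME) ? "" : "not ");
             return 0;
     }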
index 7fd836bea7eb42853883fae3ea5015867e3dd0b9..3995652daf81a0af14bba4da469fcfa4a633887d 100644
@@ -192,4 +192,8 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
 extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
 
+extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+                                       unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
+
 #endif /* __ASM_IO_H */
index 1767ded8388802bfc94a444e1bb28129dd993bc3..13ae232ec4a1dba32681d3ea4a9da484e5f20261 100644
 #define CPTR_EL2_TCPAC (1U << 31)
 #define CPTR_EL2_TAM   (1 << 30)
 #define CPTR_EL2_TTA   (1 << 20)
+#define CPTR_EL2_TSM   (1 << 12)
 #define CPTR_EL2_TFP   (1 << CPTR_EL2_TFP_SHIFT)
 #define CPTR_EL2_TZ    (1 << 8)
 #define CPTR_NVHE_EL2_RES1     0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
index f71358271b71c1387db4874afae7d7504a90c879..08233172e7a98c2c89c8063120061fc0f07c9c1e 100644
@@ -236,14 +236,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
        return mode != PSR_MODE_EL0t;
 }
 
-static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
+static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.fault.esr_el2;
 }
 
 static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
 {
-       u32 esr = kvm_vcpu_get_esr(vcpu);
+       u64 esr = kvm_vcpu_get_esr(vcpu);
 
        if (esr & ESR_ELx_CV)
                return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
@@ -374,7 +374,7 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
 
 static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 {
-       u32 esr = kvm_vcpu_get_esr(vcpu);
+       u64 esr = kvm_vcpu_get_esr(vcpu);
        return ESR_ELx_SYS64_ISS_RT(esr);
 }
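 
 Widening the syndrome to u64 matters because architectural ISS2 bits live
 above bit 31; storing the ESR in a u32 silently discards them. A two-line
 illustration with an invented value:
 
     u64 esr = (1ULL << 32) | 0x56000000;    /* hypothetical ESR with an ISS2 bit */
     u32 lost = (u32)esr;                    /* truncation drops bit 32 */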
 
index 94a27a7520f4740e64e202599c11fa75441b4e44..d5888dedf02a10dd4267fe4e1b15380f31a40b12 100644
@@ -153,7 +153,7 @@ struct kvm_arch {
 };
 
 struct kvm_vcpu_fault_info {
-       u32 esr_el2;            /* Hyp Syndrome Register */
+       u64 esr_el2;            /* Hyp Syndrome Register */
+       u64 esr_el2;            /* Hyp Syndrom Register */
        u64 far_el2;            /* Hyp Fault Address Register */
        u64 hpfar_el2;          /* Hyp IPA Fault Address Register */
        u64 disr_el1;           /* Deferred [SError] Status Register */
@@ -295,8 +295,11 @@ struct vcpu_reset_state {
 
 struct kvm_vcpu_arch {
        struct kvm_cpu_context ctxt;
+
+       /* Guest floating point state */
        void *sve_state;
        unsigned int sve_max_vl;
+       u64 svcr;
 
        /* Stage 2 paging state used by the hardware on next switch */
        struct kvm_s2_mmu *hw_mmu;
@@ -451,6 +454,7 @@ struct kvm_vcpu_arch {
 #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE        (1 << 13) /* Save TRBE context if active  */
 #define KVM_ARM64_FP_FOREIGN_FPSTATE   (1 << 14)
 #define KVM_ARM64_ON_UNSUPPORTED_CPU   (1 << 15) /* Physical CPU not in supported_cpus */
+#define KVM_ARM64_HOST_SME_ENABLED     (1 << 16) /* SME enabled for EL0 */
 
 #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
                                 KVM_GUESTDBG_USE_SW_BP | \
index 8ac6ee77437c78a49b342e8a551ea866b05e935e..87e10d9a635b55016596a412b3b79b0951d10085 100644
@@ -14,7 +14,7 @@
  * Was this synchronous external abort a RAS notification?
  * Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
  */
-static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr)
+static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
 {
        /* apei_claim_sea(NULL) expects to mask interrupts itself */
        lockdep_assert_irqs_enabled();
index adcb937342f14d2d431eab7bff46e90a19f9a026..aa523591a44e5635ee1d2ed2fd4f22e418513fa6 100644
@@ -47,6 +47,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
 int mte_ptrace_copy_tags(struct task_struct *child, long request,
                         unsigned long addr, unsigned long data);
+size_t mte_probe_user_range(const char __user *uaddr, size_t size);
 
 #else /* CONFIG_ARM64_MTE */
 
index 66671ff051835bbbacdfbcf69372f32c9fa98af8..dd3d12bce07b2f5c336ae4286e448be4481228b0 100644
@@ -49,7 +49,7 @@
 #define PMD_SHIFT              ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
 #define PMD_SIZE               (_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK               (~(PMD_SIZE-1))
-#define PTRS_PER_PMD           PTRS_PER_PTE
+#define PTRS_PER_PMD           (1 << (PAGE_SHIFT - 3))
 #endif
 
 /*
@@ -59,7 +59,7 @@
 #define PUD_SHIFT              ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
 #define PUD_SIZE               (_AC(1, UL) << PUD_SHIFT)
 #define PUD_MASK               (~(PUD_SIZE-1))
-#define PTRS_PER_PUD           PTRS_PER_PTE
+#define PTRS_PER_PUD           (1 << (PAGE_SHIFT - 3))
 #endif
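 
 The new definitions just spell out that a page of 8-byte table entries holds
 1 << (PAGE_SHIFT - 3) pointers, without routing through PTRS_PER_PTE. Worked
 numbers for 4K pages, as a standalone sketch:
 
     /* PAGE_SHIFT = 12: 1 << (12 - 3) = 512 entries x 8 bytes = 4096 bytes */
     #define EXAMPLE_PAGE_SHIFT     12
     #define EXAMPLE_PTRS_PER_PMD   (1 << (EXAMPLE_PAGE_SHIFT - 3))  /* 512 */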
 
 /*
index dff2b483ea50927249b3392152c0afdf0c190f3d..45c358538f1307cab9bc4659ff62d796596816a6 100644
@@ -1001,7 +1001,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
  */
 static inline bool arch_faults_on_old_pte(void)
 {
-       WARN_ON(preemptible());
+       /* The register read below requires a stable CPU to make any sense */
+       cant_migrate();
 
        return !cpu_has_hw_af();
 }
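 
 cant_migrate() is the right assertion here because the register read only has
 to come from the CPU the task stays on; full non-preemptibility is stronger
 than required. A sketch of a caller that satisfies it (illustrative only):
 
     migrate_disable();
     if (arch_faults_on_old_pte())
             /* hardware won't set the Access Flag; make PTEs young up front */;
     migrate_enable();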
index 73e38d9a540ce94451849f22161fc673c91638d5..bf8aafee1eac10fc32f3dfc69d97270979a3fb0b 100644
@@ -118,6 +118,7 @@ struct debug_info {
 
 enum vec_type {
        ARM64_VEC_SVE = 0,
+       ARM64_VEC_SME,
        ARM64_VEC_MAX,
 };
 
@@ -153,6 +154,7 @@ struct thread_struct {
 
        unsigned int            fpsimd_cpu;
        void                    *sve_state;     /* SVE registers, if any */
+       void                    *za_state;      /* ZA register, if any */
        unsigned int            vl[ARM64_VEC_MAX];      /* vector length */
        unsigned int            vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
        unsigned long           fault_address;  /* fault info */
@@ -168,6 +170,8 @@ struct thread_struct {
        u64                     mte_ctrl;
 #endif
        u64                     sctlr_user;
+       u64                     svcr;
+       u64                     tpidr2_el0;
 };
 
 static inline unsigned int thread_get_vl(struct thread_struct *thread,
@@ -181,6 +185,19 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread)
        return thread_get_vl(thread, ARM64_VEC_SVE);
 }
 
+static inline unsigned int thread_get_sme_vl(struct thread_struct *thread)
+{
+       return thread_get_vl(thread, ARM64_VEC_SME);
+}
+
+static inline unsigned int thread_get_cur_vl(struct thread_struct *thread)
+{
+       if (system_supports_sme() && (thread->svcr & SVCR_SM_MASK))
+               return thread_get_sme_vl(thread);
+       else
+               return thread_get_sve_vl(thread);
+}
+
 unsigned int task_get_vl(const struct task_struct *task, enum vec_type type);
 void task_set_vl(struct task_struct *task, enum vec_type type,
                 unsigned long vl);
@@ -194,6 +211,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task)
        return task_get_vl(task, ARM64_VEC_SVE);
 }
 
+static inline unsigned int task_get_sme_vl(const struct task_struct *task)
+{
+       return task_get_vl(task, ARM64_VEC_SME);
+}
+
 static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl)
 {
        task_set_vl(task, ARM64_VEC_SVE, vl);
@@ -354,9 +376,11 @@ extern void __init minsigstksz_setup(void);
  */
 #include <asm/fpsimd.h>
 
-/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
+/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */
 #define SVE_SET_VL(arg)        sve_set_current_vl(arg)
 #define SVE_GET_VL()   sve_get_current_vl()
+#define SME_SET_VL(arg)        sme_set_current_vl(arg)
+#define SME_GET_VL()   sme_get_current_vl()
 
 /* PR_PAC_RESET_KEYS prctl */
 #define PAC_RESET_KEYS(tsk, arg)       ptrauth_prctl_reset_keys(tsk, arg)
@@ -381,12 +405,10 @@ long get_tagged_addr_ctrl(struct task_struct *task);
  * of header definitions for the use of task_stack_page.
  */
 
-#define current_top_of_stack()                                                         \
-({                                                                                     \
-       struct stack_info _info;                                                        \
-       BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info));        \
-       _info.high;                                                                     \
-})
+/*
+ * The top of the current task's task stack
+ */
+#define current_top_of_stack() ((unsigned long)current->stack + THREAD_SIZE)
 #define on_thread_stack()      (on_task_stack(current, current_stack_pointer, 1, NULL))
 
 #endif /* __ASSEMBLY__ */
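 
 thread_get_cur_vl() above gives the vector length that is live right now,
 which is what buffer-walking code wants when streaming mode may be active. A
 sketch of the intended use (the size calculation is only illustrative):
 
     unsigned int vl = thread_get_cur_vl(&current->thread);
     size_t ffr_off  = sve_ffr_offset(vl);   /* offsets scale with the live VL */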
index e77cdef9ca29bba1ca37954c6880d2a48354d9cf..aec9315bf15639a7d33c487adbdeb3e3768f8bf2 100644
@@ -31,38 +31,6 @@ struct stack_info {
        enum stack_type type;
 };
 
-/*
- * A snapshot of a frame record or fp/lr register values, along with some
- * accounting information necessary for robust unwinding.
- *
- * @fp:          The fp value in the frame record (or the real fp)
- * @pc:          The lr value in the frame record (or the real lr)
- *
- * @stacks_done: Stacks which have been entirely unwound, for which it is no
- *               longer valid to unwind to.
- *
- * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
- *               of 0. This is used to ensure that within a stack, each
- *               subsequent frame record is at an increasing address.
- * @prev_type:   The type of stack this frame record was on, or a synthetic
- *               value of STACK_TYPE_UNKNOWN. This is used to detect a
- *               transition from one stack to another.
- *
- * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
- *               associated with the most recently encountered replacement lr
- *               value.
- */
-struct stackframe {
-       unsigned long fp;
-       unsigned long pc;
-       DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
-       unsigned long prev_fp;
-       enum stack_type prev_type;
-#ifdef CONFIG_KRETPROBES
-       struct llist_node *kr_cur;
-#endif
-};
-
 extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
                           const char *loglvl);
 
index fbf5f8bb90555e37a8b26e97ff5770ec71f50c73..55f998c3dc284fd3764e32928ccec98d541d870f 100644
 #define SYS_DC_CSW                     sys_insn(1, 0, 7, 10, 2)
 #define SYS_DC_CISW                    sys_insn(1, 0, 7, 14, 2)
 
+/*
+ * Automatically generated definitions for system registers, the
+ * manual encodings below are in the process of being converted to
+ * come from here. The header relies on the definition of sys_reg()
+ * earlier in this file.
+ */
+#include "asm/sysreg-defs.h"
+
 /*
  * System registers, organised loosely by encoding but grouped together
  * where the architected name contains an index. e.g. ID_MMFR<n>_EL1.
  */
+#define SYS_SVCR_SMSTOP_SM_EL0         sys_reg(0, 3, 4, 2, 3)
+#define SYS_SVCR_SMSTART_SM_EL0                sys_reg(0, 3, 4, 3, 3)
+#define SYS_SVCR_SMSTOP_SMZA_EL0       sys_reg(0, 3, 4, 6, 3)
+
 #define SYS_OSDTRRX_EL1                        sys_reg(2, 0, 0, 0, 2)
 #define SYS_MDCCINT_EL1                        sys_reg(2, 0, 0, 2, 0)
 #define SYS_MDSCR_EL1                  sys_reg(2, 0, 0, 2, 2)
 #define SYS_ID_AA64PFR0_EL1            sys_reg(3, 0, 0, 4, 0)
 #define SYS_ID_AA64PFR1_EL1            sys_reg(3, 0, 0, 4, 1)
 #define SYS_ID_AA64ZFR0_EL1            sys_reg(3, 0, 0, 4, 4)
+#define SYS_ID_AA64SMFR0_EL1           sys_reg(3, 0, 0, 4, 5)
 
 #define SYS_ID_AA64DFR0_EL1            sys_reg(3, 0, 0, 5, 0)
 #define SYS_ID_AA64DFR1_EL1            sys_reg(3, 0, 0, 5, 1)
 #define SYS_ID_AA64AFR0_EL1            sys_reg(3, 0, 0, 5, 4)
 #define SYS_ID_AA64AFR1_EL1            sys_reg(3, 0, 0, 5, 5)
 
-#define SYS_ID_AA64ISAR0_EL1           sys_reg(3, 0, 0, 6, 0)
 #define SYS_ID_AA64ISAR1_EL1           sys_reg(3, 0, 0, 6, 1)
 #define SYS_ID_AA64ISAR2_EL1           sys_reg(3, 0, 0, 6, 2)
 
 #define SYS_ID_AA64MMFR1_EL1           sys_reg(3, 0, 0, 7, 1)
 #define SYS_ID_AA64MMFR2_EL1           sys_reg(3, 0, 0, 7, 2)
 
-#define SYS_SCTLR_EL1                  sys_reg(3, 0, 1, 0, 0)
 #define SYS_ACTLR_EL1                  sys_reg(3, 0, 1, 0, 1)
-#define SYS_CPACR_EL1                  sys_reg(3, 0, 1, 0, 2)
 #define SYS_RGSR_EL1                   sys_reg(3, 0, 1, 0, 5)
 #define SYS_GCR_EL1                    sys_reg(3, 0, 1, 0, 6)
 
-#define SYS_ZCR_EL1                    sys_reg(3, 0, 1, 2, 0)
 #define SYS_TRFCR_EL1                  sys_reg(3, 0, 1, 2, 1)
 
-#define SYS_TTBR0_EL1                  sys_reg(3, 0, 2, 0, 0)
-#define SYS_TTBR1_EL1                  sys_reg(3, 0, 2, 0, 1)
 #define SYS_TCR_EL1                    sys_reg(3, 0, 2, 0, 2)
 
 #define SYS_APIAKEYLO_EL1              sys_reg(3, 0, 2, 1, 0)
 #define SYS_TFSR_EL1                   sys_reg(3, 0, 5, 6, 0)
 #define SYS_TFSRE0_EL1                 sys_reg(3, 0, 5, 6, 1)
 
-#define SYS_FAR_EL1                    sys_reg(3, 0, 6, 0, 0)
 #define SYS_PAR_EL1                    sys_reg(3, 0, 7, 4, 0)
 
 #define SYS_PAR_EL1_F                  BIT(0)
 #define SYS_ICC_IGRPEN0_EL1            sys_reg(3, 0, 12, 12, 6)
 #define SYS_ICC_IGRPEN1_EL1            sys_reg(3, 0, 12, 12, 7)
 
-#define SYS_CONTEXTIDR_EL1             sys_reg(3, 0, 13, 0, 1)
 #define SYS_TPIDR_EL1                  sys_reg(3, 0, 13, 0, 4)
 
 #define SYS_SCXTNUM_EL1                        sys_reg(3, 0, 13, 0, 7)
 #define SYS_CNTKCTL_EL1                        sys_reg(3, 0, 14, 1, 0)
 
 #define SYS_CCSIDR_EL1                 sys_reg(3, 1, 0, 0, 0)
-#define SYS_CLIDR_EL1                  sys_reg(3, 1, 0, 0, 1)
 #define SYS_GMID_EL1                   sys_reg(3, 1, 0, 0, 4)
 #define SYS_AIDR_EL1                   sys_reg(3, 1, 0, 0, 7)
 
-#define SYS_CSSELR_EL1                 sys_reg(3, 2, 0, 0, 0)
+#define SMIDR_EL1_IMPLEMENTER_SHIFT    24
+#define SMIDR_EL1_SMPS_SHIFT   15
+#define SMIDR_EL1_AFFINITY_SHIFT       0
 
 #define SYS_CTR_EL0                    sys_reg(3, 3, 0, 0, 1)
 #define SYS_DCZID_EL0                  sys_reg(3, 3, 0, 0, 7)
 
 #define SYS_TPIDR_EL0                  sys_reg(3, 3, 13, 0, 2)
 #define SYS_TPIDRRO_EL0                        sys_reg(3, 3, 13, 0, 3)
+#define SYS_TPIDR2_EL0                 sys_reg(3, 3, 13, 0, 5)
 
 #define SYS_SCXTNUM_EL0                        sys_reg(3, 3, 13, 0, 7)
 
 #define SYS_HFGRTR_EL2                 sys_reg(3, 4, 1, 1, 4)
 #define SYS_HFGWTR_EL2                 sys_reg(3, 4, 1, 1, 5)
 #define SYS_HFGITR_EL2                 sys_reg(3, 4, 1, 1, 6)
-#define SYS_ZCR_EL2                    sys_reg(3, 4, 1, 2, 0)
 #define SYS_TRFCR_EL2                  sys_reg(3, 4, 1, 2, 1)
-#define SYS_DACR32_EL2                 sys_reg(3, 4, 3, 0, 0)
+#define SYS_HCRX_EL2                   sys_reg(3, 4, 1, 2, 2)
 #define SYS_HDFGRTR_EL2                        sys_reg(3, 4, 3, 1, 4)
 #define SYS_HDFGWTR_EL2                        sys_reg(3, 4, 3, 1, 5)
 #define SYS_HAFGRTR_EL2                        sys_reg(3, 4, 3, 1, 6)
 #define SYS_VSESR_EL2                  sys_reg(3, 4, 5, 2, 3)
 #define SYS_FPEXC32_EL2                        sys_reg(3, 4, 5, 3, 0)
 #define SYS_TFSR_EL2                   sys_reg(3, 4, 5, 6, 0)
-#define SYS_FAR_EL2                    sys_reg(3, 4, 6, 0, 0)
 
 #define SYS_VDISR_EL2                  sys_reg(3, 4, 12, 1,  1)
 #define __SYS__AP0Rx_EL2(x)            sys_reg(3, 4, 12, 8, x)
 
 /* VHE encodings for architectural EL0/1 system registers */
 #define SYS_SCTLR_EL12                 sys_reg(3, 5, 1, 0, 0)
-#define SYS_CPACR_EL12                 sys_reg(3, 5, 1, 0, 2)
-#define SYS_ZCR_EL12                   sys_reg(3, 5, 1, 2, 0)
 #define SYS_TTBR0_EL12                 sys_reg(3, 5, 2, 0, 0)
 #define SYS_TTBR1_EL12                 sys_reg(3, 5, 2, 0, 1)
 #define SYS_TCR_EL12                   sys_reg(3, 5, 2, 0, 2)
 #define SYS_AFSR1_EL12                 sys_reg(3, 5, 5, 1, 1)
 #define SYS_ESR_EL12                   sys_reg(3, 5, 5, 2, 0)
 #define SYS_TFSR_EL12                  sys_reg(3, 5, 5, 6, 0)
-#define SYS_FAR_EL12                   sys_reg(3, 5, 6, 0, 0)
 #define SYS_MAIR_EL12                  sys_reg(3, 5, 10, 2, 0)
 #define SYS_AMAIR_EL12                 sys_reg(3, 5, 10, 3, 0)
 #define SYS_VBAR_EL12                  sys_reg(3, 5, 12, 0, 0)
-#define SYS_CONTEXTIDR_EL12            sys_reg(3, 5, 13, 0, 1)
 #define SYS_CNTKCTL_EL12               sys_reg(3, 5, 14, 1, 0)
 #define SYS_CNTP_TVAL_EL02             sys_reg(3, 5, 14, 2, 0)
 #define SYS_CNTP_CTL_EL02              sys_reg(3, 5, 14, 2, 1)
 #define SYS_CNTV_CVAL_EL02             sys_reg(3, 5, 14, 3, 2)
 
 /* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_ENTP2        (BIT(60))
 #define SCTLR_ELx_DSSBS        (BIT(44))
 #define SCTLR_ELx_ATA  (BIT(43))
 
-#define SCTLR_ELx_TCF_SHIFT    40
-#define SCTLR_ELx_TCF_NONE     (UL(0x0) << SCTLR_ELx_TCF_SHIFT)
-#define SCTLR_ELx_TCF_SYNC     (UL(0x1) << SCTLR_ELx_TCF_SHIFT)
-#define SCTLR_ELx_TCF_ASYNC    (UL(0x2) << SCTLR_ELx_TCF_SHIFT)
-#define SCTLR_ELx_TCF_ASYMM    (UL(0x3) << SCTLR_ELx_TCF_SHIFT)
-#define SCTLR_ELx_TCF_MASK     (UL(0x3) << SCTLR_ELx_TCF_SHIFT)
-
 #define SCTLR_ELx_ENIA_SHIFT   31
 
-#define SCTLR_ELx_ITFSB        (BIT(37))
-#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT))
-#define SCTLR_ELx_ENIB (BIT(30))
-#define SCTLR_ELx_ENDA (BIT(27))
-#define SCTLR_ELx_EE    (BIT(25))
-#define SCTLR_ELx_IESB (BIT(21))
-#define SCTLR_ELx_WXN  (BIT(19))
-#define SCTLR_ELx_ENDB (BIT(13))
-#define SCTLR_ELx_I    (BIT(12))
-#define SCTLR_ELx_SA   (BIT(3))
-#define SCTLR_ELx_C    (BIT(2))
-#define SCTLR_ELx_A    (BIT(1))
-#define SCTLR_ELx_M    (BIT(0))
+#define SCTLR_ELx_ITFSB         (BIT(37))
+#define SCTLR_ELx_ENIA  (BIT(SCTLR_ELx_ENIA_SHIFT))
+#define SCTLR_ELx_ENIB  (BIT(30))
+#define SCTLR_ELx_LSMAOE (BIT(29))
+#define SCTLR_ELx_nTLSMD (BIT(28))
+#define SCTLR_ELx_ENDA  (BIT(27))
+#define SCTLR_ELx_EE     (BIT(25))
+#define SCTLR_ELx_EIS   (BIT(22))
+#define SCTLR_ELx_IESB  (BIT(21))
+#define SCTLR_ELx_TSCXT         (BIT(20))
+#define SCTLR_ELx_WXN   (BIT(19))
+#define SCTLR_ELx_ENDB  (BIT(13))
+#define SCTLR_ELx_I     (BIT(12))
+#define SCTLR_ELx_EOS   (BIT(11))
+#define SCTLR_ELx_SA    (BIT(3))
+#define SCTLR_ELx_C     (BIT(2))
+#define SCTLR_ELx_A     (BIT(1))
+#define SCTLR_ELx_M     (BIT(0))
 
 /* SCTLR_EL2 specific flags. */
 #define SCTLR_EL2_RES1 ((BIT(4))  | (BIT(5))  | (BIT(11)) | (BIT(16)) | \
        (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
 
 /* SCTLR_EL1 specific flags. */
-#define SCTLR_EL1_EPAN         (BIT(57))
-#define SCTLR_EL1_ATA0         (BIT(42))
-
-#define SCTLR_EL1_TCF0_SHIFT   38
-#define SCTLR_EL1_TCF0_NONE    (UL(0x0) << SCTLR_EL1_TCF0_SHIFT)
-#define SCTLR_EL1_TCF0_SYNC    (UL(0x1) << SCTLR_EL1_TCF0_SHIFT)
-#define SCTLR_EL1_TCF0_ASYNC   (UL(0x2) << SCTLR_EL1_TCF0_SHIFT)
-#define SCTLR_EL1_TCF0_ASYMM   (UL(0x3) << SCTLR_EL1_TCF0_SHIFT)
-#define SCTLR_EL1_TCF0_MASK    (UL(0x3) << SCTLR_EL1_TCF0_SHIFT)
-
-#define SCTLR_EL1_BT1          (BIT(36))
-#define SCTLR_EL1_BT0          (BIT(35))
-#define SCTLR_EL1_UCI          (BIT(26))
-#define SCTLR_EL1_E0E          (BIT(24))
-#define SCTLR_EL1_SPAN         (BIT(23))
-#define SCTLR_EL1_NTWE         (BIT(18))
-#define SCTLR_EL1_NTWI         (BIT(16))
-#define SCTLR_EL1_UCT          (BIT(15))
-#define SCTLR_EL1_DZE          (BIT(14))
-#define SCTLR_EL1_UMA          (BIT(9))
-#define SCTLR_EL1_SED          (BIT(8))
-#define SCTLR_EL1_ITD          (BIT(7))
-#define SCTLR_EL1_CP15BEN      (BIT(5))
-#define SCTLR_EL1_SA0          (BIT(4))
-
-#define SCTLR_EL1_RES1 ((BIT(11)) | (BIT(20)) | (BIT(22)) | (BIT(28)) | \
-                        (BIT(29)))
-
 #ifdef CONFIG_CPU_BIG_ENDIAN
 #define ENDIAN_SET_EL1         (SCTLR_EL1_E0E | SCTLR_ELx_EE)
 #else
 #endif
 
 #define INIT_SCTLR_EL1_MMU_OFF \
-       (ENDIAN_SET_EL1 | SCTLR_EL1_RES1)
+       (ENDIAN_SET_EL1 | SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | \
+        SCTLR_EL1_EIS  | SCTLR_EL1_TSCXT  | SCTLR_EL1_EOS)
 
 #define INIT_SCTLR_EL1_MMU_ON \
-       (SCTLR_ELx_M    | SCTLR_ELx_C    | SCTLR_ELx_SA   | SCTLR_EL1_SA0   | \
-        SCTLR_EL1_SED  | SCTLR_ELx_I    | SCTLR_EL1_DZE  | SCTLR_EL1_UCT   | \
-        SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
-        ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
+       (SCTLR_ELx_M      | SCTLR_ELx_C      | SCTLR_ELx_SA    | \
+        SCTLR_EL1_SA0    | SCTLR_EL1_SED    | SCTLR_ELx_I     | \
+        SCTLR_EL1_DZE    | SCTLR_EL1_UCT    | SCTLR_EL1_nTWE  | \
+        SCTLR_ELx_IESB   | SCTLR_EL1_SPAN   | SCTLR_ELx_ITFSB | \
+        ENDIAN_SET_EL1   | SCTLR_EL1_UCI    | SCTLR_EL1_EPAN  | \
+        SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | SCTLR_EL1_EIS   | \
+        SCTLR_EL1_TSCXT  | SCTLR_EL1_EOS)
 
 /* MAIR_ELx memory attributes (used by Linux) */
 #define MAIR_ATTR_DEVICE_nGnRnE                UL(0x00)
 /* Position the attr at the correct index */
 #define MAIR_ATTRIDX(attr, idx)                ((attr) << ((idx) * 8))
 
-/* id_aa64isar0 */
-#define ID_AA64ISAR0_RNDR_SHIFT                60
-#define ID_AA64ISAR0_TLB_SHIFT         56
-#define ID_AA64ISAR0_TS_SHIFT          52
-#define ID_AA64ISAR0_FHM_SHIFT         48
-#define ID_AA64ISAR0_DP_SHIFT          44
-#define ID_AA64ISAR0_SM4_SHIFT         40
-#define ID_AA64ISAR0_SM3_SHIFT         36
-#define ID_AA64ISAR0_SHA3_SHIFT                32
-#define ID_AA64ISAR0_RDM_SHIFT         28
-#define ID_AA64ISAR0_ATOMICS_SHIFT     20
-#define ID_AA64ISAR0_CRC32_SHIFT       16
-#define ID_AA64ISAR0_SHA2_SHIFT                12
-#define ID_AA64ISAR0_SHA1_SHIFT                8
-#define ID_AA64ISAR0_AES_SHIFT         4
-
-#define ID_AA64ISAR0_TLB_RANGE_NI      0x0
-#define ID_AA64ISAR0_TLB_RANGE         0x2
-
 /* id_aa64isar1 */
 #define ID_AA64ISAR1_I8MM_SHIFT                52
 #define ID_AA64ISAR1_DGH_SHIFT         48
 #define ID_AA64PFR0_ELx_32BIT_64BIT    0x2
 
 /* id_aa64pfr1 */
+#define ID_AA64PFR1_SME_SHIFT          24
 #define ID_AA64PFR1_MPAMFRAC_SHIFT     16
 #define ID_AA64PFR1_RASFRAC_SHIFT      12
 #define ID_AA64PFR1_MTE_SHIFT          8
 #define ID_AA64PFR1_SSBS_PSTATE_ONLY   1
 #define ID_AA64PFR1_SSBS_PSTATE_INSNS  2
 #define ID_AA64PFR1_BT_BTI             0x1
+#define ID_AA64PFR1_SME                        1
 
 #define ID_AA64PFR1_MTE_NI             0x0
 #define ID_AA64PFR1_MTE_EL0            0x1
 #define ID_AA64ZFR0_AES_PMULL          0x2
 #define ID_AA64ZFR0_SVEVER_SVE2                0x1
 
+/* id_aa64smfr0 */
+#define ID_AA64SMFR0_FA64_SHIFT                63
+#define ID_AA64SMFR0_I16I64_SHIFT      52
+#define ID_AA64SMFR0_F64F64_SHIFT      48
+#define ID_AA64SMFR0_I8I32_SHIFT       36
+#define ID_AA64SMFR0_F16F32_SHIFT      35
+#define ID_AA64SMFR0_B16F32_SHIFT      34
+#define ID_AA64SMFR0_F32F32_SHIFT      32
+
+#define ID_AA64SMFR0_FA64              0x1
+#define ID_AA64SMFR0_I16I64            0x4
+#define ID_AA64SMFR0_F64F64            0x1
+#define ID_AA64SMFR0_I8I32             0x4
+#define ID_AA64SMFR0_F16F32            0x1
+#define ID_AA64SMFR0_B16F32            0x1
+#define ID_AA64SMFR0_F32F32            0x1
+
 /* id_aa64mmfr0 */
 #define ID_AA64MMFR0_ECV_SHIFT         60
 #define ID_AA64MMFR0_FGT_SHIFT         56
 
 /* id_aa64mmfr1 */
 #define ID_AA64MMFR1_ECBHB_SHIFT       60
 #define ID_AA64MMFR1_AFP_SHIFT         44
+#define ID_AA64MMFR1_HCX_SHIFT         40
 #define ID_AA64MMFR1_ETS_SHIFT         36
 #define ID_AA64MMFR1_TWED_SHIFT                32
 #define DCZID_DZP_SHIFT                        4
 #define DCZID_BS_SHIFT                 0
 
-/*
- * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which
- * are reserved by the SVE architecture for future expansion of the LEN
- * field, with compatible semantics.
- */
-#define ZCR_ELx_LEN_SHIFT      0
-#define ZCR_ELx_LEN_SIZE       9
-#define ZCR_ELx_LEN_MASK       0x1ff
-
 #define CPACR_EL1_FPEN_EL1EN   (BIT(20)) /* enable EL1 access */
 #define CPACR_EL1_FPEN_EL0EN   (BIT(21)) /* enable EL0 access, if EL1EN set */
 
+#define CPACR_EL1_SMEN_EL1EN   (BIT(24)) /* enable EL1 access */
+#define CPACR_EL1_SMEN_EL0EN   (BIT(25)) /* enable EL0 access, if EL1EN set */
+
 #define CPACR_EL1_ZEN_EL1EN    (BIT(16)) /* enable EL1 access */
 #define CPACR_EL1_ZEN_EL0EN    (BIT(17)) /* enable EL0 access, if EL1EN set */
 
 #define TRFCR_ELx_ExTRE                        BIT(1)
 #define TRFCR_ELx_E0TRE                        BIT(0)
 
+/* HCRX_EL2 definitions */
+#define HCRX_EL2_SMPME_MASK            (1 << 5)
 
 /* GIC Hypervisor interface registers */
 /* ICH_MISR_EL2 bit definitions */
 #define ICH_VTR_TDS_SHIFT      19
 #define ICH_VTR_TDS_MASK       (1 << ICH_VTR_TDS_SHIFT)
 
+/* HFG[WR]TR_EL2 bit definitions */
+#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT   55
+#define HFGxTR_EL2_nTPIDR2_EL0_MASK    BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT)
+#define HFGxTR_EL2_nSMPRI_EL1_SHIFT    54
+#define HFGxTR_EL2_nSMPRI_EL1_MASK     BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT)
+
 #define ARM64_FEATURE_FIELD_BITS       4
 
 /* Create a mask for the feature bits of the specified feature. */
 
 #endif
 
+#define SYS_FIELD_PREP(reg, field, val)                \
+                FIELD_PREP(reg##_##field##_MASK, val)
+
+#define SYS_FIELD_PREP_ENUM(reg, field, val)           \
+                FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+
 #endif /* __ASM_SYSREG_H */
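
The SYS_FIELD_PREP() helpers added above are token-pasting wrappers around FIELD_PREP(): they splice the register name, the field name and (for the _ENUM variant) the value name into the generated mask and enum identifiers. A minimal sketch of their expansion, assuming the generated ID_AA64ISAR0_EL1_* definitions from this series are in scope:

    /* Illustrative only: assumes generated definitions such as
     * ID_AA64ISAR0_EL1_RNDR_MASK and ID_AA64ISAR0_EL1_RNDR_IMP exist. */
    u64 raw   = SYS_FIELD_PREP(ID_AA64ISAR0_EL1, RNDR, 1);
    /* -> FIELD_PREP(ID_AA64ISAR0_EL1_RNDR_MASK, 1) */
    u64 named = SYS_FIELD_PREP_ENUM(ID_AA64ISAR0_EL1, RNDR, IMP);
    /* -> FIELD_PREP(ID_AA64ISAR0_EL1_RNDR_MASK, ID_AA64ISAR0_EL1_RNDR_IMP) */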
index 305a7157c6a6a0f8aa0da0447a2a996a9a370d70..0eb7709422e292ce97860b7c7fc84a4b5bdc4216 100644 (file)
@@ -23,9 +23,9 @@ void die(const char *msg, struct pt_regs *regs, int err);
 struct siginfo;
 void arm64_notify_die(const char *str, struct pt_regs *regs,
                      int signo, int sicode, unsigned long far,
-                     int err);
+                     unsigned long err);
 
-void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
+void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long,
                                             struct pt_regs *),
                           int sig, int code, const char *name);
 
index e1317b7c452519b0a65f87bfb62646e9933679e4..848739c15de8274e098af57dae60bda1423a1509 100644 (file)
@@ -82,6 +82,8 @@ int arch_dup_task_struct(struct task_struct *dst,
 #define TIF_SVE_VL_INHERIT     24      /* Inherit SVE vl_onexec across exec */
 #define TIF_SSBD               25      /* Wants SSB mitigation */
 #define TIF_TAGGED_ADDR                26      /* Allow tagged user addresses */
+#define TIF_SME                        27      /* SME in use */
+#define TIF_SME_VL_INHERIT     28      /* Inherit SME vl_onexec across exec */
 
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
index 54f32a0675dffd81981c066a813d092415be81f7..6e5826470bea6629dba214957845bab6bb29701f 100644 (file)
@@ -24,7 +24,7 @@ struct undef_hook {
 
 void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
-void force_signal_inject(int signal, int code, unsigned long address, unsigned int err);
+void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
 void arm64_notify_segfault(unsigned long addr);
 void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
@@ -57,7 +57,7 @@ static inline int in_entry_text(unsigned long ptr)
  * errors share the same encoding as an all-zeros encoding from a CPU that
  * doesn't support RAS.
  */
-static inline bool arm64_is_ras_serror(u32 esr)
+static inline bool arm64_is_ras_serror(unsigned long esr)
 {
        WARN_ON(preemptible());
 
@@ -77,9 +77,9 @@ static inline bool arm64_is_ras_serror(u32 esr)
  * We treat them as Uncontainable.
  * Non-RAS SErrors are reported as Uncontained/Uncategorized.
  */
-static inline u32 arm64_ras_serror_get_severity(u32 esr)
+static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr)
 {
-       u32 aet = esr & ESR_ELx_AET;
+       unsigned long aet = esr & ESR_ELx_AET;
 
        if (!arm64_is_ras_serror(esr)) {
                /* Not a RAS error, we can't interpret the ESR. */
@@ -98,6 +98,6 @@ static inline u32 arm64_ras_serror_get_severity(u32 esr)
        return aet;
 }
 
-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr);
-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr);
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr);
+void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr);
 #endif
index e8dce0cc5eaae7f50d03fed5146ba9004e1039b8..63f9c828f1a7103a39aa43d7db0b0c08cf940a52 100644 (file)
@@ -460,4 +460,19 @@ static inline int __copy_from_user_flushcache(void *dst, const void __user *src,
 }
 #endif
 
+#ifdef CONFIG_ARCH_HAS_SUBPAGE_FAULTS
+
+/*
+ * Return 0 on success, the number of bytes not probed otherwise.
+ */
+static inline size_t probe_subpage_writeable(const char __user *uaddr,
+                                            size_t size)
+{
+       if (!system_supports_mte())
+               return 0;
+       return mte_probe_user_range(uaddr, size);
+}
+
+#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
+
 #endif /* __ASM_UACCESS_H */
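
probe_subpage_writeable() gives callers a way to detect MTE tag-check faults at sub-page granularity before committing to an operation that cannot tolerate a partial fault. A hedged usage sketch (ubuf and len are hypothetical names, not taken from this series):

    /* Hypothetical caller: reject the whole range up front if any part
     * of it would tag-check fault, instead of faulting mid-operation. */
    if (probe_subpage_writeable(ubuf, len))
            return -EFAULT;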
index 99cb5d383048dd8dedb08f51be0d3951dc6b0f4f..b0256cec63b50faf0204121a48e0052d83908502 100644 (file)
 #define HWCAP2_AFP             (1 << 20)
 #define HWCAP2_RPRES           (1 << 21)
 #define HWCAP2_MTE3            (1 << 22)
+#define HWCAP2_SME             (1 << 23)
+#define HWCAP2_SME_I16I64      (1 << 24)
+#define HWCAP2_SME_F64F64      (1 << 25)
+#define HWCAP2_SME_I8I32       (1 << 26)
+#define HWCAP2_SME_F16F32      (1 << 27)
+#define HWCAP2_SME_B16F32      (1 << 28)
+#define HWCAP2_SME_F32F32      (1 << 29)
+#define HWCAP2_SME_FA64                (1 << 30)
 
 #endif /* _UAPI__ASM_HWCAP_H */
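
Userspace discovers these capabilities through the auxiliary vector rather than by probing instructions. A minimal detection sketch using the standard getauxval(3) interface:

    #include <sys/auxv.h>
    #include <asm/hwcap.h>

    /* Returns nonzero if the kernel advertises base SME support. */
    static int have_sme(void)
    {
            return !!(getauxval(AT_HWCAP2) & HWCAP2_SME);
    }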
index c1b6ddc02d2ff96eac2248ad47f1e9191d3e85be..ab585359242d09b65b299ee6ee72d38612d626a7 100644 (file)
@@ -139,8 +139,10 @@ struct kvm_guest_debug_arch {
        __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
 };
 
+#define KVM_DEBUG_ARCH_HSR_HIGH_VALID  (1 << 0)
 struct kvm_debug_exit_arch {
        __u32 hsr;
+       __u32 hsr_high; /* ESR_EL2[61:32] */
        __u64 far;      /* used for watchpoints */
 };
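
With ESR_EL2 now wider than 32 bits, the debug exit splits the syndrome across hsr and hsr_high, and KVM_DEBUG_ARCH_HSR_HIGH_VALID (reported via kvm_run's flags field in this series) indicates whether the top half is meaningful. A sketch of how a VMM might reassemble it, assuming a struct kvm_run *run populated by KVM_RUN:

    /* Sketch: rebuild the 64-bit ESR from the split debug exit fields. */
    __u64 esr = run->debug.arch.hsr;

    if (run->flags & KVM_DEBUG_ARCH_HSR_HIGH_VALID)
            esr |= (__u64)run->debug.arch.hsr_high << 32;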
 
index 758ae984ff9775be642ff2911be95f6fb1f0ca07..7fa2f7036aa7852d56fa4d8a4d9ad2ab412c28ad 100644 (file)
@@ -109,7 +109,7 @@ struct user_hwdebug_state {
        }               dbg_regs[16];
 };
 
-/* SVE/FP/SIMD state (NT_ARM_SVE) */
+/* SVE/FP/SIMD state (NT_ARM_SVE & NT_ARM_SSVE) */
 
 struct user_sve_header {
        __u32 size; /* total meaningful regset content in bytes */
@@ -220,6 +220,7 @@ struct user_sve_header {
        (SVE_PT_SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - \
                SVE_PT_SVE_PREGS_OFFSET(vq))
 
+/* For streaming mode SVE (SSVE) FFR must be read and written as zero */
 #define SVE_PT_SVE_FFR_OFFSET(vq) \
        (SVE_PT_REGS_OFFSET + __SVE_FFR_OFFSET(vq))
 
@@ -240,10 +241,12 @@ struct user_sve_header {
                        - SVE_PT_SVE_OFFSET + (__SVE_VQ_BYTES - 1))     \
                / __SVE_VQ_BYTES * __SVE_VQ_BYTES)
 
-#define SVE_PT_SIZE(vq, flags)                                         \
-        (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ?             \
-                 SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags)        \
-               : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
+#define SVE_PT_SIZE(vq, flags)                                           \
+        (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ?               \
+                 SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags)          \
+               : ((((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD ?  \
+                   SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags) \
+                 : SVE_PT_REGS_OFFSET)))
 
 /* pointer authentication masks (NT_ARM_PAC_MASK) */
 
@@ -265,6 +268,62 @@ struct user_pac_generic_keys {
        __uint128_t     apgakey;
 };
 
+/* ZA state (NT_ARM_ZA) */
+
+struct user_za_header {
+       __u32 size; /* total meaningful regset content in bytes */
+       __u32 max_size; /* maximum possible size for this thread */
+       __u16 vl; /* current vector length */
+       __u16 max_vl; /* maximum possible vector length */
+       __u16 flags;
+       __u16 __reserved;
+};
+
+/*
+ * Common ZA_PT_* flags:
+ * These must be kept in sync with prctl interface in <linux/prctl.h>
+ */
+#define ZA_PT_VL_INHERIT               ((1 << 17) /* PR_SME_VL_INHERIT */ >> 16)
+#define ZA_PT_VL_ONEXEC                        ((1 << 18) /* PR_SME_SET_VL_ONEXEC */ >> 16)
+
+
+/*
+ * The remainder of the ZA state follows struct user_za_header.  The
+ * total size of the ZA state (including header) depends on the
+ * metadata in the header:  ZA_PT_SIZE(vq) gives the total size
+ * of the state in bytes, including the header.
+ *
+ * Refer to <asm/sigcontext.h> for details of how to pass the correct
+ * "vq" argument to these macros.
+ */
+
+/* Offset from the start of struct user_za_header to the register data */
+#define ZA_PT_ZA_OFFSET                                                \
+       ((sizeof(struct user_za_header) + (__SVE_VQ_BYTES - 1)) \
+               / __SVE_VQ_BYTES * __SVE_VQ_BYTES)
+
+/*
+ * The payload starts at offset ZA_PT_ZA_OFFSET, and is of size
+ * ZA_PT_ZA_SIZE(vq, flags).
+ *
+ * The ZA array is stored as a sequence of horizontal vectors ZAV of SVL/8
+ * bytes each, starting from vector 0.
+ *
+ * Additional data might be appended in the future.
+ *
+ * The ZA matrix is represented in memory in an endianness-invariant layout
+ * which differs from the layout used for the FPSIMD V-registers on big-endian
+ * systems: see sigcontext.h for more explanation.
+ */
+
+#define ZA_PT_ZAV_OFFSET(vq, n) \
+       (ZA_PT_ZA_OFFSET + ((vq * __SVE_VQ_BYTES) * n))
+
+#define ZA_PT_ZA_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))
+
+#define ZA_PT_SIZE(vq)                                         \
+       (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq))
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _UAPI__ASM_PTRACE_H */
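
The ZA_PT_* macros above size the NT_ARM_ZA regset. A short worked sketch of the arithmetic for a debugger sizing its read buffer (sve_vq_from_vl() is the existing helper documented in <asm/sigcontext.h>):

    /* Sketch: for a streaming VL of 32 bytes, vq = 2, so the ZA payload
     * is (2 * 16) * (2 * 16) = 1024 bytes plus the aligned header. */
    unsigned int vq = sve_vq_from_vl(vl);
    size_t bufsz = ZA_PT_SIZE(vq);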
index 0c796c795dbe781da43e145102e66ad2342f4cc9..4aaf31e3bf167c858101f541d1e2480004be1091 100644 (file)
@@ -132,6 +132,17 @@ struct extra_context {
 #define SVE_MAGIC      0x53564501
 
 struct sve_context {
+       struct _aarch64_ctx head;
+       __u16 vl;
+       __u16 flags;
+       __u16 __reserved[2];
+};
+
+#define SVE_SIG_FLAG_SM        0x1     /* Context describes streaming mode */
+
+#define ZA_MAGIC       0x54366345
+
+struct za_context {
        struct _aarch64_ctx head;
        __u16 vl;
        __u16 __reserved[3];
@@ -186,9 +197,16 @@ struct sve_context {
  * sve_context.vl must equal the thread's current vector length when
  * doing a sigreturn.
  *
+ * On systems with support for SME the SVE register state may reflect either
+ * streaming or non-streaming mode.  In streaming mode the streaming mode
+ * vector length will be used and the flag SVE_SIG_FLAG_SM will be set in
+ * the flags field. It is permitted to enter or leave streaming mode in
+ * a signal return; applications should take care to ensure that any difference
+ * in vector length between the two modes is handled, including any resizing
+ * and movement of context blocks.
  *
- * Note: for all these macros, the "vq" argument denotes the SVE
- * vector length in quadwords (i.e., units of 128 bits).
+ * Note: for all these macros, the "vq" argument denotes the vector length
+ * in quadwords (i.e., units of 128 bits).
  *
  * The correct way to obtain vq is to use sve_vq_from_vl(vl).  The
  * result is valid if and only if sve_vl_valid(vl) is true.  This is
@@ -249,4 +267,37 @@ struct sve_context {
 #define SVE_SIG_CONTEXT_SIZE(vq) \
                (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
 
+/*
+ * If the ZA register is enabled for the thread at signal delivery then,
+ * za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
+ * and the register data may be accessed using the ZA_SIG_*() macros.
+ *
+ * If za_context.head.size < ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
+ * then the ZA register was not enabled for the thread and no register
+ * data was included, in which case
+ * the ZA_SIG_*() macros should not be used except for this check.
+ *
+ * The same convention applies when returning from a signal: a caller
+ * will need to remove or resize the za_context block if it wants to
+ * enable the ZA register when it was previously non-live or vice-versa.
+ * This may require the caller to allocate fresh memory and/or move other
+ * context blocks in the signal frame.
+ *
+ * Changing the vector length during signal return is not permitted:
+ * za_context.vl must equal the thread's current SME vector length when
+ * doing a sigreturn.
+ */
+
+#define ZA_SIG_REGS_OFFSET                                     \
+       ((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1))     \
+               / __SVE_VQ_BYTES * __SVE_VQ_BYTES)
+
+#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))
+
+#define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \
+                                 (SVE_SIG_ZREG_SIZE(vq) * n))
+
+#define ZA_SIG_CONTEXT_SIZE(vq) \
+               (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
+
 #endif /* _UAPI__ASM_SIGCONTEXT_H */
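
The size check described in the comment above translates directly into code. A sketch for a signal handler that has already located a ZA_MAGIC record by walking the _aarch64_ctx list in the signal frame (the walk itself is omitted here):

    /* Sketch: a header-only record means ZA was disabled at delivery. */
    static int za_is_live(const struct za_context *za)
    {
            unsigned int vq = sve_vq_from_vl(za->vl);

            return za->head.size >= ZA_SIG_CONTEXT_SIZE(vq);
    }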
index 986837d7ec82dc1863f06b0906575674d32a628e..fa7981d0d9170057fda37d96f49e6ad0e6f33c2c 100644 (file)
@@ -75,6 +75,10 @@ obj-$(CONFIG_ARM64_MTE)                      += mte.o
 obj-y                                  += vdso-wrap.o
 obj-$(CONFIG_COMPAT_VDSO)              += vdso32-wrap.o
 
+# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
+$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
+$(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
+
 obj-y                                  += probes/
 head-y                                 := head.o
 extra-y                                        += $(head-y) vmlinux.lds
index 4c9b5b4b7a0bc036f3d8a92e4378b1d622e9dbef..c05cc3b6162e921a3cc28d42adc14dcf4f33650e 100644 (file)
@@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
 #ifdef CONFIG_ARM64_ERRATUM_1286807
        {
                ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
        },
+       {
+               /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */
+               ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe),
+       },
 #endif
        {},
@@ -215,7 +217,7 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
 #endif
 
 #ifdef CONFIG_CAVIUM_ERRATUM_23154
-const struct midr_range cavium_erratum_23154_cpus[] = {
+static const struct midr_range cavium_erratum_23154_cpus[] = {
        MIDR_ALL_VERSIONS(MIDR_THUNDERX),
        MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX),
        MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX),
index d72c4b4d389c4130741e5a56df1159a9049d063d..4ccddf382e5b854afaa033dd9d6422cd1a5d63bb 100644 (file)
@@ -191,20 +191,20 @@ static bool __system_matches_cap(unsigned int n);
  * sync with the documentation of the CPU feature register ABI.
  */
 static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TLB_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA3_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TLB_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_FHM_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_DP_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM4_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM3_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA3_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RDM_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_ATOMIC_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_CRC32_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA1_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_AES_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
@@ -261,6 +261,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
@@ -293,6 +295,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
        ARM64_FTR_END,
 };
 
+static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0),
+       ARM64_FTR_END,
+};
+
 static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
@@ -557,7 +577,13 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = {
 
 static const struct arm64_ftr_bits ftr_zcr[] = {
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
-               ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0),        /* LEN */
+               ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_WIDTH, 0),       /* LEN */
+       ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_smcr[] = {
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
+               SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_WIDTH, 0),     /* LEN */
        ARM64_FTR_END,
 };
 
@@ -645,6 +671,7 @@ static const struct __ftr_reg_entry {
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
                               &id_aa64pfr1_override),
        ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
+       ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
 
        /* Op1 = 0, CRn = 0, CRm = 5 */
        ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -654,7 +681,6 @@ static const struct __ftr_reg_entry {
        ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
                               &id_aa64isar1_override),
-       ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2),
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
                               &id_aa64isar2_override),
 
@@ -666,6 +692,7 @@ static const struct __ftr_reg_entry {
 
        /* Op1 = 0, CRn = 1, CRm = 2 */
        ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
+       ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr),
 
        /* Op1 = 1, CRn = 0, CRm = 0 */
        ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
@@ -810,7 +837,7 @@ static void __init sort_ftr_regs(void)
                 * to sys_id for subsequent binary search in get_arm64_ftr_reg()
                 * to work correctly.
                 */
-               BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id);
+               BUG_ON(arm64_ftr_regs[i].sys_id <= arm64_ftr_regs[i - 1].sys_id);
        }
 }
 
@@ -960,6 +987,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
        init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
        init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
        init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
+       init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
 
        if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
                init_32bit_cpu_features(&info->aarch32);
@@ -969,6 +997,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
                vec_init_vq_map(ARM64_VEC_SVE);
        }
 
+       if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+               init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
+               if (IS_ENABLED(CONFIG_ARM64_SME))
+                       vec_init_vq_map(ARM64_VEC_SME);
+       }
+
        if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
                init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
 
@@ -1195,6 +1229,9 @@ void update_cpu_features(int cpu,
        taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
                                      info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
 
+       taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
+                                     info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
+
        if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
                taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
                                        info->reg_zcr, boot->reg_zcr);
@@ -1205,6 +1242,16 @@ void update_cpu_features(int cpu,
                        vec_update_vq_map(ARM64_VEC_SVE);
        }
 
+       if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+               taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
+                                       info->reg_smcr, boot->reg_smcr);
+
+               /* Probe vector lengths, unless we already gave up on SME */
+               if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) &&
+                   !system_capabilities_finalized())
+                       vec_update_vq_map(ARM64_VEC_SME);
+       }
+
        /*
         * The kernel uses the LDGM/STGM instructions and the number of tags
         * they read/write depends on the GMID_EL1.BS field. Check that the
@@ -1288,6 +1335,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
        read_sysreg_case(SYS_ID_AA64PFR0_EL1);
        read_sysreg_case(SYS_ID_AA64PFR1_EL1);
        read_sysreg_case(SYS_ID_AA64ZFR0_EL1);
+       read_sysreg_case(SYS_ID_AA64SMFR0_EL1);
        read_sysreg_case(SYS_ID_AA64DFR0_EL1);
        read_sysreg_case(SYS_ID_AA64DFR1_EL1);
        read_sysreg_case(SYS_ID_AA64MMFR0_EL1);
@@ -2013,7 +2061,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .matches = has_cpuid_feature,
                .sys_reg = SYS_ID_AA64ISAR0_EL1,
-               .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+               .field_pos = ID_AA64ISAR0_EL1_ATOMIC_SHIFT,
                .field_width = 4,
                .sign = FTR_UNSIGNED,
                .min_field_value = 2,
@@ -2195,10 +2243,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .matches = has_cpuid_feature,
                .sys_reg = SYS_ID_AA64ISAR0_EL1,
-               .field_pos = ID_AA64ISAR0_TLB_SHIFT,
+               .field_pos = ID_AA64ISAR0_EL1_TLB_SHIFT,
                .field_width = 4,
                .sign = FTR_UNSIGNED,
-               .min_field_value = ID_AA64ISAR0_TLB_RANGE,
+               .min_field_value = ID_AA64ISAR0_EL1_TLB_RANGE,
        },
 #ifdef CONFIG_ARM64_HW_AFDBM
        {
@@ -2227,7 +2275,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .matches = has_cpuid_feature,
                .sys_reg = SYS_ID_AA64ISAR0_EL1,
-               .field_pos = ID_AA64ISAR0_CRC32_SHIFT,
+               .field_pos = ID_AA64ISAR0_EL1_CRC32_SHIFT,
                .field_width = 4,
                .min_field_value = 1,
        },
@@ -2382,7 +2430,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .matches = has_cpuid_feature,
                .sys_reg = SYS_ID_AA64ISAR0_EL1,
-               .field_pos = ID_AA64ISAR0_RNDR_SHIFT,
+               .field_pos = ID_AA64ISAR0_EL1_RNDR_SHIFT,
                .field_width = 4,
                .sign = FTR_UNSIGNED,
                .min_field_value = 1,
@@ -2442,6 +2490,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                .min_field_value = 1,
        },
+#ifdef CONFIG_ARM64_SME
+       {
+               .desc = "Scalable Matrix Extension",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_SME,
+               .sys_reg = SYS_ID_AA64PFR1_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64PFR1_SME_SHIFT,
+               .field_width = 4,
+               .min_field_value = ID_AA64PFR1_SME,
+               .matches = has_cpuid_feature,
+               .cpu_enable = sme_kernel_enable,
+       },
+       /* FA64 should be sorted after the base SME capability */
+       {
+               .desc = "FA64",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_SME_FA64,
+               .sys_reg = SYS_ID_AA64SMFR0_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64SMFR0_FA64_SHIFT,
+               .field_width = 1,
+               .min_field_value = ID_AA64SMFR0_FA64,
+               .matches = has_cpuid_feature,
+               .cpu_enable = fa64_kernel_enable,
+       },
+#endif /* CONFIG_ARM64_SME */
        {},
 };
 
@@ -2514,22 +2589,22 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
 #endif
 
 static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
-       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_ATOMIC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
+       HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
        HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
        HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
        HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
@@ -2575,6 +2650,16 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
        HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
        HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
        HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+#ifdef CONFIG_ARM64_SME
+       HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+#endif /* CONFIG_ARM64_SME */
        {},
 };
 
@@ -2872,6 +2957,23 @@ static void verify_sve_features(void)
        /* Add checks on other ZCR bits here if necessary */
 }
 
+static void verify_sme_features(void)
+{
+       u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
+       u64 smcr = read_smcr_features();
+
+       unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK;
+       unsigned int len = smcr & SMCR_ELx_LEN_MASK;
+
+       if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) {
+               pr_crit("CPU%d: SME: vector length support mismatch\n",
+                       smp_processor_id());
+               cpu_die_early();
+       }
+
+       /* Add checks on other SMCR bits here if necessary */
+}
+
 static void verify_hyp_capabilities(void)
 {
        u64 safe_mmfr1, mmfr0, mmfr1;
@@ -2924,6 +3026,9 @@ static void verify_local_cpu_capabilities(void)
        if (system_supports_sve())
                verify_sve_features();
 
+       if (system_supports_sme())
+               verify_sme_features();
+
        if (is_hyp_mode_available())
                verify_hyp_capabilities();
 }
@@ -3041,6 +3146,7 @@ void __init setup_cpu_features(void)
                pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
 
        sve_setup();
+       sme_setup();
        minsigstksz_setup();
 
        /* Advertise that we have computed the system capabilities */
index 330b92ea863aad3e61e7559ebc8a37de215497b5..8a8136a096ac12f7ace9aae1b55e870bd2e80b69 100644 (file)
@@ -98,6 +98,14 @@ static const char *const hwcap_str[] = {
        [KERNEL_HWCAP_AFP]              = "afp",
        [KERNEL_HWCAP_RPRES]            = "rpres",
        [KERNEL_HWCAP_MTE3]             = "mte3",
+       [KERNEL_HWCAP_SME]              = "sme",
+       [KERNEL_HWCAP_SME_I16I64]       = "smei16i64",
+       [KERNEL_HWCAP_SME_F64F64]       = "smef64f64",
+       [KERNEL_HWCAP_SME_I8I32]        = "smei8i32",
+       [KERNEL_HWCAP_SME_F16F32]       = "smef16f32",
+       [KERNEL_HWCAP_SME_B16F32]       = "smeb16f32",
+       [KERNEL_HWCAP_SME_F32F32]       = "smef32f32",
+       [KERNEL_HWCAP_SME_FA64]         = "smefa64",
 };
 
 #ifdef CONFIG_COMPAT
@@ -401,6 +409,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
        info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
        info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
        info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
+       info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
 
        if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
                info->reg_gmid = read_cpuid(GMID_EL1);
@@ -412,6 +421,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
            id_aa64pfr0_sve(info->reg_id_aa64pfr0))
                info->reg_zcr = read_zcr_features();
 
+       if (IS_ENABLED(CONFIG_ARM64_SME) &&
+           id_aa64pfr1_sme(info->reg_id_aa64pfr1))
+               info->reg_smcr = read_smcr_features();
+
        cpuinfo_detect_icache_policy(info);
 }
 
index 4f3661eeb7ec6f28873cccf4411ec649c2475815..bf9fe71589bcaca54e8d18f08d5794e9830c9570 100644 (file)
@@ -202,7 +202,7 @@ void unregister_kernel_step_hook(struct step_hook *hook)
  * So we call all the registered handlers, until the right handler is
  * found which returns zero.
  */
-static int call_step_hook(struct pt_regs *regs, unsigned int esr)
+static int call_step_hook(struct pt_regs *regs, unsigned long esr)
 {
        struct step_hook *hook;
        struct list_head *list;
@@ -238,7 +238,7 @@ static void send_user_sigtrap(int si_code)
                              "User debug trap");
 }
 
-static int single_step_handler(unsigned long unused, unsigned int esr,
+static int single_step_handler(unsigned long unused, unsigned long esr,
                               struct pt_regs *regs)
 {
        bool handler_found = false;
@@ -299,11 +299,11 @@ void unregister_kernel_break_hook(struct break_hook *hook)
        unregister_debug_hook(&hook->node);
 }
 
-static int call_break_hook(struct pt_regs *regs, unsigned int esr)
+static int call_break_hook(struct pt_regs *regs, unsigned long esr)
 {
        struct break_hook *hook;
        struct list_head *list;
-       int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL;
+       int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL;
 
        list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
 
@@ -312,7 +312,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
         * entirely not preemptible, and we can use rcu list safely here.
         */
        list_for_each_entry_rcu(hook, list, node) {
-               unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+               unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
 
                if ((comment & ~hook->mask) == hook->imm)
                        fn = hook->fn;
@@ -322,7 +322,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
 }
 NOKPROBE_SYMBOL(call_break_hook);
 
-static int brk_handler(unsigned long unused, unsigned int esr,
+static int brk_handler(unsigned long unused, unsigned long esr,
                       struct pt_regs *regs)
 {
        if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
index 878c65aa720617b33585e7288197974522178e2c..56cefd33eb8e9a1ecbdc247e8af06637c0327981 100644 (file)
@@ -75,7 +75,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
        if (interrupts_enabled(regs)) {
                if (regs->exit_rcu) {
                        trace_hardirqs_on_prepare();
-                       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+                       lockdep_hardirqs_on_prepare();
                        rcu_irq_exit();
                        lockdep_hardirqs_on(CALLER_ADDR0);
                        return;
@@ -121,7 +121,7 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs)
 static __always_inline void __exit_to_user_mode(void)
 {
        trace_hardirqs_on_prepare();
-       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       lockdep_hardirqs_on_prepare();
        user_enter_irqoff();
        lockdep_hardirqs_on(CALLER_ADDR0);
 }
@@ -179,7 +179,7 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
        ftrace_nmi_exit();
        if (restore) {
                trace_hardirqs_on_prepare();
-               lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+               lockdep_hardirqs_on_prepare();
        }
 
        rcu_nmi_exit();
@@ -215,7 +215,7 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
 
        if (restore) {
                trace_hardirqs_on_prepare();
-               lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+               lockdep_hardirqs_on_prepare();
        }
 
        rcu_nmi_exit();
@@ -282,13 +282,13 @@ extern void (*handle_arch_irq)(struct pt_regs *);
 extern void (*handle_arch_fiq)(struct pt_regs *);
 
 static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
-                                     unsigned int esr)
+                                     unsigned long esr)
 {
        arm64_enter_nmi(regs);
 
        console_verbose();
 
-       pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n",
+       pr_crit("Unhandled %s exception on CPU%d, ESR 0x%016lx -- %s\n",
                vector, smp_processor_id(), esr,
                esr_get_class_string(esr));
 
@@ -537,6 +537,14 @@ static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
        exit_to_user_mode(regs);
 }
 
+static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
+{
+       enter_from_user_mode(regs);
+       local_daif_restore(DAIF_PROCCTX);
+       do_sme_acc(esr, regs);
+       exit_to_user_mode(regs);
+}
+
 static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
 {
        enter_from_user_mode(regs);
@@ -645,6 +653,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
        case ESR_ELx_EC_SVE:
                el0_sve_acc(regs, esr);
                break;
+       case ESR_ELx_EC_SME:
+               el0_sme_acc(regs, esr);
+               break;
        case ESR_ELx_EC_FP_EXC64:
                el0_fpsimd_exc(regs, esr);
                break;
@@ -818,7 +829,7 @@ UNHANDLED(el0t, 32, error)
 #ifdef CONFIG_VMAP_STACK
 asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
 {
-       unsigned int esr = read_sysreg(esr_el1);
+       unsigned long esr = read_sysreg(esr_el1);
        unsigned long far = read_sysreg(far_el1);
 
        arm64_enter_nmi(regs);
index dc242e269f9aae94854fb6a6116ceb8575ac4bd5..229436f33df5affb02df5552f62adbe053b72eef 100644 (file)
@@ -86,3 +86,39 @@ SYM_FUNC_START(sve_flush_live)
 SYM_FUNC_END(sve_flush_live)
 
 #endif /* CONFIG_ARM64_SVE */
+
+#ifdef CONFIG_ARM64_SME
+
+SYM_FUNC_START(sme_get_vl)
+       _sme_rdsvl      0, 1
+       ret
+SYM_FUNC_END(sme_get_vl)
+
+SYM_FUNC_START(sme_set_vq)
+       sme_load_vq x0, x1, x2
+       ret
+SYM_FUNC_END(sme_set_vq)
+
+/*
+ * Save the SME state
+ *
+ * x0 - pointer to buffer for state
+ */
+SYM_FUNC_START(za_save_state)
+       _sme_rdsvl      1, 1            // x1 = VL/8
+       sme_save_za 0, x1, 12
+       ret
+SYM_FUNC_END(za_save_state)
+
+/*
+ * Load the SME state
+ *
+ * x0 - pointer to buffer for state
+ */
+SYM_FUNC_START(za_load_state)
+       _sme_rdsvl      1, 1            // x1 = VL/8
+       sme_load_za 0, x1, 12
+       ret
+SYM_FUNC_END(za_load_state)
+
+#endif /* CONFIG_ARM64_SME */
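
These assembly routines are reached from C through declarations in <asm/fpsimd.h>; the prototypes below are an approximation for orientation, not copied from this series:

    /* Approximate C-side view of the routines above (types assumed). */
    extern unsigned int sme_get_vl(void);              /* _sme_rdsvl   */
    extern void sme_set_vq(unsigned long vq_minus_1);  /* sme_load_vq  */
    extern void za_save_state(void *state);
    extern void za_load_state(void const *state);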
index e535480a4069b5bcf8dbde672c93747037e2cd01..d42a205ef6259c8354115aa05caa2b506bc0fb75 100644 (file)
@@ -97,12 +97,6 @@ SYM_CODE_START(ftrace_common)
 SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
        bl      ftrace_stub
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller();
-       nop                             // If enabled, this will be replaced
-                                       // "b ftrace_graph_caller"
-#endif
-
 /*
  * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
  * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
@@ -127,17 +121,6 @@ ftrace_common_return:
        ret     x9
 SYM_CODE_END(ftrace_common)
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-SYM_CODE_START(ftrace_graph_caller)
-       ldr     x0, [sp, #S_PC]
-       sub     x0, x0, #AARCH64_INSN_SIZE      // ip (callsite's BL insn)
-       add     x1, sp, #S_LR                   // parent_ip (callsite's LR)
-       ldr     x2, [sp, #PT_REGS_SIZE]         // parent fp (callsite's FP)
-       bl      prepare_ftrace_return
-       b       ftrace_common_return
-SYM_CODE_END(ftrace_graph_caller)
-#endif
-
 #else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 
 /*
index ede028dee81b09a0b07d91c99123605b3ebc3a0e..5b82b9292400539a627918eb41acae0b866357a9 100644 (file)
@@ -596,7 +596,7 @@ SYM_CODE_START_LOCAL(ret_to_user)
        ldr     x19, [tsk, #TSK_TI_FLAGS]       // re-check for single-step
        enable_step_tsk x19, x2
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
-       bl      stackleak_erase
+       bl      stackleak_erase_on_task_stack
 #endif
        kernel_exit 0
 SYM_CODE_END(ret_to_user)
index 47af76e53221175b3057439dd03f3ac08ef88327..819979398127e6f39ecddf9f62be3b27f9690a8a 100644 (file)
 struct fpsimd_last_state_struct {
        struct user_fpsimd_state *st;
        void *sve_state;
+       void *za_state;
+       u64 *svcr;
        unsigned int sve_vl;
+       unsigned int sme_vl;
 };
 
 static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -136,6 +139,12 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
                .max_virtualisable_vl   = SVE_VL_MIN,
        },
 #endif
+#ifdef CONFIG_ARM64_SME
+       [ARM64_VEC_SME] = {
+               .type                   = ARM64_VEC_SME,
+               .name                   = "SME",
+       },
+#endif
 };
 
 static unsigned int vec_vl_inherit_flag(enum vec_type type)
@@ -143,6 +152,8 @@ static unsigned int vec_vl_inherit_flag(enum vec_type type)
        switch (type) {
        case ARM64_VEC_SVE:
                return TIF_SVE_VL_INHERIT;
+       case ARM64_VEC_SME:
+               return TIF_SME_VL_INHERIT;
        default:
                WARN_ON_ONCE(1);
                return 0;
@@ -186,6 +197,26 @@ extern void __percpu *efi_sve_state;
 
 #endif /* ! CONFIG_ARM64_SVE */
 
+#ifdef CONFIG_ARM64_SME
+
+static int get_sme_default_vl(void)
+{
+       return get_default_vl(ARM64_VEC_SME);
+}
+
+static void set_sme_default_vl(int val)
+{
+       set_default_vl(ARM64_VEC_SME, val);
+}
+
+static void sme_free(struct task_struct *);
+
+#else
+
+static inline void sme_free(struct task_struct *t) { }
+
+#endif
+
 DEFINE_PER_CPU(bool, fpsimd_context_busy);
 EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
 
@@ -206,10 +237,19 @@ static void __get_cpu_fpsimd_context(void)
  *
  * The double-underscore version must only be called if you know the task
  * can't be preempted.
+ *
+ * On RT kernels local_bh_disable() is not sufficient because it only
+ * serializes soft interrupt related sections via a local lock, but stays
+ * preemptible. Disabling preemption is the right choice here: on RT
+ * kernels bottom half processing always runs in thread context, so
+ * disabling preemption implicitly serializes against it as well.
  */
 static void get_cpu_fpsimd_context(void)
 {
-       local_bh_disable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_bh_disable();
+       else
+               preempt_disable();
        __get_cpu_fpsimd_context();
 }
 
@@ -230,7 +270,10 @@ static void __put_cpu_fpsimd_context(void)
 static void put_cpu_fpsimd_context(void)
 {
        __put_cpu_fpsimd_context();
-       local_bh_enable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_bh_enable();
+       else
+               preempt_enable();
 }
 
 static bool have_cpu_fpsimd_context(void)
@@ -238,23 +281,6 @@ static bool have_cpu_fpsimd_context(void)
        return !preemptible() && __this_cpu_read(fpsimd_context_busy);
 }
 
-/*
- * Call __sve_free() directly only if you know task can't be scheduled
- * or preempted.
- */
-static void __sve_free(struct task_struct *task)
-{
-       kfree(task->thread.sve_state);
-       task->thread.sve_state = NULL;
-}
-
-static void sve_free(struct task_struct *task)
-{
-       WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
-
-       __sve_free(task);
-}
-
 unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
 {
        return task->thread.vl[type];
@@ -278,17 +304,28 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
        task->thread.vl_onexec[type] = vl;
 }
 
+/*
+ * TIF_SME controls whether a task can use SME without trapping while
+ * in userspace; when TIF_SME is set we must have storage
+ * allocated in sve_state and za_state to store the contents of both ZA
+ * and the SVE registers for both streaming and non-streaming modes.
+ *
+ * If both SVCR.ZA and SVCR.SM are disabled then at any point we
+ * may disable TIF_SME and reenable traps.
+ */
+
+
 /*
  * TIF_SVE controls whether a task can use SVE without trapping while
- * in userspace, and also the way a task's FPSIMD/SVE state is stored
- * in thread_struct.
+ * in userspace, and also (together with TIF_SME) the way a task's
+ * FPSIMD/SVE state is stored in thread_struct.
  *
  * The kernel uses this flag to track whether a user task is actively
  * using SVE, and therefore whether full SVE register state needs to
  * be tracked.  If not, the cheaper FPSIMD context handling code can
  * be used instead of the more costly SVE equivalents.
  *
- *  * TIF_SVE set:
+ *  * TIF_SVE or SVCR.SM set:
  *
  *    The task can execute SVE instructions while in userspace without
  *    trapping to the kernel.
@@ -296,7 +333,8 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
  *    When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
  *    corresponding Zn), P0-P15 and FFR are encoded in
  *    task->thread.sve_state, formatted appropriately for vector
- *    length task->thread.sve_vl.
+ *    length task->thread.sve_vl or, if SVCR.SM is set,
+ *    task->thread.sme_vl.
  *
  *    task->thread.sve_state must point to a valid buffer at least
  *    sve_state_size(task) bytes in size.
@@ -334,16 +372,44 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
  */
 static void task_fpsimd_load(void)
 {
+       bool restore_sve_regs = false;
+       bool restore_ffr;
+
        WARN_ON(!system_supports_fpsimd());
        WARN_ON(!have_cpu_fpsimd_context());
 
+       /* Check if we should restore SVE first */
        if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
                sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
+               restore_sve_regs = true;
+               restore_ffr = true;
+       }
+
+       /* Restore SME, override SVE register configuration if needed */
+       if (system_supports_sme()) {
+               unsigned long sme_vl = task_get_sme_vl(current);
+
+               /* Ensure VL is set up for restoring data */
+               if (test_thread_flag(TIF_SME))
+                       sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
+
+               write_sysreg_s(current->thread.svcr, SYS_SVCR);
+
+               if (thread_za_enabled(&current->thread))
+                       za_load_state(current->thread.za_state);
+
+               if (thread_sm_enabled(&current->thread)) {
+                       restore_sve_regs = true;
+                       restore_ffr = system_supports_fa64();
+               }
+       }
+
+       if (restore_sve_regs)
                sve_load_state(sve_pffr(&current->thread),
-                              &current->thread.uw.fpsimd_state.fpsr, true);
-       } else {
+                              &current->thread.uw.fpsimd_state.fpsr,
+                              restore_ffr);
+       else
                fpsimd_load_state(&current->thread.uw.fpsimd_state);
-       }
 }
 
 /*
@@ -361,6 +427,9 @@ static void fpsimd_save(void)
        struct fpsimd_last_state_struct const *last =
                this_cpu_ptr(&fpsimd_last_state);
        /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
+       bool save_sve_regs = false;
+       bool save_ffr;
+       unsigned int vl;
 
        WARN_ON(!system_supports_fpsimd());
        WARN_ON(!have_cpu_fpsimd_context());
@@ -368,9 +437,32 @@ static void fpsimd_save(void)
        if (test_thread_flag(TIF_FOREIGN_FPSTATE))
                return;
 
-       if (IS_ENABLED(CONFIG_ARM64_SVE) &&
-           test_thread_flag(TIF_SVE)) {
-               if (WARN_ON(sve_get_vl() != last->sve_vl)) {
+       if (test_thread_flag(TIF_SVE)) {
+               save_sve_regs = true;
+               save_ffr = true;
+               vl = last->sve_vl;
+       }
+
+       if (system_supports_sme()) {
+               u64 *svcr = last->svcr;
+               *svcr = read_sysreg_s(SYS_SVCR);
+
+               if (*svcr & SVCR_ZA_MASK)
+                       za_save_state(last->za_state);
+
+               /* If we are in streaming mode override regular SVE. */
+               if (*svcr & SVCR_SM_MASK) {
+                       save_sve_regs = true;
+                       save_ffr = system_supports_fa64();
+                       vl = last->sme_vl;
+               }
+       }
+
+       if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
+               /* Get the configured VL from RDVL, will account for SM */
+               if (WARN_ON(sve_get_vl() != vl)) {
                        /*
                         * Can't save the user regs, so current would
                         * re-enter user with corrupt state.
@@ -381,8 +473,8 @@ static void fpsimd_save(void)
                }
 
                sve_save_state((char *)last->sve_state +
-                                       sve_ffr_offset(last->sve_vl),
-                              &last->st->fpsr, true);
+                                       sve_ffr_offset(vl),
+                              &last->st->fpsr, save_ffr);
        } else {
                fpsimd_save_state(last->st);
        }
@@ -409,6 +501,8 @@ static unsigned int find_supported_vector_length(enum vec_type type,
 
        if (vl > max_vl)
                vl = max_vl;
+       if (vl < info->min_vl)
+               vl = info->min_vl;
 
        bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
                            __vq_to_bit(sve_vq_from_vl(vl)));
@@ -467,6 +561,30 @@ static int __init sve_sysctl_init(void)
 static int __init sve_sysctl_init(void) { return 0; }
 #endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
 
+#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
+static struct ctl_table sme_default_vl_table[] = {
+       {
+               .procname       = "sme_default_vector_length",
+               .mode           = 0644,
+               .proc_handler   = vec_proc_do_default_vl,
+               .extra1         = &vl_info[ARM64_VEC_SME],
+       },
+       { }
+};
+
+static int __init sme_sysctl_init(void)
+{
+       if (system_supports_sme())
+               if (!register_sysctl("abi", sme_default_vl_table))
+                       return -EINVAL;
+
+       return 0;
+}
+
+#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
+static int __init sme_sysctl_init(void) { return 0; }
+#endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
+
 #define ZREG(sve_state, vq, n) ((char *)(sve_state) +          \
        (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
 
@@ -520,7 +638,7 @@ static void fpsimd_to_sve(struct task_struct *task)
        if (!system_supports_sve())
                return;
 
-       vq = sve_vq_from_vl(task_get_sve_vl(task));
+       vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
        __fpsimd_to_sve(sst, fst, vq);
 }
 
@@ -537,7 +655,7 @@ static void fpsimd_to_sve(struct task_struct *task)
  */
 static void sve_to_fpsimd(struct task_struct *task)
 {
-       unsigned int vq;
+       unsigned int vq, vl;
        void const *sst = task->thread.sve_state;
        struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
        unsigned int i;
@@ -546,7 +664,8 @@ static void sve_to_fpsimd(struct task_struct *task)
        if (!system_supports_sve())
                return;
 
-       vq = sve_vq_from_vl(task_get_sve_vl(task));
+       vl = thread_get_cur_vl(&task->thread);
+       vq = sve_vq_from_vl(vl);
        for (i = 0; i < SVE_NUM_ZREGS; ++i) {
                p = (__uint128_t const *)ZREG(sst, vq, i);
                fst->vregs[i] = arm64_le128_to_cpu(*p);
@@ -554,14 +673,37 @@ static void sve_to_fpsimd(struct task_struct *task)
 }
 
 #ifdef CONFIG_ARM64_SVE
+/*
+ * Call __sve_free() directly only if you know task can't be scheduled
+ * or preempted.
+ */
+static void __sve_free(struct task_struct *task)
+{
+       kfree(task->thread.sve_state);
+       task->thread.sve_state = NULL;
+}
+
+static void sve_free(struct task_struct *task)
+{
+       WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+
+       __sve_free(task);
+}
 
 /*
  * Return how many bytes of memory are required to store the full SVE
  * state for task, given task's currently configured vector length.
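+ * With SME this must cover the larger of the SVE and SME vector
+ * lengths, since streaming mode state is also stored in sve_state.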
  */
-static size_t sve_state_size(struct task_struct const *task)
+size_t sve_state_size(struct task_struct const *task)
 {
-       return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task)));
+       unsigned int vl = 0;
+
+       if (system_supports_sve())
+               vl = task_get_sve_vl(task);
+       if (system_supports_sme())
+               vl = max(vl, task_get_sme_vl(task));
+
+       return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
 }
 
 /*
@@ -587,6 +729,19 @@ void sve_alloc(struct task_struct *task)
 }
 
 
+/*
+ * Force the FPSIMD state shared with SVE to be updated in the SVE state
+ * even if the SVE state is the current active state.
+ *
+ * This should only be called by ptrace.  task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ */
+void fpsimd_force_sync_to_sve(struct task_struct *task)
+{
+       fpsimd_to_sve(task);
+}
+
 /*
  * Ensure that task->thread.sve_state is up to date with respect to
  * the user task, irrespective of when SVE is in use or not.
@@ -597,7 +752,8 @@ void sve_alloc(struct task_struct *task)
  */
 void fpsimd_sync_to_sve(struct task_struct *task)
 {
-       if (!test_tsk_thread_flag(task, TIF_SVE))
+       if (!test_tsk_thread_flag(task, TIF_SVE) &&
+           !thread_sm_enabled(&task->thread))
                fpsimd_to_sve(task);
 }
 
@@ -611,7 +767,8 @@ void fpsimd_sync_to_sve(struct task_struct *task)
  */
 void sve_sync_to_fpsimd(struct task_struct *task)
 {
-       if (test_tsk_thread_flag(task, TIF_SVE))
+       if (test_tsk_thread_flag(task, TIF_SVE) ||
+           thread_sm_enabled(&task->thread))
                sve_to_fpsimd(task);
 }
 
@@ -636,7 +793,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
        if (!test_tsk_thread_flag(task, TIF_SVE))
                return;
 
-       vq = sve_vq_from_vl(task_get_sve_vl(task));
+       vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
 
        memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
        __fpsimd_to_sve(sst, fst, vq);
@@ -680,8 +837,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
        /*
         * To ensure the FPSIMD bits of the SVE vector registers are preserved,
         * write any live register state back to task_struct, and convert to a
-        * regular FPSIMD thread.  Since the vector length can only be changed
-        * with a syscall we can't be in streaming mode while reconfiguring.
+        * regular FPSIMD thread.
         */
        if (task == current) {
                get_cpu_fpsimd_context();
@@ -690,17 +846,26 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
        }
 
        fpsimd_flush_task_state(task);
-       if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
+       if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
+           thread_sm_enabled(&task->thread))
                sve_to_fpsimd(task);
 
+       if (system_supports_sme() && type == ARM64_VEC_SME) {
+               task->thread.svcr &= ~(SVCR_SM_MASK |
+                                      SVCR_ZA_MASK);
+               clear_thread_flag(TIF_SME);
+       }
+
        if (task == current)
                put_cpu_fpsimd_context();
 
        /*
-        * Force reallocation of task SVE state to the correct size
-        * on next use:
+        * Force reallocation of task SVE and SME state to the correct
+        * size on next use:
         */
        sve_free(task);
+       if (system_supports_sme() && type == ARM64_VEC_SME)
+               sme_free(task);
 
        task_set_vl(task, type, vl);
 
@@ -761,6 +926,36 @@ int sve_get_current_vl(void)
        return vec_prctl_status(ARM64_VEC_SVE, 0);
 }
 
+#ifdef CONFIG_ARM64_SME
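+/*
+ * Userspace drives these through prctl(): e.g. prctl(PR_SME_SET_VL, 64)
+ * requests a 512-bit streaming vector length, with bits outside
+ * PR_SME_VL_LEN_MASK carrying flags such as vector length inheritance.
+ */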
+/* PR_SME_SET_VL */
+int sme_set_current_vl(unsigned long arg)
+{
+       unsigned long vl, flags;
+       int ret;
+
+       vl = arg & PR_SME_VL_LEN_MASK;
+       flags = arg & ~vl;
+
+       if (!system_supports_sme() || is_compat_task())
+               return -EINVAL;
+
+       ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
+       if (ret)
+               return ret;
+
+       return vec_prctl_status(ARM64_VEC_SME, flags);
+}
+
+/* PR_SME_GET_VL */
+int sme_get_current_vl(void)
+{
+       if (!system_supports_sme() || is_compat_task())
+               return -EINVAL;
+
+       return vec_prctl_status(ARM64_VEC_SME, 0);
+}
+#endif /* CONFIG_ARM64_SME */
+
 static void vec_probe_vqs(struct vl_info *info,
                          DECLARE_BITMAP(map, SVE_VQ_MAX))
 {
@@ -770,7 +965,23 @@ static void vec_probe_vqs(struct vl_info *info,
 
        for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
                write_vl(info->type, vq - 1); /* self-syncing */
-               vl = sve_get_vl();
+
+               switch (info->type) {
+               case ARM64_VEC_SVE:
+                       vl = sve_get_vl();
+                       break;
+               case ARM64_VEC_SME:
+                       vl = sme_get_vl();
+                       break;
+               default:
+                       vl = 0;
+                       break;
+               }
+
+               /* Minimum VL identified? */
+               if (sve_vq_from_vl(vl) > vq)
+                       break;
+
                vq = sve_vq_from_vl(vl); /* skip intervening lengths */
                set_bit(__vq_to_bit(vq), map);
        }
@@ -856,21 +1067,25 @@ int vec_verify_vq_map(enum vec_type type)
 
 static void __init sve_efi_setup(void)
 {
-       struct vl_info *info = &vl_info[ARM64_VEC_SVE];
+       int max_vl = 0;
+       int i;
 
        if (!IS_ENABLED(CONFIG_EFI))
                return;
 
+       for (i = 0; i < ARRAY_SIZE(vl_info); i++)
+               max_vl = max(vl_info[i].max_vl, max_vl);
+
        /*
         * alloc_percpu() warns and prints a backtrace if this goes wrong.
         * This is evidence of a crippled system and we are returning void,
         * so no attempt is made to handle this situation here.
         */
-       if (!sve_vl_valid(info->max_vl))
+       if (!sve_vl_valid(max_vl))
                goto fail;
 
        efi_sve_state = __alloc_percpu(
-               SVE_SIG_REGS_SIZE(sve_vq_from_vl(info->max_vl)), SVE_VQ_BYTES);
+               SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
        if (!efi_sve_state)
                goto fail;
 
@@ -989,10 +1204,172 @@ void __init sve_setup(void)
 void fpsimd_release_task(struct task_struct *dead_task)
 {
        __sve_free(dead_task);
+       sme_free(dead_task);
 }
 
 #endif /* CONFIG_ARM64_SVE */
 
+#ifdef CONFIG_ARM64_SME
+
+/*
+ * Ensure that task->thread.za_state is allocated and sufficiently large.
+ *
+ * This function should be used only in preparation for replacing
+ * task->thread.za_state with new data.  The memory is always zeroed
+ * here to prevent stale data from showing through: this is done in
+ * the interest of testability and predictability; the architecture
+ * guarantees that when ZA is enabled it will be zeroed.
+ */
+void sme_alloc(struct task_struct *task)
+{
+       if (task->thread.za_state) {
+               memset(task->thread.za_state, 0, za_state_size(task));
+               return;
+       }
+
+       /* This could potentially be up to 64K. */
+       task->thread.za_state =
+               kzalloc(za_state_size(task), GFP_KERNEL);
+}
+
+static void sme_free(struct task_struct *task)
+{
+       kfree(task->thread.za_state);
+       task->thread.za_state = NULL;
+}
+
+void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+{
+       /* Set priority for all PEs to architecturally defined minimum */
+       write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
+                      SYS_SMPRI_EL1);
+
+       /* Allow SME in kernel */
+       write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
+       isb();
+
+       /* Allow EL0 to access TPIDR2 */
+       write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
+       isb();
+}
+
+/*
+ * This must be called after sme_kernel_enable(); we rely on the
+ * feature table being sorted to ensure this.
+ */
+void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+{
+       /* Allow use of FA64 */
+       write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
+                      SYS_SMCR_EL1);
+}
+
+/*
+ * Read the pseudo-SMCR used by cpufeatures to identify the supported
+ * vector length.
+ *
+ * Use only if SME is present.
+ * This function clobbers the SME vector length.
+ */
+u64 read_smcr_features(void)
+{
+       u64 smcr;
+       unsigned int vq_max;
+
+       sme_kernel_enable(NULL);
+       sme_smstart_sm();
+
+       /*
+        * Set the maximum possible VL.
+        */
+       write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
+                      SYS_SMCR_EL1);
+
+       smcr = read_sysreg_s(SYS_SMCR_EL1);
+       smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
+       vq_max = sve_vq_from_vl(sve_get_vl());
+       smcr |= vq_max - 1; /* set LEN field to maximum effective value */
+
+       sme_smstop_sm();
+
+       return smcr;
+}
+
+void __init sme_setup(void)
+{
+       struct vl_info *info = &vl_info[ARM64_VEC_SME];
+       u64 smcr;
+       int min_bit;
+
+       if (!system_supports_sme())
+               return;
+
+       /*
+        * SME doesn't require any particular vector length to be
+        * supported but it does require at least one.  We should have
+        * disabled the feature entirely while bringing up CPUs but
+        * let's double-check here.
+        */
+       WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
+
+       min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
+       info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
+
+       smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
+       info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
+
+       /*
+        * Sanity-check that the max VL we determined through CPU features
+        * corresponds properly to sme_vq_map.  If not, do our best:
+        */
+       if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
+                                                                info->max_vl)))
+               info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
+                                                           info->max_vl);
+
+       WARN_ON(info->min_vl > info->max_vl);
+
+       /*
+        * For the default VL, pick the maximum supported value <= 32
+        * (256 bits) if there is one since this is guaranteed not to
+        * grow the signal frame when in streaming mode, otherwise the
+        * minimum available VL will be used.
+        */
+       set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));
+
+       pr_info("SME: minimum available vector length %u bytes per vector\n",
+               info->min_vl);
+       pr_info("SME: maximum available vector length %u bytes per vector\n",
+               info->max_vl);
+       pr_info("SME: default vector length %u bytes per vector\n",
+               get_sme_default_vl());
+}
+
+#endif /* CONFIG_ARM64_SME */
+
+static void sve_init_regs(void)
+{
+       /*
+        * Convert the FPSIMD state to SVE, zeroing all the state that
+        * is not shared with FPSIMD. If (as is likely) the current
+        * state is live in the registers then do this there and
+        * update our metadata for the current task including
+        * disabling the trap, otherwise update our in-memory copy.
+        * We are guaranteed not to be in streaming mode: we can only
+        * take an SVE trap when not in streaming mode and we can't be
+        * in streaming mode when taking an SME trap.
+        */
+       if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+               unsigned long vq_minus_one =
+                       sve_vq_from_vl(task_get_sve_vl(current)) - 1;
+               sve_set_vq(vq_minus_one);
+               sve_flush_live(true, vq_minus_one);
+               fpsimd_bind_task_to_cpu();
+       } else {
+               fpsimd_to_sve(current);
+       }
+}
+
 /*
  * Trapped SVE access
  *
@@ -1004,7 +1381,7 @@ void fpsimd_release_task(struct task_struct *dead_task)
  * would have disabled the SVE access trap for userspace during
  * ret_to_user, making an SVE access trap impossible in that case.
  */
-void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+void do_sve_acc(unsigned long esr, struct pt_regs *regs)
 {
        /* Even if we chose not to use SVE, the hardware could still trap: */
        if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
@@ -1024,29 +1401,84 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
                WARN_ON(1); /* SVE access shouldn't have trapped */
 
        /*
-        * Convert the FPSIMD state to SVE, zeroing all the state that
-        * is not shared with FPSIMD. If (as is likely) the current
-        * state is live in the registers then do this there and
-        * update our metadata for the current task including
-        * disabling the trap, otherwise update our in-memory copy.
+        * Even if the task has used streaming mode we can only
+        * generate SVE access traps in normal SVE mode, and
+        * transitioning out of streaming mode may discard any
+        * streaming mode state.  Always clear the high bits to avoid
+        * any potential errors in tracking what is properly initialised.
+        */
+       sve_init_regs();
+
+       put_cpu_fpsimd_context();
+}
+
+/*
+ * Trapped SME access
+ *
+ * Storage is allocated for the full SVE and SME state, the current
+ * FPSIMD register contents are migrated to SVE if SVE is not already
+ * active, and the access trap is disabled.
+ *
+ * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
+ * would have disabled the SME access trap for userspace during
+ * ret_to_user, making an SME access trap impossible in that case.
+ */
+void do_sme_acc(unsigned long esr, struct pt_regs *regs)
+{
+       /* Even if we chose not to use SME, the hardware could still trap: */
+       if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
+               force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+               return;
+       }
+
+       /*
+        * If this is not a trap due to SME being disabled then
+        * something is being used in the wrong mode; report it as SIGILL.
         */
+       if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
+               force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+               return;
+       }
+
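+       /* Allocate backing storage for both register sets before first use. */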
+       sve_alloc(current);
+       sme_alloc(current);
+       if (!current->thread.sve_state || !current->thread.za_state) {
+               force_sig(SIGKILL);
+               return;
+       }
+
+       get_cpu_fpsimd_context();
+
+       /* With TIF_SME userspace shouldn't generate any traps */
+       if (test_and_set_thread_flag(TIF_SME))
+               WARN_ON(1);
+
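+       /* If the register state is live, configure the streaming VL in place. */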
        if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
                unsigned long vq_minus_one =
-                       sve_vq_from_vl(task_get_sve_vl(current)) - 1;
-               sve_set_vq(vq_minus_one);
-               sve_flush_live(true, vq_minus_one);
+                       sve_vq_from_vl(task_get_sme_vl(current)) - 1;
+               sme_set_vq(vq_minus_one);
+
                fpsimd_bind_task_to_cpu();
-       } else {
-               fpsimd_to_sve(current);
        }
 
+       /*
+        * If SVE was not already active, initialise the SVE registers;
+        * any non-shared state between the streaming and regular SVE
+        * registers is architecturally guaranteed to be zeroed when
+        * we enter streaming mode.  We do not need to initialise ZA
+        * since ZA must be disabled at this point and enabling ZA is
+        * architecturally defined to zero ZA.
+        */
+       if (system_supports_sve() && !test_thread_flag(TIF_SVE))
+               sve_init_regs();
+
        put_cpu_fpsimd_context();
 }
 
 /*
  * Trapped FP/ASIMD access.
  */
-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
 {
        /* TODO: implement lazy context saving/restoring */
        WARN_ON(1);
@@ -1055,7 +1487,7 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
 /*
  * Raise a SIGFPE for the current process.
  */
-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
 {
        unsigned int si_code = FPE_FLTUNK;
 
@@ -1141,6 +1573,9 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
 
 void fpsimd_flush_thread(void)
 {
+       void *sve_state = NULL;
+       void *za_state = NULL;
+
        if (!system_supports_fpsimd())
                return;
 
@@ -1152,11 +1587,28 @@ void fpsimd_flush_thread(void)
 
        if (system_supports_sve()) {
                clear_thread_flag(TIF_SVE);
-               sve_free(current);
+
+               /* Defer kfree() while in atomic context */
+               sve_state = current->thread.sve_state;
+               current->thread.sve_state = NULL;
+
                fpsimd_flush_thread_vl(ARM64_VEC_SVE);
        }
 
+       if (system_supports_sme()) {
+               clear_thread_flag(TIF_SME);
+
+               /* Defer kfree() while in atomic context */
+               za_state = current->thread.za_state;
+               current->thread.za_state = NULL;
+
+               fpsimd_flush_thread_vl(ARM64_VEC_SME);
+               current->thread.svcr = 0;
+       }
+
        put_cpu_fpsimd_context();
+       kfree(sve_state);
+       kfree(za_state);
 }
 
 /*
@@ -1198,22 +1650,34 @@ static void fpsimd_bind_task_to_cpu(void)
        WARN_ON(!system_supports_fpsimd());
        last->st = &current->thread.uw.fpsimd_state;
        last->sve_state = current->thread.sve_state;
+       last->za_state = current->thread.za_state;
        last->sve_vl = task_get_sve_vl(current);
+       last->sme_vl = task_get_sme_vl(current);
+       last->svcr = &current->thread.svcr;
        current->thread.fpsimd_cpu = smp_processor_id();
 
+       /*
+        * Toggle SVE and SME trapping for userspace if needed; these
+        * are serialised by ret_to_user().
+        */
+       if (system_supports_sme()) {
+               if (test_thread_flag(TIF_SME))
+                       sme_user_enable();
+               else
+                       sme_user_disable();
+       }
+
        if (system_supports_sve()) {
-               /* Toggle SVE trapping for userspace if needed */
                if (test_thread_flag(TIF_SVE))
                        sve_user_enable();
                else
                        sve_user_disable();
-
-               /* Serialised by exception return to user */
        }
 }
 
 void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
-                             unsigned int sve_vl)
+                             unsigned int sve_vl, void *za_state,
+                             unsigned int sme_vl, u64 *svcr)
 {
        struct fpsimd_last_state_struct *last =
                this_cpu_ptr(&fpsimd_last_state);
@@ -1222,8 +1686,11 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
        WARN_ON(!in_softirq() && !irqs_disabled());
 
        last->st = st;
+       last->svcr = svcr;
        last->sve_state = sve_state;
+       last->za_state = za_state;
        last->sve_vl = sve_vl;
+       last->sme_vl = sme_vl;
 }
 
 /*
@@ -1320,6 +1787,15 @@ static void fpsimd_flush_cpu_state(void)
 {
        WARN_ON(!system_supports_fpsimd());
        __this_cpu_write(fpsimd_last_state.st, NULL);
+
+       /*
+        * Leaving streaming mode enabled will cause issues for any kernel
+        * NEON and leaving streaming mode or ZA enabled may increase power
+        * consumption.
+        */
+       if (system_supports_sme())
+               sme_smstop();
+
        set_thread_flag(TIF_FOREIGN_FPSTATE);
 }
 
@@ -1397,6 +1873,7 @@ EXPORT_SYMBOL(kernel_neon_end);
 static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
 static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
 static DEFINE_PER_CPU(bool, efi_sve_state_used);
+static DEFINE_PER_CPU(bool, efi_sm_state);
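+/* Tracks whether the CPU entered the EFI call in streaming mode */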
 
 /*
  * EFI runtime services support functions
@@ -1431,12 +1908,28 @@ void __efi_fpsimd_begin(void)
                 */
                if (system_supports_sve() && likely(efi_sve_state)) {
                        char *sve_state = this_cpu_ptr(efi_sve_state);
+                       bool ffr = true;
+                       u64 svcr;
 
                        __this_cpu_write(efi_sve_state_used, true);
 
+                       if (system_supports_sme()) {
+                               svcr = read_sysreg_s(SYS_SVCR);
+
+                               __this_cpu_write(efi_sm_state,
+                                                svcr & SVCR_SM_MASK);
+
+                               /*
+                                * Unless we have FA64, FFR does not
+                                * exist in streaming mode: if we are
+                                * in streaming mode there is no FFR
+                                * state to save.
+                                */
+                               if (!system_supports_fa64())
+                                       ffr = !(svcr & SVCR_SM_MASK);
+                       }
+
                        sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
                                       &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
-                                      true);
+                                      ffr);
+
+                       if (system_supports_sme())
+                               sysreg_clear_set_s(SYS_SVCR,
+                                                  SVCR_SM_MASK, 0);
+
                } else {
                        fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
                }
@@ -1459,11 +1952,26 @@ void __efi_fpsimd_end(void)
                if (system_supports_sve() &&
                    likely(__this_cpu_read(efi_sve_state_used))) {
                        char const *sve_state = this_cpu_ptr(efi_sve_state);
+                       bool ffr = true;
+
+                       /*
+                        * Restore streaming mode if we were in it
+                        * before the call; EFI calls are normal
+                        * function calls so should not themselves
+                        * return in streaming mode.
+                        */
+                       if (system_supports_sme()) {
+                               if (__this_cpu_read(efi_sm_state)) {
+                                       sysreg_clear_set_s(SYS_SVCR,
+                                                          0,
+                                                          SVCR_SM_MASK);
+                                       /*
+                                        * Without FA64 there is no FFR
+                                        * state in streaming mode to
+                                        * restore.
+                                        */
+                                       if (!system_supports_fa64())
+                                               ffr = false;
+                               }
+                       }
 
-                       sve_set_vq(sve_vq_from_vl(sve_get_vl()) - 1);
                        sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
                                       &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
-                                      true);
+                                      ffr);
 
                        __this_cpu_write(efi_sve_state_used, false);
                } else {
@@ -1538,6 +2046,13 @@ static int __init fpsimd_init(void)
        if (!cpu_have_named_feature(ASIMD))
                pr_notice("Advanced SIMD is not implemented\n");
 
-       return sve_sysctl_init();
+       if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE))
+               pr_notice("SME is implemented but not SVE\n");
+
+       sve_sysctl_init();
+       sme_sysctl_init();
+
+       return 0;
 }
 core_initcall(fpsimd_init);
index 4506c4a90ac10fbe7910da0d95e9bdaf50ac3945..f447c4a36f694ae72f24b379ceae4e71a426a53e 100644 (file)
@@ -268,6 +268,22 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+                      struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+       /*
+        * When DYNAMIC_FTRACE_WITH_REGS is selected, `fregs` can never be NULL
+        * and arch_ftrace_get_regs(fregs) will always give a non-NULL pt_regs
+        * in which we can safely modify the LR.
+        */
+       struct pt_regs *regs = arch_ftrace_get_regs(fregs);
+       unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs);
+
+       prepare_ftrace_return(ip, parent, frame_pointer(regs));
+}
+#else
 /*
  * Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
  * depending on @enable.
@@ -297,5 +313,6 @@ int ftrace_disable_ftrace_graph_caller(void)
 {
        return ftrace_modify_graph_caller(false);
 }
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
index cd868084e724244d8964c95e6cbf7ba34a0b36f2..b29a311bb055216aa4ab111cf10c322e4042c923 100644 (file)
@@ -617,7 +617,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers);
 /*
  * Debug exception handlers.
  */
-static int breakpoint_handler(unsigned long unused, unsigned int esr,
+static int breakpoint_handler(unsigned long unused, unsigned long esr,
                              struct pt_regs *regs)
 {
        int i, step = 0, *kernel_step;
@@ -751,7 +751,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr,
        return step;
 }
 
-static int watchpoint_handler(unsigned long addr, unsigned int esr,
+static int watchpoint_handler(unsigned long addr, unsigned long esr,
                              struct pt_regs *regs)
 {
        int i, step = 0, *kernel_step, access, closest_match = 0;
index 2aede780fb80c50f4636e830c99b786d4a1e9970..cda9c1e9864f717595a60fe0c4380949c53ca61f 100644 (file)
@@ -232,14 +232,14 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
        return err;
 }
 
-static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr)
 {
        kgdb_handle_exception(1, SIGTRAP, 0, regs);
        return DBG_HOOK_HANDLED;
 }
 NOKPROBE_SYMBOL(kgdb_brk_fn);
 
-static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr)
 {
        compiled_break = 1;
        kgdb_handle_exception(1, SIGTRAP, 0, regs);
@@ -248,7 +248,7 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
 }
 NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
 
-static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
+static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
 {
        if (!kgdb_single_step)
                return DBG_HOOK_ERROR;
index e16b248699d5c3cbd8e0a665338d861dd83b97d1..19c2d487cb08feb66052642f7bfca776e97445d0 100644 (file)
@@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn)
 
        /* in reserved memory? */
        addr = __pfn_to_phys(pfn);
-       if ((addr < crashk_res.start) || (crashk_res.end < addr))
-               return false;
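+       /* Also treat the optional low crashkernel region as reserved */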
+       if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
+               if (!crashk_low_res.end)
+                       return false;
+
+               if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
+                       return false;
+       }
 
        if (!kexec_crash_image)
                return true;
index 59c648d518488869b6cc4c6b227933aa3419bedd..889951291cc0f9cea9e9fccb9bf0619ad040f9c8 100644 (file)
@@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
 
        /* Exclude crashkernel region */
        ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+       if (ret)
+               goto out;
+
+       if (crashk_low_res.end) {
+               ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+               if (ret)
+                       goto out;
+       }
 
-       if (!ret)
-               ret =  crash_prepare_elf64_headers(cmem, true, addr, sz);
+       ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
 
+out:
        kfree(cmem);
        return ret;
 }
index 78b3e0f8e997cab99b70bffe181949d8d29610dc..57b30bcf9f21de03bc58a6b6b133329eb8cd4c55 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/swapops.h>
 #include <linux/thread_info.h>
 #include <linux/types.h>
+#include <linux/uaccess.h>
 #include <linux/uio.h>
 
 #include <asm/barrier.h>
@@ -76,6 +77,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
                        mte_sync_page_tags(page, old_pte, check_swap,
                                           pte_is_tagged);
        }
+
+       /* ensure the tags are visible before the PTE is set */
+       smp_wmb();
 }
 
 int memcmp_pages(struct page *page1, struct page *page2)
@@ -106,7 +110,8 @@ int memcmp_pages(struct page *page1, struct page *page2)
 static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
 {
        /* Enable MTE Sync Mode for EL1. */
-       sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, tcf);
+       sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
+                        SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf));
        isb();
 
        pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
@@ -122,12 +127,12 @@ void mte_enable_kernel_sync(void)
        WARN_ONCE(system_uses_mte_async_or_asymm_mode(),
                        "MTE async mode enabled system wide!");
 
-       __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
+       __mte_enable_kernel("synchronous", SCTLR_EL1_TCF_SYNC);
 }
 
 void mte_enable_kernel_async(void)
 {
-       __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC);
+       __mte_enable_kernel("asynchronous", SCTLR_EL1_TCF_ASYNC);
 
        /*
         * MTE async mode is set system wide by the first PE that
@@ -144,7 +149,7 @@ void mte_enable_kernel_async(void)
 void mte_enable_kernel_asymm(void)
 {
        if (cpus_have_cap(ARM64_MTE_ASYMM)) {
-               __mte_enable_kernel("asymmetric", SCTLR_ELx_TCF_ASYMM);
+               __mte_enable_kernel("asymmetric", SCTLR_EL1_TCF_ASYMM);
 
                /*
                 * MTE asymm mode behaves as async mode for store
@@ -216,11 +221,11 @@ static void mte_update_sctlr_user(struct task_struct *task)
         * default order.
         */
        if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM)
-               sctlr |= SCTLR_EL1_TCF0_ASYMM;
+               sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYMM);
        else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
-               sctlr |= SCTLR_EL1_TCF0_ASYNC;
+               sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC);
        else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
-               sctlr |= SCTLR_EL1_TCF0_SYNC;
+               sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC);
        task->thread.sctlr_user = sctlr;
 }
 
@@ -543,3 +548,32 @@ static int register_mte_tcf_preferred_sysctl(void)
        return 0;
 }
 subsys_initcall(register_mte_tcf_preferred_sysctl);
+
+/*
+ * Return 0 on success, the number of bytes not probed otherwise.
+ */
+size_t mte_probe_user_range(const char __user *uaddr, size_t size)
+{
+       const char __user *end = uaddr + size;
+       int err = 0;
+       char val;
+
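+       /* Probe the first byte in case uaddr is not granule-aligned */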
+       __raw_get_user(val, uaddr, err);
+       if (err)
+               return size;
+
+       uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
+       while (uaddr < end) {
+               /*
+                * A read is sufficient for mte, the caller should have probed
+                * A read is sufficient for MTE; the caller should have
+                * probed for PTE write permission if required.
+               __raw_get_user(val, uaddr, err);
+               if (err)
+                       return end - uaddr;
+               uaddr += MTE_GRANULE_SIZE;
+       }
+       (void)val;
+
+       return 0;
+}
index 75fed4460407dee05914177a86c234423a6efe54..57c7c211f8c71d2447e3cde932b1a3a655b309b8 100644 (file)
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
-       struct pvclock_vcpu_stolen_time *kaddr;
+       struct pvclock_vcpu_stolen_time __rcu *kaddr;
 };
 
 static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
 /* return stolen time in ns by asking the hypervisor */
 static u64 para_steal_clock(int cpu)
 {
+       struct pvclock_vcpu_stolen_time *kaddr = NULL;
        struct pv_time_stolen_time_region *reg;
+       u64 ret = 0;
 
        reg = per_cpu_ptr(&stolen_time_region, cpu);
 
@@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu)
         * online notification callback runs. Until the callback
         * has run we just return zero.
         */
-       if (!reg->kaddr)
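+       /* Pairs with rcu_assign_pointer() in stolen_time_cpu_online() */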
+       rcu_read_lock();
+       kaddr = rcu_dereference(reg->kaddr);
+       if (!kaddr) {
+               rcu_read_unlock();
                return 0;
+       }
 
-       return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+       ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+       rcu_read_unlock();
+       return ret;
 }
 
 static int stolen_time_cpu_down_prepare(unsigned int cpu)
 {
+       struct pvclock_vcpu_stolen_time *kaddr = NULL;
        struct pv_time_stolen_time_region *reg;
 
        reg = this_cpu_ptr(&stolen_time_region);
        if (!reg->kaddr)
                return 0;
 
-       memunmap(reg->kaddr);
-       memset(reg, 0, sizeof(*reg));
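+       /* Swap the pointer out, then wait out readers before unmapping */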
+       kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
+       synchronize_rcu();
+       memunmap(kaddr);
 
        return 0;
 }
 
 static int stolen_time_cpu_online(unsigned int cpu)
 {
+       struct pvclock_vcpu_stolen_time *kaddr = NULL;
        struct pv_time_stolen_time_region *reg;
        struct arm_smccc_res res;
 
@@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsigned int cpu)
        if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
                return -EINVAL;
 
-       reg->kaddr = memremap(res.a0,
+       kaddr = memremap(res.a0,
                              sizeof(struct pvclock_vcpu_stolen_time),
                              MEMREMAP_WB);
 
+       rcu_assign_pointer(reg->kaddr, kaddr);
+
-       if (!reg->kaddr) {
+       if (!kaddr) {
                pr_warn("Failed to map stolen time data structure\n");
                return -ENOMEM;
        }
 
-       if (le32_to_cpu(reg->kaddr->revision) != 0 ||
-           le32_to_cpu(reg->kaddr->attributes) != 0) {
+       if (le32_to_cpu(kaddr->revision) != 0 ||
+           le32_to_cpu(kaddr->attributes) != 0) {
                pr_warn_once("Unexpected revision or attributes in stolen time data\n");
                return -ENXIO;
        }
index d9dfa82c1f184d3f4c0c1c9d60e5ad7d3de31c20..d1d1823202453d2259536636099b75b7d0f09d99 100644 (file)
@@ -335,7 +335,7 @@ static void __kprobes kprobe_handler(struct pt_regs *regs)
 }
 
 static int __kprobes
-kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr)
+kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
        unsigned long addr = instruction_pointer(regs);
@@ -359,7 +359,7 @@ static struct break_hook kprobes_break_ss_hook = {
 };
 
 static int __kprobes
-kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr)
+kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
 {
        kprobe_handler(regs);
        return DBG_HOOK_HANDLED;
index 9be668f3f03417455ec179b3211cd0f3550ec980..d49aef2657cdf74311bbbb9f6e4415b92c114b3c 100644 (file)
@@ -166,7 +166,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
 }
 
 static int uprobe_breakpoint_handler(struct pt_regs *regs,
-               unsigned int esr)
+                                    unsigned long esr)
 {
        if (uprobe_pre_sstep_notifier(regs))
                return DBG_HOOK_HANDLED;
@@ -175,7 +175,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
 }
 
 static int uprobe_single_step_handler(struct pt_regs *regs,
-               unsigned int esr)
+                                     unsigned long esr)
 {
        struct uprobe_task *utask = current->utask;
 
index 7fa97df55e3ad3f24443cf6ec49318d1e64f8e53..9734c9fb1a32c0e12b3f8be22dff8e7885838deb 100644 (file)
@@ -250,6 +250,8 @@ void show_regs(struct pt_regs *regs)
 static void tls_thread_flush(void)
 {
        write_sysreg(0, tpidr_el0);
+       if (system_supports_tpidr2())
+               write_sysreg_s(0, SYS_TPIDR2_EL0);
 
        if (is_compat_task()) {
                current->thread.uw.tp_value = 0;
@@ -298,16 +300,42 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
        /*
         * Detach src's sve_state (if any) from dst so that it does not
-        * get erroneously used or freed prematurely.  dst's sve_state
+        * get erroneously used or freed prematurely.  dst's copies
         * will be allocated on demand later on if dst uses SVE.
         * For consistency, also clear TIF_SVE here: this could be done
         * later in copy_process(), but to avoid tripping up future
-        * maintainers it is best not to leave TIF_SVE and sve_state in
+        * maintainers it is best not to leave TIF flags and buffers in
         * an inconsistent state, even temporarily.
         */
        dst->thread.sve_state = NULL;
        clear_tsk_thread_flag(dst, TIF_SVE);
 
+       /*
+        * In the unlikely event that we create a new thread with ZA
+        * enabled we should retain the ZA state, so duplicate it here.
+        * The state may shortly be freed if we exec() or if CLONE_SETTLS
+        * is set, but it's simpler to do the copy here.  To avoid
+        * confusing the rest of the code, ensure that we have a
+        * sve_state allocated whenever za_state is allocated.
+        */
+       if (thread_za_enabled(&src->thread)) {
+               dst->thread.sve_state = kzalloc(sve_state_size(src),
+                                               GFP_KERNEL);
+               if (!dst->thread.sve_state)
+                       return -ENOMEM;
+               dst->thread.za_state = kmemdup(src->thread.za_state,
+                                              za_state_size(src),
+                                              GFP_KERNEL);
+               if (!dst->thread.za_state) {
+                       kfree(dst->thread.sve_state);
+                       dst->thread.sve_state = NULL;
+                       return -ENOMEM;
+               }
+       } else {
+               dst->thread.za_state = NULL;
+               clear_tsk_thread_flag(dst, TIF_SME);
+       }
+
        /* clear any pending asynchronous tag fault raised by the parent */
        clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
 
@@ -343,6 +371,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
                 * out-of-sync with the saved value.
                 */
                *task_user_tls(p) = read_sysreg(tpidr_el0);
+               if (system_supports_tpidr2())
+                       p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
 
                if (stack_start) {
                        if (is_compat_thread(task_thread_info(p)))
@@ -353,10 +383,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 
                /*
                 * If a TLS pointer was passed to clone, use it for the new
-                * thread.
+                * thread.  We also reset TPIDR2 if it's in use.
                 */
-               if (clone_flags & CLONE_SETTLS)
+               if (clone_flags & CLONE_SETTLS) {
                        p->thread.uw.tp_value = tls;
+                       p->thread.tpidr2_el0 = 0;
+               }
        } else {
                /*
                 * A kthread has no context to ERET to, so ensure any buggy
@@ -387,6 +419,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 void tls_preserve_current_state(void)
 {
        *task_user_tls(current) = read_sysreg(tpidr_el0);
+       if (system_supports_tpidr2() && !is_compat_task())
+               current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
 }
 
 static void tls_thread_switch(struct task_struct *next)
@@ -399,6 +433,8 @@ static void tls_thread_switch(struct task_struct *next)
                write_sysreg(0, tpidrro_el0);
 
        write_sysreg(*task_user_tls(next), tpidr_el0);
+       if (system_supports_tpidr2())
+               write_sysreg_s(next->thread.tpidr2_el0, SYS_TPIDR2_EL0);
 }
 
 /*
index 230a47b9189e79ed0eeba8f92f14aa2d984c07e8..21da83187a602c499e4240d538d28da3bed5624d 100644 (file)
@@ -713,21 +713,51 @@ static int system_call_set(struct task_struct *target,
 #ifdef CONFIG_ARM64_SVE
 
 static void sve_init_header_from_task(struct user_sve_header *header,
-                                     struct task_struct *target)
+                                     struct task_struct *target,
+                                     enum vec_type type)
 {
        unsigned int vq;
+       bool active;
+       bool fpsimd_only;
+       enum vec_type task_type;
 
        memset(header, 0, sizeof(*header));
 
-       header->flags = test_tsk_thread_flag(target, TIF_SVE) ?
-               SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD;
-       if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
-               header->flags |= SVE_PT_VL_INHERIT;
+       /* Check if the requested registers are active for the task */
+       if (thread_sm_enabled(&target->thread))
+               task_type = ARM64_VEC_SME;
+       else
+               task_type = ARM64_VEC_SVE;
+       active = (task_type == type);
+
+       switch (type) {
+       case ARM64_VEC_SVE:
+               if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
+                       header->flags |= SVE_PT_VL_INHERIT;
+               fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
+               break;
+       case ARM64_VEC_SME:
+               if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
+                       header->flags |= SVE_PT_VL_INHERIT;
+               fpsimd_only = false;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               return;
+       }
 
-       header->vl = task_get_sve_vl(target);
+       if (active) {
+               if (fpsimd_only) {
+                       header->flags |= SVE_PT_REGS_FPSIMD;
+               } else {
+                       header->flags |= SVE_PT_REGS_SVE;
+               }
+       }
+
+       header->vl = task_get_vl(target, type);
        vq = sve_vq_from_vl(header->vl);
 
-       header->max_vl = sve_max_vl();
+       header->max_vl = vec_max_vl(type);
        header->size = SVE_PT_SIZE(vq, header->flags);
        header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
                                      SVE_PT_REGS_SVE);
@@ -738,19 +768,17 @@ static unsigned int sve_size_from_header(struct user_sve_header const *header)
        return ALIGN(header->size, SVE_VQ_BYTES);
 }
 
-static int sve_get(struct task_struct *target,
-                  const struct user_regset *regset,
-                  struct membuf to)
+static int sve_get_common(struct task_struct *target,
+                         const struct user_regset *regset,
+                         struct membuf to,
+                         enum vec_type type)
 {
        struct user_sve_header header;
        unsigned int vq;
        unsigned long start, end;
 
-       if (!system_supports_sve())
-               return -EINVAL;
-
        /* Header */
-       sve_init_header_from_task(&header, target);
+       sve_init_header_from_task(&header, target, type);
        vq = sve_vq_from_vl(header.vl);
 
        membuf_write(&to, &header, sizeof(header));
@@ -758,49 +786,61 @@ static int sve_get(struct task_struct *target,
        if (target == current)
                fpsimd_preserve_current_state();
 
-       /* Registers: FPSIMD-only case */
-
        BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
-       if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
+       BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+
+       switch ((header.flags & SVE_PT_REGS_MASK)) {
+       case SVE_PT_REGS_FPSIMD:
                return __fpr_get(target, regset, to);
 
-       /* Otherwise: full SVE case */
+       case SVE_PT_REGS_SVE:
+               start = SVE_PT_SVE_OFFSET;
+               end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+               membuf_write(&to, target->thread.sve_state, end - start);
 
-       BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
-       start = SVE_PT_SVE_OFFSET;
-       end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
-       membuf_write(&to, target->thread.sve_state, end - start);
+               start = end;
+               end = SVE_PT_SVE_FPSR_OFFSET(vq);
+               membuf_zero(&to, end - start);
 
-       start = end;
-       end = SVE_PT_SVE_FPSR_OFFSET(vq);
-       membuf_zero(&to, end - start);
+               /*
+                * Copy fpsr, and fpcr which must follow contiguously in
+                * struct fpsimd_state:
+                */
+               start = end;
+               end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+               membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr,
+                            end - start);
 
-       /*
-        * Copy fpsr, and fpcr which must follow contiguously in
-        * struct fpsimd_state:
-        */
-       start = end;
-       end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
-       membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, end - start);
+               start = end;
+               end = sve_size_from_header(&header);
+               return membuf_zero(&to, end - start);
 
-       start = end;
-       end = sve_size_from_header(&header);
-       return membuf_zero(&to, end - start);
+       default:
+               return 0;
+       }
 }
 
-static int sve_set(struct task_struct *target,
+static int sve_get(struct task_struct *target,
                   const struct user_regset *regset,
-                  unsigned int pos, unsigned int count,
-                  const void *kbuf, const void __user *ubuf)
+                  struct membuf to)
+{
+       if (!system_supports_sve())
+               return -EINVAL;
+
+       return sve_get_common(target, regset, to, ARM64_VEC_SVE);
+}
+
+static int sve_set_common(struct task_struct *target,
+                         const struct user_regset *regset,
+                         unsigned int pos, unsigned int count,
+                         const void *kbuf, const void __user *ubuf,
+                         enum vec_type type)
 {
        int ret;
        struct user_sve_header header;
        unsigned int vq;
        unsigned long start, end;
 
-       if (!system_supports_sve())
-               return -EINVAL;
-
        /* Header */
        if (count < sizeof(header))
                return -EINVAL;
@@ -813,13 +853,37 @@ static int sve_set(struct task_struct *target,
         * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by
         * vec_set_vector_length(), which will also validate them for us:
         */
-       ret = vec_set_vector_length(target, ARM64_VEC_SVE, header.vl,
+       ret = vec_set_vector_length(target, type, header.vl,
                ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
        if (ret)
                goto out;
 
        /* Actual VL set may be less than the user asked for: */
-       vq = sve_vq_from_vl(task_get_sve_vl(target));
+       vq = sve_vq_from_vl(task_get_vl(target, type));
+
+       /* Enter/exit streaming mode */
+       if (system_supports_sme()) {
+               u64 old_svcr = target->thread.svcr;
+
+               switch (type) {
+               case ARM64_VEC_SVE:
+                       target->thread.svcr &= ~SVCR_SM_MASK;
+                       break;
+               case ARM64_VEC_SME:
+                       target->thread.svcr |= SVCR_SM_MASK;
+                       break;
+               default:
+                       WARN_ON_ONCE(1);
+                       return -EINVAL;
+               }
+
+               /*
+                * If we switched then invalidate any existing SVE
+                * state and ensure there's storage.
+                */
+               if (target->thread.svcr != old_svcr)
+                       sve_alloc(target);
+       }
 
        /* Registers: FPSIMD-only case */
 
@@ -828,10 +892,15 @@ static int sve_set(struct task_struct *target,
                ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
                                SVE_PT_FPSIMD_OFFSET);
                clear_tsk_thread_flag(target, TIF_SVE);
+               if (type == ARM64_VEC_SME)
+                       fpsimd_force_sync_to_sve(target);
                goto out;
        }
 
-       /* Otherwise: full SVE case */
+       /*
+        * Otherwise: no registers or full SVE case.  For backwards
+        * compatibility reasons we treat empty flags as SVE registers.
+        */
 
        /*
         * If setting a different VL from the requested VL and there is
@@ -852,8 +921,9 @@ static int sve_set(struct task_struct *target,
 
        /*
         * Ensure target->thread.sve_state is up to date with target's
-        * FPSIMD regs, so that a short copyin leaves trailing registers
-        * unmodified.
+        * FPSIMD regs, so that a short copyin leaves trailing
+        * registers unmodified.  Always enable SVE even if going into
+        * streaming mode.
         */
        fpsimd_sync_to_sve(target);
        set_tsk_thread_flag(target, TIF_SVE);
@@ -889,8 +959,181 @@ out:
        return ret;
 }
 
+static int sve_set(struct task_struct *target,
+                  const struct user_regset *regset,
+                  unsigned int pos, unsigned int count,
+                  const void *kbuf, const void __user *ubuf)
+{
+       if (!system_supports_sve())
+               return -EINVAL;
+
+       return sve_set_common(target, regset, pos, count, kbuf, ubuf,
+                             ARM64_VEC_SVE);
+}
+
 #endif /* CONFIG_ARM64_SVE */
 
+#ifdef CONFIG_ARM64_SME
+
+static int ssve_get(struct task_struct *target,
+                  const struct user_regset *regset,
+                  struct membuf to)
+{
+       if (!system_supports_sme())
+               return -EINVAL;
+
+       return sve_get_common(target, regset, to, ARM64_VEC_SME);
+}
+
+static int ssve_set(struct task_struct *target,
+                   const struct user_regset *regset,
+                   unsigned int pos, unsigned int count,
+                   const void *kbuf, const void __user *ubuf)
+{
+       if (!system_supports_sme())
+               return -EINVAL;
+
+       return sve_set_common(target, regset, pos, count, kbuf, ubuf,
+                             ARM64_VEC_SME);
+}
+
+static int za_get(struct task_struct *target,
+                 const struct user_regset *regset,
+                 struct membuf to)
+{
+       struct user_za_header header;
+       unsigned int vq;
+       unsigned long start, end;
+
+       if (!system_supports_sme())
+               return -EINVAL;
+
+       /* Header */
+       memset(&header, 0, sizeof(header));
+
+       if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
+               header.flags |= ZA_PT_VL_INHERIT;
+
+       header.vl = task_get_sme_vl(target);
+       vq = sve_vq_from_vl(header.vl);
+       header.max_vl = sme_max_vl();
+       header.max_size = ZA_PT_SIZE(vq);
+
+       /* If ZA is not active there is only the header */
+       if (thread_za_enabled(&target->thread))
+               header.size = ZA_PT_SIZE(vq);
+       else
+               header.size = ZA_PT_ZA_OFFSET;
+
+       membuf_write(&to, &header, sizeof(header));
+
+       BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header));
+       end = ZA_PT_ZA_OFFSET;
+
+       if (target == current)
+               fpsimd_preserve_current_state();
+
+       /* Any register data to include? */
+       if (thread_za_enabled(&target->thread)) {
+               start = end;
+               end = ZA_PT_SIZE(vq);
+               membuf_write(&to, target->thread.za_state, end - start);
+       }
+
+       /* Zero any trailing padding */
+       start = end;
+       end = ALIGN(header.size, SVE_VQ_BYTES);
+       return membuf_zero(&to, end - start);
+}
+
+static int za_set(struct task_struct *target,
+                 const struct user_regset *regset,
+                 unsigned int pos, unsigned int count,
+                 const void *kbuf, const void __user *ubuf)
+{
+       int ret;
+       struct user_za_header header;
+       unsigned int vq;
+       unsigned long start, end;
+
+       if (!system_supports_sme())
+               return -EINVAL;
+
+       /* Header */
+       if (count < sizeof(header))
+               return -EINVAL;
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
+                                0, sizeof(header));
+       if (ret)
+               goto out;
+
+       /*
+        * All current ZA_PT_* flags are consumed by
+        * vec_set_vector_length(), which will also validate them for
+        * us:
+        */
+       ret = vec_set_vector_length(target, ARM64_VEC_SME, header.vl,
+               ((unsigned long)header.flags) << 16);
+       if (ret)
+               goto out;
+
+       /* Actual VL set may be less than the user asked for: */
+       vq = sve_vq_from_vl(task_get_sme_vl(target));
+
+       /* Ensure there is some SVE storage for streaming mode */
+       if (!target->thread.sve_state) {
+               sve_alloc(target);
+               if (!target->thread.sve_state) {
+                       clear_thread_flag(TIF_SME);
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       }
+
+       /* Allocate/reinit ZA storage */
+       sme_alloc(target);
+       if (!target->thread.za_state) {
+               ret = -ENOMEM;
+               clear_tsk_thread_flag(target, TIF_SME);
+               goto out;
+       }
+
+       /* If there is no data then disable ZA */
+       if (!count) {
+               target->thread.svcr &= ~SVCR_ZA_MASK;
+               goto out;
+       }
+
+       /*
+        * If setting a different VL from the requested VL and there is
+        * register data, the data layout will be wrong: don't even
+        * try to set the registers in this case.
+        */
+       if (vq != sve_vq_from_vl(header.vl)) {
+               ret = -EIO;
+               goto out;
+       }
+
+       BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header));
+       start = ZA_PT_ZA_OFFSET;
+       end = ZA_PT_SIZE(vq);
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                target->thread.za_state,
+                                start, end);
+       if (ret)
+               goto out;
+
+       /* Mark ZA as active and let userspace use it */
+       set_tsk_thread_flag(target, TIF_SME);
+       target->thread.svcr |= SVCR_ZA_MASK;
+
+out:
+       fpsimd_flush_task_state(target);
+       return ret;
+}
+
+#endif /* CONFIG_ARM64_SME */
+
 #ifdef CONFIG_ARM64_PTR_AUTH
 static int pac_mask_get(struct task_struct *target,
                        const struct user_regset *regset,
@@ -1108,6 +1351,10 @@ enum aarch64_regset {
 #ifdef CONFIG_ARM64_SVE
        REGSET_SVE,
 #endif
+#ifdef CONFIG_ARM64_SME
+       REGSET_SSVE,
+       REGSET_ZA,
+#endif
 #ifdef CONFIG_ARM64_PTR_AUTH
        REGSET_PAC_MASK,
        REGSET_PAC_ENABLED_KEYS,
@@ -1188,6 +1435,33 @@ static const struct user_regset aarch64_regsets[] = {
                .set = sve_set,
        },
 #endif
+#ifdef CONFIG_ARM64_SME
+       [REGSET_SSVE] = { /* Streaming mode SVE */
+               .core_note_type = NT_ARM_SSVE,
+               .n = DIV_ROUND_UP(SVE_PT_SIZE(SME_VQ_MAX, SVE_PT_REGS_SVE),
+                                 SVE_VQ_BYTES),
+               .size = SVE_VQ_BYTES,
+               .align = SVE_VQ_BYTES,
+               .regset_get = ssve_get,
+               .set = ssve_set,
+       },
+       [REGSET_ZA] = { /* SME ZA */
+               .core_note_type = NT_ARM_ZA,
+               /*
+                * ZA is a single register but it's variably sized and
+                * the ptrace core requires that the size of any data
+                * be an exact multiple of the configured register
+                * size, so report as though we had SVE_VQ_BYTES
+                * registers. These values aren't exposed to
+                * userspace.
+                */
+               .n = DIV_ROUND_UP(ZA_PT_SIZE(SME_VQ_MAX), SVE_VQ_BYTES),
+               .size = SVE_VQ_BYTES,
+               .align = SVE_VQ_BYTES,
+               .regset_get = za_get,
+               .set = za_set,
+       },
+#endif
 #ifdef CONFIG_ARM64_PTR_AUTH
        [REGSET_PAC_MASK] = {
                .core_note_type = NT_ARM_PAC_MASK,
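
As a usage sketch, the new ZA regset is reached through the generic
regset ioctls. Something along these lines should work from a debugger,
assuming the NT_ARM_ZA note type and struct user_za_header from the
matching uapi headers (error handling elided; read_za_header() is a
hypothetical helper, not kernel API):

	#include <sys/types.h>
	#include <sys/ptrace.h>
	#include <sys/uio.h>
	#include <linux/elf.h>		/* NT_ARM_ZA */
	#include <asm/ptrace.h>		/* struct user_za_header */

	/* Hypothetical helper: fetch the ZA header for a stopped tracee. */
	static long read_za_header(pid_t child, struct user_za_header *hdr)
	{
		struct iovec iov = {
			.iov_base = hdr,
			.iov_len  = sizeof(*hdr),
		};

		return ptrace(PTRACE_GETREGSET, child, NT_ARM_ZA, &iov);
	}

If hdr->size comes back as ZA_PT_ZA_OFFSET, ZA was inactive and za_get()
wrote only the header; otherwise a second call with iov_len = hdr->size
retrieves the ZA data as well.
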
index f0a3df9e18a32158fed3f3a49ec7e67733cd7407..413f899e4ac639414dcfaf2e5f0a7152fc55b938 100644 (file)
  * safe memory that has been set up to be preserved during the copy operation.
  */
 SYM_CODE_START(arm64_relocate_new_kernel)
+       /*
+        * The kimage structure isn't allocated specially and may be clobbered
+        * during relocation. We must load any values we need from it prior to
+        * any relocation occurring.
+        */
+       ldr     x28, [x0, #KIMAGE_START]
+       ldr     x27, [x0, #KIMAGE_ARCH_EL2_VECTORS]
+       ldr     x26, [x0, #KIMAGE_ARCH_DTB_MEM]
+
        /* Setup the list loop variables. */
        ldr     x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */
        ldr     x17, [x0, #KIMAGE_ARCH_TTBR1]   /* x17 = linear map copy */
@@ -72,21 +81,20 @@ SYM_CODE_START(arm64_relocate_new_kernel)
        ic      iallu
        dsb     nsh
        isb
-       ldr     x4, [x0, #KIMAGE_START]                 /* relocation start */
-       ldr     x1, [x0, #KIMAGE_ARCH_EL2_VECTORS]      /* relocation start */
-       ldr     x0, [x0, #KIMAGE_ARCH_DTB_MEM]          /* dtb address */
        turn_off_mmu x12, x13
 
        /* Start new image. */
-       cbz     x1, .Lel1
-       mov     x1, x4                          /* relocation start */
-       mov     x2, x0                          /* dtb address */
+       cbz     x27, .Lel1
+       mov     x1, x28                         /* kernel entry point */
+       mov     x2, x26                         /* dtb address */
        mov     x3, xzr
        mov     x4, xzr
        mov     x0, #HVC_SOFT_RESTART
        hvc     #0                              /* Jumps from el2 */
 .Lel1:
+       mov     x0, x26                         /* dtb address */
+       mov     x1, xzr
        mov     x2, xzr
        mov     x3, xzr
-       br      x4                              /* Jumps from el1 */
+       br      x28                             /* Jumps from el1 */
 SYM_CODE_END(arm64_relocate_new_kernel)
index 3505789cf4bd92aaa4ac011cedc202cb866bbf27..fea3223704b6339a0381ddee334168ec5d83201b 100644 (file)
@@ -225,6 +225,8 @@ static void __init request_standard_resources(void)
        kernel_code.end     = __pa_symbol(__init_begin - 1);
        kernel_data.start   = __pa_symbol(_sdata);
        kernel_data.end     = __pa_symbol(_end - 1);
+       insert_resource(&iomem_resource, &kernel_code);
+       insert_resource(&iomem_resource, &kernel_data);
 
        num_standard_resources = memblock.memory.cnt;
        res_size = num_standard_resources * sizeof(*standard_resources);
@@ -246,20 +248,7 @@ static void __init request_standard_resources(void)
                        res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
                }
 
-               request_resource(&iomem_resource, res);
-
-               if (kernel_code.start >= res->start &&
-                   kernel_code.end <= res->end)
-                       request_resource(res, &kernel_code);
-               if (kernel_data.start >= res->start &&
-                   kernel_data.end <= res->end)
-                       request_resource(res, &kernel_data);
-#ifdef CONFIG_KEXEC_CORE
-               /* Userspace will find "Crash kernel" region in /proc/iomem. */
-               if (crashk_res.end && crashk_res.start >= res->start &&
-                   crashk_res.end <= res->end)
-                       request_resource(res, &crashk_res);
-#endif
+               insert_resource(&iomem_resource, res);
        }
 }
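
The move from request_resource() to insert_resource() is what allows the
kernel code/data entries to be registered before the memblock-derived
System RAM entries: insert_resource() locates the right parent and nests
overlapping entries, while request_resource() fails with -EBUSY on any
conflict. A minimal sketch of the contract relied on here (the "demo"
resource and its range are purely illustrative):

	#include <linux/ioport.h>

	static int __init nest_example(void)
	{
		static struct resource demo = {
			.name  = "demo",
			.start = 0x80000000,
			.end   = 0x80000fff,
			.flags = IORESOURCE_SYSTEM_RAM,
		};

		/*
		 * Succeeds even if a "System RAM" entry covering this
		 * range is only inserted later; the later insert will
		 * nest around it. request_resource() would instead
		 * return -EBUSY once an overlapping sibling existed.
		 */
		return insert_resource(&iomem_resource, &demo);
	}
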
 
index 4a4122ef6f39b43abfaec5de4ea1514f62563768..edb2d9206a78b4550a42c59dca77fe0d69f5cc5f 100644 (file)
@@ -56,6 +56,7 @@ struct rt_sigframe_user_layout {
        unsigned long fpsimd_offset;
        unsigned long esr_offset;
        unsigned long sve_offset;
+       unsigned long za_offset;
        unsigned long extra_offset;
        unsigned long end_offset;
 };
@@ -218,6 +219,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
 struct user_ctxs {
        struct fpsimd_context __user *fpsimd;
        struct sve_context __user *sve;
+       struct za_context __user *za;
 };
 
 #ifdef CONFIG_ARM64_SVE
@@ -226,11 +228,17 @@ static int preserve_sve_context(struct sve_context __user *ctx)
 {
        int err = 0;
        u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+       u16 flags = 0;
        unsigned int vl = task_get_sve_vl(current);
        unsigned int vq = 0;
 
-       if (test_thread_flag(TIF_SVE))
+       if (thread_sm_enabled(&current->thread)) {
+               vl = task_get_sme_vl(current);
                vq = sve_vq_from_vl(vl);
+               flags |= SVE_SIG_FLAG_SM;
+       } else if (test_thread_flag(TIF_SVE)) {
+               vq = sve_vq_from_vl(vl);
+       }
 
        memset(reserved, 0, sizeof(reserved));
 
@@ -238,6 +246,7 @@ static int preserve_sve_context(struct sve_context __user *ctx)
        __put_user_error(round_up(SVE_SIG_CONTEXT_SIZE(vq), 16),
                         &ctx->head.size, err);
        __put_user_error(vl, &ctx->vl, err);
+       __put_user_error(flags, &ctx->flags, err);
        BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
        err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
 
@@ -258,18 +267,28 @@ static int preserve_sve_context(struct sve_context __user *ctx)
 static int restore_sve_fpsimd_context(struct user_ctxs *user)
 {
        int err;
-       unsigned int vq;
+       unsigned int vl, vq;
        struct user_fpsimd_state fpsimd;
        struct sve_context sve;
 
        if (__copy_from_user(&sve, user->sve, sizeof(sve)))
                return -EFAULT;
 
-       if (sve.vl != task_get_sve_vl(current))
+       if (sve.flags & SVE_SIG_FLAG_SM) {
+               if (!system_supports_sme())
+                       return -EINVAL;
+
+               vl = task_get_sme_vl(current);
+       } else {
+               vl = task_get_sve_vl(current);
+       }
+
+       if (sve.vl != vl)
                return -EINVAL;
 
        if (sve.head.size <= sizeof(*user->sve)) {
                clear_thread_flag(TIF_SVE);
+               current->thread.svcr &= ~SVCR_SM_MASK;
                goto fpsimd_only;
        }
 
@@ -301,7 +320,10 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
        if (err)
                return -EFAULT;
 
-       set_thread_flag(TIF_SVE);
+       if (sve.flags & SVE_SIG_FLAG_SM)
+               current->thread.svcr |= SVCR_SM_MASK;
+       else
+               set_thread_flag(TIF_SVE);
 
 fpsimd_only:
        /* copy the FP and status/control registers */
@@ -326,6 +348,101 @@ extern int restore_sve_fpsimd_context(struct user_ctxs *user);
 
 #endif /* ! CONFIG_ARM64_SVE */
 
+#ifdef CONFIG_ARM64_SME
+
+static int preserve_za_context(struct za_context __user *ctx)
+{
+       int err = 0;
+       u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+       unsigned int vl = task_get_sme_vl(current);
+       unsigned int vq;
+
+       if (thread_za_enabled(&current->thread))
+               vq = sve_vq_from_vl(vl);
+       else
+               vq = 0;
+
+       memset(reserved, 0, sizeof(reserved));
+
+       __put_user_error(ZA_MAGIC, &ctx->head.magic, err);
+       __put_user_error(round_up(ZA_SIG_CONTEXT_SIZE(vq), 16),
+                        &ctx->head.size, err);
+       __put_user_error(vl, &ctx->vl, err);
+       BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+       err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+       if (vq) {
+               /*
+                * This assumes that the ZA state has already been saved to
+                * the task struct by calling the function
+                * fpsimd_signal_preserve_current_state().
+                */
+               err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
+                                     current->thread.za_state,
+                                     ZA_SIG_REGS_SIZE(vq));
+       }
+
+       return err ? -EFAULT : 0;
+}
+
+static int restore_za_context(struct user_ctxs *user)
+{
+       int err;
+       unsigned int vq;
+       struct za_context za;
+
+       if (__copy_from_user(&za, user->za, sizeof(za)))
+               return -EFAULT;
+
+       if (za.vl != task_get_sme_vl(current))
+               return -EINVAL;
+
+       if (za.head.size <= sizeof(*user->za)) {
+               current->thread.svcr &= ~SVCR_ZA_MASK;
+               return 0;
+       }
+
+       vq = sve_vq_from_vl(za.vl);
+
+       if (za.head.size < ZA_SIG_CONTEXT_SIZE(vq))
+               return -EINVAL;
+
+       /*
+        * Careful: we are about to __copy_from_user() directly into
+        * thread.za_state with preemption enabled, so protection is
+        * needed to prevent a racing context switch from writing stale
+        * registers back over the new data.
+        */
+
+       fpsimd_flush_task_state(current);
+       /* From now, fpsimd_thread_switch() won't touch thread.za_state */
+
+       sme_alloc(current);
+       if (!current->thread.za_state) {
+               current->thread.svcr &= ~SVCR_ZA_MASK;
+               clear_thread_flag(TIF_SME);
+               return -ENOMEM;
+       }
+
+       err = __copy_from_user(current->thread.za_state,
+                              (char __user const *)user->za +
+                                       ZA_SIG_REGS_OFFSET,
+                              ZA_SIG_REGS_SIZE(vq));
+       if (err)
+               return -EFAULT;
+
+       set_thread_flag(TIF_SME);
+       current->thread.svcr |= SVCR_ZA_MASK;
+
+       return 0;
+}
+#else /* ! CONFIG_ARM64_SME */
+
+/* Turn any non-optimised out attempts to use these into a link error: */
+extern int preserve_za_context(void __user *ctx);
+extern int restore_za_context(struct user_ctxs *user);
+
+#endif /* ! CONFIG_ARM64_SME */
 
 static int parse_user_sigframe(struct user_ctxs *user,
                               struct rt_sigframe __user *sf)
@@ -340,6 +457,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
 
        user->fpsimd = NULL;
        user->sve = NULL;
+       user->za = NULL;
 
        if (!IS_ALIGNED((unsigned long)base, 16))
                goto invalid;
@@ -393,7 +511,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
                        break;
 
                case SVE_MAGIC:
-                       if (!system_supports_sve())
+                       if (!system_supports_sve() && !system_supports_sme())
                                goto invalid;
 
                        if (user->sve)
@@ -405,6 +523,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
                        user->sve = (struct sve_context __user *)head;
                        break;
 
+               case ZA_MAGIC:
+                       if (!system_supports_sme())
+                               goto invalid;
+
+                       if (user->za)
+                               goto invalid;
+
+                       if (size < sizeof(*user->za))
+                               goto invalid;
+
+                       user->za = (struct za_context __user *)head;
+                       break;
+
                case EXTRA_MAGIC:
                        if (have_extra_context)
                                goto invalid;
@@ -528,6 +659,9 @@ static int restore_sigframe(struct pt_regs *regs,
                }
        }
 
+       if (err == 0 && system_supports_sme() && user.za)
+               err = restore_za_context(&user);
+
        return err;
 }
 
@@ -594,11 +728,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
        if (system_supports_sve()) {
                unsigned int vq = 0;
 
-               if (add_all || test_thread_flag(TIF_SVE)) {
-                       int vl = sve_max_vl();
+               if (add_all || test_thread_flag(TIF_SVE) ||
+                   thread_sm_enabled(&current->thread)) {
+                       int vl = max(sve_max_vl(), sme_max_vl());
 
                        if (!add_all)
-                               vl = task_get_sve_vl(current);
+                               vl = thread_get_cur_vl(&current->thread);
 
                        vq = sve_vq_from_vl(vl);
                }
@@ -609,6 +744,24 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
                        return err;
        }
 
+       if (system_supports_sme()) {
+               unsigned int vl;
+               unsigned int vq = 0;
+
+               if (add_all)
+                       vl = sme_max_vl();
+               else
+                       vl = task_get_sme_vl(current);
+
+               if (thread_za_enabled(&current->thread))
+                       vq = sve_vq_from_vl(vl);
+
+               err = sigframe_alloc(user, &user->za_offset,
+                                    ZA_SIG_CONTEXT_SIZE(vq));
+               if (err)
+                       return err;
+       }
+
        return sigframe_alloc_end(user);
 }
 
@@ -649,13 +802,21 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
                __put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
        }
 
-       /* Scalable Vector Extension state, if present */
-       if (system_supports_sve() && err == 0 && user->sve_offset) {
+       /* Scalable Vector Extension state (including streaming), if present */
+       if ((system_supports_sve() || system_supports_sme()) &&
+           err == 0 && user->sve_offset) {
                struct sve_context __user *sve_ctx =
                        apply_user_offset(user, user->sve_offset);
                err |= preserve_sve_context(sve_ctx);
        }
 
+       /* ZA state if present */
+       if (system_supports_sme() && err == 0 && user->za_offset) {
+               struct za_context __user *za_ctx =
+                       apply_user_offset(user, user->za_offset);
+               err |= preserve_za_context(za_ctx);
+       }
+
        if (err == 0 && user->extra_offset) {
                char __user *sfp = (char __user *)user->sigframe;
                char __user *userp =
@@ -759,6 +920,13 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
        /* TCO (Tag Check Override) always cleared for signal handlers */
        regs->pstate &= ~PSR_TCO_BIT;
 
+       /* Signal handlers are invoked with ZA and streaming mode disabled */
+       if (system_supports_sme()) {
+               current->thread.svcr &= ~(SVCR_ZA_MASK |
+                                         SVCR_SM_MASK);
+               sme_smstop();
+       }
+
        if (ka->sa.sa_flags & SA_RESTORER)
                sigtramp = ka->sa.sa_restorer;
        else
@@ -1011,6 +1179,7 @@ static_assert(offsetof(siginfo_t, si_upper)       == 0x28);
 static_assert(offsetof(siginfo_t, si_pkey)     == 0x20);
 static_assert(offsetof(siginfo_t, si_perf_data)        == 0x18);
 static_assert(offsetof(siginfo_t, si_perf_type)        == 0x20);
+static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24);
 static_assert(offsetof(siginfo_t, si_band)     == 0x10);
 static_assert(offsetof(siginfo_t, si_fd)       == 0x18);
 static_assert(offsetof(siginfo_t, si_call_addr)        == 0x10);
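
For completeness, a userspace signal handler can locate the new record
by walking the __reserved area of the mcontext until it finds ZA_MAGIC.
A minimal sketch, assuming the updated <asm/sigcontext.h> and ignoring
the EXTRA_MAGIC indirection used for oversized frames:

	#include <stddef.h>
	#include <ucontext.h>
	#include <asm/sigcontext.h>

	static struct za_context *find_za(ucontext_t *uc)
	{
		struct _aarch64_ctx *head =
			(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;

		/* The terminator record has magic == 0. */
		for (; head->magic;
		     head = (struct _aarch64_ctx *)((char *)head + head->size))
			if (head->magic == ZA_MAGIC)
				return (struct za_context *)head;

		return NULL;
	}

A record whose head.size is no larger than sizeof(struct za_context)
means ZA was inactive when the signal was delivered, which is exactly
the check restore_za_context() applies above.
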
index d984282b979f86e6debaf29aa5a52a480de253a6..4700f8522d27b191858bff1924587793151a6acd 100644 (file)
@@ -487,6 +487,7 @@ static_assert(offsetof(compat_siginfo_t, si_upper)  == 0x18);
 static_assert(offsetof(compat_siginfo_t, si_pkey)      == 0x14);
 static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10);
 static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_perf_flags)        == 0x18);
 static_assert(offsetof(compat_siginfo_t, si_band)      == 0x0c);
 static_assert(offsetof(compat_siginfo_t, si_fd)                == 0x10);
 static_assert(offsetof(compat_siginfo_t, si_call_addr) == 0x0c);
index 3b46041f2b978893eae596fdf84bb6aec6074080..62ed361a4376ba6efd35ad9cc158ae6b799a456a 100644 (file)
@@ -512,6 +512,7 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
 {
        return &cpu_madt_gicc[cpu];
 }
+EXPORT_SYMBOL_GPL(acpi_cpu_get_madt_gicc);
 
 /*
  * acpi_map_gic_cpu_interface - parse processor MADT entry
index e4103e085681143e5f285c91d428f152d7013a6d..0467cb79f080a9a727b452177be01fe6501dd57e 100644 (file)
 #include <asm/stacktrace.h>
 
 /*
- * AArch64 PCS assigns the frame pointer to x29.
+ * A snapshot of a frame record or fp/lr register values, along with some
+ * accounting information necessary for robust unwinding.
  *
- * A simple function prologue looks like this:
- *     sub     sp, sp, #0x10
- *     stp     x29, x30, [sp]
- *     mov     x29, sp
+ * @fp:          The fp value in the frame record (or the real fp)
+ * @pc:          The lr value in the frame record (or the real lr)
  *
- * A simple function epilogue looks like this:
- *     mov     sp, x29
- *     ldp     x29, x30, [sp]
- *     add     sp, sp, #0x10
+ * @stacks_done: Stacks which have been entirely unwound, and to which it is
+ *               no longer valid to unwind.
+ *
+ * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
+ *               of 0. This is used to ensure that within a stack, each
+ *               subsequent frame record is at an increasing address.
+ * @prev_type:   The type of stack this frame record was on, or a synthetic
+ *               value of STACK_TYPE_UNKNOWN. This is used to detect a
+ *               transition from one stack to another.
+ *
+ * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
+ *               associated with the most recently encountered replacement lr
+ *               value.
  */
+struct unwind_state {
+       unsigned long fp;
+       unsigned long pc;
+       DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
+       unsigned long prev_fp;
+       enum stack_type prev_type;
+#ifdef CONFIG_KRETPROBES
+       struct llist_node *kr_cur;
+#endif
+};
 
-
-static notrace void start_backtrace(struct stackframe *frame, unsigned long fp,
-                                   unsigned long pc)
+static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
+                               unsigned long pc)
 {
-       frame->fp = fp;
-       frame->pc = pc;
+       state->fp = fp;
+       state->pc = pc;
 #ifdef CONFIG_KRETPROBES
-       frame->kr_cur = NULL;
+       state->kr_cur = NULL;
 #endif
 
        /*
         * Prime the first unwind.
         *
-        * In unwind_frame() we'll check that the FP points to a valid stack,
+        * In unwind_next() we'll check that the FP points to a valid stack,
         * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
         * treated as a transition to whichever stack that happens to be. The
         * prev_fp value won't be used, but we set it to 0 such that it is
         * definitely not an accessible stack address.
         */
-       bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
-       frame->prev_fp = 0;
-       frame->prev_type = STACK_TYPE_UNKNOWN;
+       bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
+       state->prev_fp = 0;
+       state->prev_type = STACK_TYPE_UNKNOWN;
 }
-NOKPROBE_SYMBOL(start_backtrace);
+NOKPROBE_SYMBOL(unwind_init);
 
 /*
  * Unwind from one frame record (A) to the next frame record (B).
@@ -64,15 +81,12 @@ NOKPROBE_SYMBOL(start_backtrace);
  * records (e.g. a cycle), determined based on the location and fp value of A
  * and the location (but not the fp value) of B.
  */
-static int notrace unwind_frame(struct task_struct *tsk,
-                               struct stackframe *frame)
+static int notrace unwind_next(struct task_struct *tsk,
+                              struct unwind_state *state)
 {
-       unsigned long fp = frame->fp;
+       unsigned long fp = state->fp;
        struct stack_info info;
 
-       if (!tsk)
-               tsk = current;
-
        /* Final frame; nothing to unwind */
        if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
                return -ENOENT;
@@ -83,7 +97,7 @@ static int notrace unwind_frame(struct task_struct *tsk,
        if (!on_accessible_stack(tsk, fp, 16, &info))
                return -EINVAL;
 
-       if (test_bit(info.type, frame->stacks_done))
+       if (test_bit(info.type, state->stacks_done))
                return -EINVAL;
 
        /*
@@ -99,27 +113,27 @@ static int notrace unwind_frame(struct task_struct *tsk,
         * stack to another, it's never valid to unwind back to that first
         * stack.
         */
-       if (info.type == frame->prev_type) {
-               if (fp <= frame->prev_fp)
+       if (info.type == state->prev_type) {
+               if (fp <= state->prev_fp)
                        return -EINVAL;
        } else {
-               set_bit(frame->prev_type, frame->stacks_done);
+               set_bit(state->prev_type, state->stacks_done);
        }
 
        /*
         * Record this frame record's values and location. The prev_fp and
-        * prev_type are only meaningful to the next unwind_frame() invocation.
+        * prev_type are only meaningful to the next unwind_next() invocation.
         */
-       frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
-       frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
-       frame->prev_fp = fp;
-       frame->prev_type = info.type;
+       state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
+       state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+       state->prev_fp = fp;
+       state->prev_type = info.type;
 
-       frame->pc = ptrauth_strip_insn_pac(frame->pc);
+       state->pc = ptrauth_strip_insn_pac(state->pc);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        if (tsk->ret_stack &&
-               (frame->pc == (unsigned long)return_to_handler)) {
+               (state->pc == (unsigned long)return_to_handler)) {
                unsigned long orig_pc;
                /*
                 * This is a case where function graph tracer has
@@ -127,37 +141,37 @@ static int notrace unwind_frame(struct task_struct *tsk,
                 * to hook a function return.
                 * So replace it to an original value.
                 */
-               orig_pc = ftrace_graph_ret_addr(tsk, NULL, frame->pc,
-                                               (void *)frame->fp);
-               if (WARN_ON_ONCE(frame->pc == orig_pc))
+               orig_pc = ftrace_graph_ret_addr(tsk, NULL, state->pc,
+                                               (void *)state->fp);
+               if (WARN_ON_ONCE(state->pc == orig_pc))
                        return -EINVAL;
-               frame->pc = orig_pc;
+               state->pc = orig_pc;
        }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 #ifdef CONFIG_KRETPROBES
-       if (is_kretprobe_trampoline(frame->pc))
-               frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur);
+       if (is_kretprobe_trampoline(state->pc))
+               state->pc = kretprobe_find_ret_addr(tsk, (void *)state->fp, &state->kr_cur);
 #endif
 
        return 0;
 }
-NOKPROBE_SYMBOL(unwind_frame);
+NOKPROBE_SYMBOL(unwind_next);
 
-static void notrace walk_stackframe(struct task_struct *tsk,
-                                   struct stackframe *frame,
-                                   bool (*fn)(void *, unsigned long), void *data)
+static void notrace unwind(struct task_struct *tsk,
+                          struct unwind_state *state,
+                          stack_trace_consume_fn consume_entry, void *cookie)
 {
        while (1) {
                int ret;
 
-               if (!fn(data, frame->pc))
+               if (!consume_entry(cookie, state->pc))
                        break;
-               ret = unwind_frame(tsk, frame);
+               ret = unwind_next(tsk, state);
                if (ret < 0)
                        break;
        }
 }
-NOKPROBE_SYMBOL(walk_stackframe);
+NOKPROBE_SYMBOL(unwind);
 
 static bool dump_backtrace_entry(void *arg, unsigned long where)
 {
@@ -196,17 +210,17 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
                              void *cookie, struct task_struct *task,
                              struct pt_regs *regs)
 {
-       struct stackframe frame;
+       struct unwind_state state;
 
        if (regs)
-               start_backtrace(&frame, regs->regs[29], regs->pc);
+               unwind_init(&state, regs->regs[29], regs->pc);
        else if (task == current)
-               start_backtrace(&frame,
+               unwind_init(&state,
                                (unsigned long)__builtin_frame_address(1),
                                (unsigned long)__builtin_return_address(0));
        else
-               start_backtrace(&frame, thread_saved_fp(task),
+               unwind_init(&state, thread_saved_fp(task),
                                thread_saved_pc(task));
 
-       walk_stackframe(task, &frame, consume_entry, cookie);
+       unwind(task, &state, consume_entry, cookie);
 }
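
The renamed entry points keep the same shape as the old ones; a minimal
in-kernel caller looks like this (the callback type is the generic
stack_trace_consume_fn; print_entry and dump_my_stack are illustrative,
not existing symbols):

	static bool print_entry(void *cookie, unsigned long pc)
	{
		pr_info("  %pS\n", (void *)pc);
		return true;	/* returning false stops the walk */
	}

	static void dump_my_stack(void)
	{
		/* task == current, regs == NULL: unwind from our caller. */
		arch_stack_walk(print_entry, NULL, current, NULL);
	}
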
index 12c6864e51e13b1da5930d2482f51b78075cd605..df14336c3a29cf0a66d51de9add29581ac3b5b74 100644 (file)
@@ -113,6 +113,6 @@ long compat_arm_syscall(struct pt_regs *regs, int scno)
        addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4);
 
        arm64_notify_die("Oops - bad compat syscall(2)", regs,
-                        SIGILL, ILL_ILLTRP, addr, scno);
+                        SIGILL, ILL_ILLTRP, addr, 0);
        return 0;
 }
index c938603b3ba05297058cfee25fc7bf5224271342..733451fe7e41f031e2b5cf34396521d8ec0a0639 100644 (file)
@@ -158,11 +158,36 @@ trace_exit:
        syscall_trace_exit(regs);
 }
 
-static inline void sve_user_discard(void)
+/*
+ * As per the ABI, exit SME streaming mode and clear the SVE state not
+ * shared with FPSIMD on syscall entry.
+ */
+static inline void fp_user_discard(void)
 {
+       /*
+        * If SME is active then exit streaming mode.  If ZA is active
+        * then flush the SVE registers but leave userspace access to
+        * both SVE and SME enabled; otherwise disable SME for the
+        * task and fall through to disabling SVE too.  This means
+        * that after a syscall we never have any streaming mode
+        * register state to track; if this changes, the KVM code will
+        * need updating.
+        */
+       if (system_supports_sme() && test_thread_flag(TIF_SME)) {
+               u64 svcr = read_sysreg_s(SYS_SVCR);
+
+               if (svcr & SVCR_SM_MASK)
+                       sme_smstop_sm();
+       }
+
        if (!system_supports_sve())
                return;
 
+       /*
+        * If SME is not active then disable SVE; the registers will
+        * be cleared when userspace next attempts to access them, and
+        * we do not need to track the SVE register state until then.
+        */
        clear_thread_flag(TIF_SVE);
 
        /*
@@ -177,7 +202,7 @@ static inline void sve_user_discard(void)
 
 void do_el0_svc(struct pt_regs *regs)
 {
-       sve_user_discard();
+       fp_user_discard();
        el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
 }
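
The user-visible consequence is that streaming mode never survives a
syscall. A sketch of observing this from userspace on SME-capable
hardware (SVCR is readable at EL0; the raw S3_3_C4_C2_2 and .inst
encodings are used here so no SME-aware toolchain is assumed, and
SVCR.SM is bit 0):

	#include <stdio.h>
	#include <unistd.h>

	static unsigned long read_svcr(void)
	{
		unsigned long svcr;

		asm volatile("mrs %0, S3_3_C4_C2_2" : "=r" (svcr));
		return svcr;
	}

	int main(void)
	{
		unsigned long before, after;

		asm volatile(".inst 0xd503437f");	/* SMSTART SM */
		before = read_svcr();
		getpid();			/* any syscall */
		after = read_svcr();
		printf("SM before=%lu after=%lu\n",	/* expect 1, 0 */
		       before & 1, after & 1);
		return 0;
	}
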
 
index 0529fd57567ee9098ed15424bf04520e878e7a31..9ac7a81b79be853e6344f8a565bd3be9bcc10c2c 100644 (file)
@@ -242,7 +242,7 @@ static void arm64_show_signal(int signo, const char *str)
        static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);
        struct task_struct *tsk = current;
-       unsigned int esr = tsk->thread.fault_code;
+       unsigned long esr = tsk->thread.fault_code;
        struct pt_regs *regs = task_pt_regs(tsk);
 
        /* Leave if the signal won't be shown */
@@ -253,7 +253,7 @@ static void arm64_show_signal(int signo, const char *str)
 
        pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk));
        if (esr)
-               pr_cont("%s, ESR 0x%08x, ", esr_get_class_string(esr), esr);
+               pr_cont("%s, ESR 0x%016lx, ", esr_get_class_string(esr), esr);
 
        pr_cont("%s", str);
        print_vma_addr(KERN_CONT " in ", regs->pc);
@@ -287,7 +287,7 @@ void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far,
 
 void arm64_notify_die(const char *str, struct pt_regs *regs,
                      int signo, int sicode, unsigned long far,
-                     int err)
+                     unsigned long err)
 {
        if (user_mode(regs)) {
                WARN_ON(regs != current_pt_regs());
@@ -439,7 +439,7 @@ exit:
        return fn ? fn(regs, instr) : 1;
 }
 
-void force_signal_inject(int signal, int code, unsigned long address, unsigned int err)
+void force_signal_inject(int signal, int code, unsigned long address, unsigned long err)
 {
        const char *desc;
        struct pt_regs *regs = current_pt_regs();
@@ -506,7 +506,7 @@ void do_bti(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(do_bti);
 
-void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr)
+void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr)
 {
        /*
         * Unexpected FPAC exception or pointer authentication failure in
@@ -532,7 +532,7 @@ NOKPROBE_SYMBOL(do_ptrauth_fault);
                uaccess_ttbr0_disable();                        \
        }
 
-static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
+static void user_cache_maint_handler(unsigned long esr, struct pt_regs *regs)
 {
        unsigned long tagged_address, address;
        int rt = ESR_ELx_SYS64_ISS_RT(esr);
@@ -572,7 +572,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
                arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
-static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
+static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
 {
        int rt = ESR_ELx_SYS64_ISS_RT(esr);
        unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
@@ -591,7 +591,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
        arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
-static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
 {
        int rt = ESR_ELx_SYS64_ISS_RT(esr);
 
@@ -599,7 +599,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
        arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
-static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
 {
        int rt = ESR_ELx_SYS64_ISS_RT(esr);
 
@@ -607,7 +607,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
        arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
-static void mrs_handler(unsigned int esr, struct pt_regs *regs)
+static void mrs_handler(unsigned long esr, struct pt_regs *regs)
 {
        u32 sysreg, rt;
 
@@ -618,15 +618,15 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs)
                force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
 }
 
-static void wfi_handler(unsigned int esr, struct pt_regs *regs)
+static void wfi_handler(unsigned long esr, struct pt_regs *regs)
 {
        arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
 struct sys64_hook {
-       unsigned int esr_mask;
-       unsigned int esr_val;
-       void (*handler)(unsigned int esr, struct pt_regs *regs);
+       unsigned long esr_mask;
+       unsigned long esr_val;
+       void (*handler)(unsigned long esr, struct pt_regs *regs);
 };
 
 static const struct sys64_hook sys64_hooks[] = {
@@ -675,7 +675,7 @@ static const struct sys64_hook sys64_hooks[] = {
 };
 
 #ifdef CONFIG_COMPAT
-static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
+static bool cp15_cond_valid(unsigned long esr, struct pt_regs *regs)
 {
        int cond;
 
@@ -695,7 +695,7 @@ static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
        return aarch32_opcode_cond_checks[cond](regs->pstate);
 }
 
-static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+static void compat_cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
 {
        int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT;
 
@@ -712,7 +712,7 @@ static const struct sys64_hook cp15_32_hooks[] = {
        {},
 };
 
-static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+static void compat_cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
 {
        int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT;
        int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT;
@@ -737,7 +737,7 @@ static const struct sys64_hook cp15_64_hooks[] = {
        {},
 };
 
-void do_cp15instr(unsigned int esr, struct pt_regs *regs)
+void do_cp15instr(unsigned long esr, struct pt_regs *regs)
 {
        const struct sys64_hook *hook, *hook_base;
 
@@ -778,7 +778,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs)
 NOKPROBE_SYMBOL(do_cp15instr);
 #endif
 
-void do_sysinstr(unsigned int esr, struct pt_regs *regs)
+void do_sysinstr(unsigned long esr, struct pt_regs *regs)
 {
        const struct sys64_hook *hook;
 
@@ -821,6 +821,7 @@ static const char *esr_class_str[] = {
        [ESR_ELx_EC_SVE]                = "SVE",
        [ESR_ELx_EC_ERET]               = "ERET/ERETAA/ERETAB",
        [ESR_ELx_EC_FPAC]               = "FPAC",
+       [ESR_ELx_EC_SME]                = "SME",
        [ESR_ELx_EC_IMP_DEF]            = "EL3 IMP DEF",
        [ESR_ELx_EC_IABT_LOW]           = "IABT (lower EL)",
        [ESR_ELx_EC_IABT_CUR]           = "IABT (current EL)",
@@ -842,7 +843,7 @@ static const char *esr_class_str[] = {
        [ESR_ELx_EC_BRK64]              = "BRK (AArch64)",
 };
 
-const char *esr_get_class_string(u32 esr)
+const char *esr_get_class_string(unsigned long esr)
 {
        return esr_class_str[ESR_ELx_EC(esr)];
 }
@@ -851,7 +852,7 @@ const char *esr_get_class_string(u32 esr)
  * bad_el0_sync handles unexpected, but potentially recoverable synchronous
  * exceptions taken from EL0.
  */
-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr)
 {
        unsigned long pc = instruction_pointer(regs);
 
@@ -867,7 +868,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
        __aligned(16);
 
-void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
+void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far)
 {
        unsigned long tsk_stk = (unsigned long)current->stack;
        unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
@@ -876,7 +877,7 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
        console_verbose();
        pr_emerg("Insufficient stack space to handle exception!");
 
-       pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr));
+       pr_emerg("ESR: 0x%016lx -- %s\n", esr, esr_get_class_string(esr));
        pr_emerg("FAR: 0x%016lx\n", far);
 
        pr_emerg("Task stack:     [0x%016lx..0x%016lx]\n",
@@ -897,11 +898,11 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
 }
 #endif
 
-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
+void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
 {
        console_verbose();
 
-       pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
+       pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
                smp_processor_id(), esr, esr_get_class_string(esr));
        if (regs)
                __show_regs(regs);
@@ -912,9 +913,9 @@ void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
        unreachable();
 }
 
-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr)
 {
-       u32 aet = arm64_ras_serror_get_severity(esr);
+       unsigned long aet = arm64_ras_serror_get_severity(esr);
 
        switch (aet) {
        case ESR_ELx_AET_CE:    /* corrected error */
@@ -944,7 +945,7 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
        }
 }
 
-void do_serror(struct pt_regs *regs, unsigned int esr)
+void do_serror(struct pt_regs *regs, unsigned long esr)
 {
        /* non-RAS errors are not containable */
        if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
@@ -965,7 +966,7 @@ int is_valid_bugaddr(unsigned long addr)
        return 1;
 }
 
-static int bug_handler(struct pt_regs *regs, unsigned int esr)
+static int bug_handler(struct pt_regs *regs, unsigned long esr)
 {
        switch (report_bug(regs->pc, regs)) {
        case BUG_TRAP_TYPE_BUG:
@@ -990,7 +991,7 @@ static struct break_hook bug_break_hook = {
        .imm = BUG_BRK_IMM,
 };
 
-static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr)
+static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
 {
        pr_err("%s generated an invalid instruction at %pS!\n",
                "Kernel text patching",
@@ -1012,7 +1013,7 @@ static struct break_hook fault_break_hook = {
 #define KASAN_ESR_SIZE_MASK    0x0f
 #define KASAN_ESR_SIZE(esr)    (1 << ((esr) & KASAN_ESR_SIZE_MASK))
 
-static int kasan_handler(struct pt_regs *regs, unsigned int esr)
+static int kasan_handler(struct pt_regs *regs, unsigned long esr)
 {
        bool recover = esr & KASAN_ESR_RECOVER;
        bool write = esr & KASAN_ESR_WRITE;
@@ -1055,11 +1056,11 @@ static struct break_hook kasan_break_hook = {
  * Initial handler for AArch64 BRK exceptions
  * This handler only used until debug_traps_init().
  */
-int __init early_brk64(unsigned long addr, unsigned int esr,
+int __init early_brk64(unsigned long addr, unsigned long esr,
                struct pt_regs *regs)
 {
 #ifdef CONFIG_KASAN_SW_TAGS
-       unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+       unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
 
        if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
                return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
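
The blanket unsigned int to unsigned long conversion in this file
reflects that ESR_ELx is architecturally a 64-bit register: from
ARMv8.7 the ISS2 field lives in bits [36:32], which a 32-bit type
silently truncates. An illustrative sketch of what the wider type
preserves (bit positions per the Arm ARM; <asm/esr.h> carries the
authoritative macros):

	static inline unsigned long esr_ec(unsigned long esr)
	{
		return (esr >> 26) & 0x3f;	/* EC: bits [31:26] */
	}

	static inline unsigned long esr_iss2(unsigned long esr)
	{
		return (esr >> 32) & 0x1f;	/* ISS2: bits [36:32] */
	}
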
index 172452f79e462ed6ec6b462a19a80f0db4c1e857..f6e25d7c346aba02c838a922fd5df6026319ef76 100644 (file)
@@ -32,7 +32,8 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
 # -Wmissing-prototypes and -Wmissing-declarations are removed from
 # the CFLAGS of vgettimeofday.c to make it possible to build the
 # kernel with CONFIG_WERROR enabled.
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
+                               $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
                                $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations
 KASAN_SANITIZE                 := n
 KCSAN_SANITIZE                 := n
@@ -52,9 +53,6 @@ GCOV_PROFILE := n
 targets += vdso.lds
 CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 
-# Force dependency (incbin is bad)
-$(obj)/vdso.o : $(obj)/vdso.so
-
 # Link rule for the .so file, .lds has to be first
 $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
        $(call if_changed,vdsold_and_vdso_check)
index ed181bedbffc5b16d4a654f34c6c62d22db4c21a..05ba1aae1b6f24017fb5e5c732f260d6accd7958 100644 (file)
@@ -131,9 +131,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
 targets += vdso.lds
 CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 
-# Force dependency (vdso.s includes vdso.so through incbin)
-$(obj)/vdso.o: $(obj)/vdso.so
-
 include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
        $(call if_changed,vdsosym)
 
index edaf0faf766f0023e99ee8c11f1b5b9ac9742fed..2d4a8f99517533777371d6ec951ad3c0d7f10eca 100644 (file)
@@ -93,7 +93,6 @@ jiffies = jiffies_64;
 
 #ifdef CONFIG_HIBERNATION
 #define HIBERNATE_TEXT                                 \
-       . = ALIGN(SZ_4K);                               \
        __hibernate_exit_text_start = .;                \
        *(.hibernate_exit.text)                         \
        __hibernate_exit_text_end = .;
@@ -103,7 +102,6 @@ jiffies = jiffies_64;
 
 #ifdef CONFIG_KEXEC_CORE
 #define KEXEC_TEXT                                     \
-       . = ALIGN(SZ_4K);                               \
        __relocate_new_kernel_start = .;                \
        *(.kexec_relocate.text)                         \
        __relocate_new_kernel_end = .;
@@ -170,9 +168,6 @@ SECTIONS
                        KPROBES_TEXT
                        HYPERVISOR_TEXT
                        IDMAP_TEXT
-                       HIBERNATE_TEXT
-                       KEXEC_TEXT
-                       TRAMP_TEXT
                        *(.gnu.warning)
                . = ALIGN(16);
                *(.got)                 /* Global offset table          */
@@ -194,6 +189,14 @@ SECTIONS
 
        HYPERVISOR_DATA_SECTIONS
 
+       /* code sections that are never executed via the kernel mapping */
+       .rodata.text : {
+               TRAMP_TEXT
+               HIBERNATE_TEXT
+               KEXEC_TEXT
+               . = ALIGN(PAGE_SIZE);
+       }
+
        idmap_pg_dir = .;
        . += IDMAP_DIR_SIZE;
        idmap_pg_end = .;
@@ -337,8 +340,8 @@ ASSERT(__hyp_idmap_text_end - __hyp_idmap_text_start <= PAGE_SIZE,
 ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
        "ID map text too big or misaligned")
 #ifdef CONFIG_HIBERNATION
-ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
-       <= SZ_4K, "Hibernate exit text too big or misaligned")
+ASSERT(__hibernate_exit_text_end - __hibernate_exit_text_start <= SZ_4K,
+       "Hibernate exit text is bigger than 4 KiB")
 #endif
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
@@ -362,7 +365,7 @@ ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET,
 
 #ifdef CONFIG_KEXEC_CORE
 /* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */
-ASSERT(__relocate_new_kernel_end - (__relocate_new_kernel_start & ~(SZ_4K - 1))
-       <= SZ_4K, "kexec relocation code is too big or misaligned")
+ASSERT(__relocate_new_kernel_end - __relocate_new_kernel_start <= SZ_4K,
+       "kexec relocation code is bigger than 4 KiB")
 ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken")
 #endif
index 523bc934fe2f66687b2bb605776f4b239b6114d3..cedc3ba2c09828bc82f5cf76d19db3d958d0e4b9 100644 (file)
@@ -783,6 +783,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
        ret = 1;
        run->exit_reason = KVM_EXIT_UNKNOWN;
+       run->flags = 0;
        while (ret > 0) {
                /*
                 * Check conditions before entering the guest
@@ -1436,7 +1437,8 @@ static int kvm_init_vector_slots(void)
        base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
        kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
 
-       if (kvm_system_needs_idmapped_vectors() && !has_vhe()) {
+       if (kvm_system_needs_idmapped_vectors() &&
+           !is_protected_kvm_enabled()) {
                err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
                                               __BP_HARDEN_HYP_VECS_SZ, &base);
                if (err)
index 397fdac75cb1231e27fe696a084a960b282c0c72..3d251a4d2cf7bfdb3ed82df94d30b8d6b48a458c 100644 (file)
@@ -82,6 +82,26 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 
        if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
                vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
+
+       /*
+        * We don't currently support SME guests, but if we leave
+        * things in streaming mode then the guest may generate SME
+        * traps when it starts running FPSIMD or SVE code.  As a
+        * special case, if we are in streaming mode we force the host
+        * state to be saved now and exit streaming mode so that we
+        * don't have to handle any SME traps for valid guest
+        * operations.  Do the same for ZA for now, for simplicity.
+        */
+       if (system_supports_sme()) {
+               if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
+                       vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
+
+               if (read_sysreg_s(SYS_SVCR) &
+                   (SVCR_SM_MASK | SVCR_ZA_MASK)) {
+                       vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+                       fpsimd_save_and_flush_cpu_state();
+               }
+       }
 }
 
 /*
@@ -109,9 +129,14 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
        WARN_ON_ONCE(!irqs_disabled());
 
        if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+               /*
+                * Currently we do not support SME guests so SVCR is
+                * always 0 and we just need a variable to point to.
+                */
                fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
                                         vcpu->arch.sve_state,
-                                        vcpu->arch.sve_max_vl);
+                                        vcpu->arch.sve_max_vl,
+                                        NULL, 0, &vcpu->arch.svcr);
 
                clear_thread_flag(TIF_FOREIGN_FPSTATE);
                update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
@@ -130,6 +155,22 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 
        local_irq_save(flags);
 
+       /*
+        * If we have VHE then the Hyp code will reset CPACR_EL1 to
+        * CPACR_EL1_DEFAULT and we need to reenable SME.
+        */
+       if (has_vhe() && system_supports_sme()) {
+               /* Also restore EL0 state seen on entry */
+               if (vcpu->arch.flags & KVM_ARM64_HOST_SME_ENABLED)
+                       sysreg_clear_set(CPACR_EL1, 0,
+                                        CPACR_EL1_SMEN_EL0EN |
+                                        CPACR_EL1_SMEN_EL1EN);
+               else
+                       sysreg_clear_set(CPACR_EL1,
+                                        CPACR_EL1_SMEN_EL0EN,
+                                        CPACR_EL1_SMEN_EL1EN);
+       }
+
        if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
                if (vcpu_has_sve(vcpu)) {
                        __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
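
sysreg_clear_set(), used above to flip the SMEN bits, is a
read-modify-write helper that elides the register write when nothing
changes; it is roughly equivalent to this open-coded sketch (see
<asm/sysreg.h> for the real macro, which works on any named register):

	static void cpacr_clear_set(u64 clear, u64 set)
	{
		u64 old = read_sysreg(cpacr_el1);
		u64 new = (old & ~clear) | set;

		if (new != old)		/* skip redundant sysreg writes */
			write_sysreg(new, cpacr_el1);
	}
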
index 97fe14aab1a38025c186f074643d0484d1c14aa3..0b829292dc54427295deec28280fae7cfd345f1d 100644 (file)
@@ -26,7 +26,7 @@
 
 typedef int (*exit_handle_fn)(struct kvm_vcpu *);
 
-static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
+static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
 {
        if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
                kvm_inject_vabt(vcpu);
@@ -117,10 +117,12 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
 static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
-       u32 esr = kvm_vcpu_get_esr(vcpu);
+       u64 esr = kvm_vcpu_get_esr(vcpu);
 
        run->exit_reason = KVM_EXIT_DEBUG;
-       run->debug.arch.hsr = esr;
+       run->debug.arch.hsr = lower_32_bits(esr);
+       run->debug.arch.hsr_high = upper_32_bits(esr);
+       run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID;
 
        if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
                run->debug.arch.far = vcpu->arch.fault.far_el2;
@@ -130,9 +132,9 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
 
 static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
 {
-       u32 esr = kvm_vcpu_get_esr(vcpu);
+       u64 esr = kvm_vcpu_get_esr(vcpu);
 
-       kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n",
+       kvm_pr_unimpl("Unknown exception class: esr: %#016llx -- %s\n",
                      esr, esr_get_class_string(esr));
 
        kvm_inject_undefined(vcpu);
@@ -187,7 +189,7 @@ static exit_handle_fn arm_exit_handlers[] = {
 
 static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 {
-       u32 esr = kvm_vcpu_get_esr(vcpu);
+       u64 esr = kvm_vcpu_get_esr(vcpu);
        u8 esr_ec = ESR_ELx_EC(esr);
 
        return arm_exit_handlers[esr_ec];
@@ -334,6 +336,6 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
         */
        kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
 
-       panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
+       panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%016llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
              spsr, elr_virt, esr, far, hpfar, par, vcpu);
 }
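
On the userspace side, a VMM reassembles the full syndrome from the two
halves, gating on the new flag so it keeps working against older
kernels that never set it. A sketch, assuming a struct kvm_run mapped
from the vCPU fd after a KVM_EXIT_DEBUG exit:

	#include <linux/kvm.h>

	static __u64 vmm_read_esr(const struct kvm_run *run)
	{
		__u64 esr = run->debug.arch.hsr;

		/* Old kernels leave run->flags clear; ignore hsr_high then. */
		if (run->flags & KVM_DEBUG_ARCH_HSR_HIGH_VALID)
			esr |= (__u64)run->debug.arch.hsr_high << 32;

		return esr;
	}
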
index 5d31f6c64c8c16d0a38cd790dabb5f5e845656c0..37d9f211c200c8c221137251f3147a3a96bb28c9 100644 (file)
@@ -266,7 +266,7 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
        return true;
 }
 
-static inline bool esr_is_ptrauth_trap(u32 esr)
+static inline bool esr_is_ptrauth_trap(u64 esr)
 {
        switch (esr_sys64_to_sysreg(esr)) {
        case SYS_APIAKEYLO_EL1:
index 5ad626527d4119d011fb1001d3e10a192f65be63..fd55014b34975795dbe35bd2abbeee2bbf50e9c1 100644 (file)
  * No restrictions on instructions implemented in AArch64.
  */
 #define PVM_ID_AA64ISAR0_ALLOW (\
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
        )
 
 #define PVM_ID_AA64ISAR1_ALLOW (\
index 6410d21d86957d3b9e057e6412c2b91563e97c31..caace61ea459d29c9af59156bd9b738a866af6f4 100644 (file)
@@ -47,10 +47,24 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
                val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
                __activate_traps_fpsimd32(vcpu);
        }
+       if (cpus_have_final_cap(ARM64_SME))
+               val |= CPTR_EL2_TSM;
 
        write_sysreg(val, cptr_el2);
        write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
 
+       if (cpus_have_final_cap(ARM64_SME)) {
+               val = read_sysreg_s(SYS_HFGRTR_EL2);
+               val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK |
+                        HFGxTR_EL2_nSMPRI_EL1_MASK);
+               write_sysreg_s(val, SYS_HFGRTR_EL2);
+
+               val = read_sysreg_s(SYS_HFGWTR_EL2);
+               val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK |
+                        HFGxTR_EL2_nSMPRI_EL1_MASK);
+               write_sysreg_s(val, SYS_HFGWTR_EL2);
+       }
+
        if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
                struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
 
@@ -94,9 +108,25 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 
        write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
 
+       if (cpus_have_final_cap(ARM64_SME)) {
+               u64 val;
+
+               val = read_sysreg_s(SYS_HFGRTR_EL2);
+               val |= HFGxTR_EL2_nTPIDR2_EL0_MASK |
+                       HFGxTR_EL2_nSMPRI_EL1_MASK;
+               write_sysreg_s(val, SYS_HFGRTR_EL2);
+
+               val = read_sysreg_s(SYS_HFGWTR_EL2);
+               val |= HFGxTR_EL2_nTPIDR2_EL0_MASK |
+                       HFGxTR_EL2_nSMPRI_EL1_MASK;
+               write_sysreg_s(val, SYS_HFGWTR_EL2);
+       }
+
        cptr = CPTR_EL2_DEFAULT;
        if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED))
                cptr |= CPTR_EL2_TZ;
+       if (cpus_have_final_cap(ARM64_SME))
+               cptr &= ~CPTR_EL2_TSM;
 
        write_sysreg(cptr, cptr_el2);
        write_sysreg(__kvm_hyp_host_vector, vbar_el2);
index 33f5181af330d07dcd12e08ed7907aa640b8a12a..619f94fc95fa889cb507f93b49f5fd4dd243133a 100644 (file)
@@ -33,7 +33,7 @@ u64 id_aa64mmfr2_el1_sys_val;
  */
 static void inject_undef64(struct kvm_vcpu *vcpu)
 {
-       u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+       u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
        *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
        *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
index 4fb419f7b8b61a93171860b77e8fd45253affb5a..6cb638b184b1832ea43fa146bd5816dce73aa8e0 100644 (file)
@@ -473,7 +473,7 @@ static int __vgic_v3_bpr_min(void)
 
 static int __vgic_v3_get_group(struct kvm_vcpu *vcpu)
 {
-       u32 esr = kvm_vcpu_get_esr(vcpu);
+       u64 esr = kvm_vcpu_get_esr(vcpu);
        u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
 
        return crm != 8;
@@ -1016,7 +1016,7 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
 int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
 {
        int rt;
-       u32 esr;
+       u64 esr;
        u32 vmcr;
        void (*fn)(struct kvm_vcpu *, u32, int);
        bool is_read;
index 262dfe03134daba2f7c9669b115ae792cd5b15e1..969f20daf97aabb9b8dd67c7eec8e7272e1953d9 100644 (file)
@@ -41,7 +41,8 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
        val = read_sysreg(cpacr_el1);
        val |= CPACR_EL1_TTA;
-       val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
+       val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
+                CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
 
        /*
         * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -62,6 +63,10 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
                __activate_traps_fpsimd32(vcpu);
        }
 
+       if (cpus_have_final_cap(ARM64_SME))
+               write_sysreg(read_sysreg(sctlr_el2) & ~SCTLR_ELx_ENTP2,
+                            sctlr_el2);
+
        write_sysreg(val, cpacr_el1);
 
        write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1);
@@ -83,6 +88,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
         */
        asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
 
+       if (cpus_have_final_cap(ARM64_SME))
+               write_sysreg(read_sysreg(sctlr_el2) | SCTLR_ELx_ENTP2,
+                            sctlr_el2);
+
        write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
 
        if (!arm64_kernel_unmapped_at_el0())
index ba20405d2dc2fdfd312f580b12cdda297d092f85..55a5dbe957e0a38cf68d9ed6bc1b78e8d76900fd 100644 (file)
@@ -18,7 +18,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
 {
        unsigned long cpsr = *vcpu_cpsr(vcpu);
        bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
-       u32 esr = 0;
+       u64 esr = 0;
 
        vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1          |
                             KVM_ARM64_EXCEPT_AA64_ELx_SYNC     |
@@ -50,7 +50,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
 
 static void inject_undef64(struct kvm_vcpu *vcpu)
 {
-       u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+       u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
        vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1          |
                             KVM_ARM64_EXCEPT_AA64_ELx_SYNC     |
index 7b45c040cc27f37ab5c21050f5f75c35a52fffae..18b403b58b5382f21b3b69b5f624a2d157bcaa64 100644 (file)
@@ -1123,8 +1123,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
                val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
                val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
                val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
-               if (irqchip_in_kernel(vcpu->kvm) &&
-                   vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+               if (kvm_vgic_global_state.type == VGIC_V3) {
                        val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
                        val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
                }
@@ -1132,6 +1131,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
        case SYS_ID_AA64PFR1_EL1:
                if (!kvm_has_mte(vcpu->kvm))
                        val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+
+               val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_SME);
                break;
        case SYS_ID_AA64ISAR1_EL1:
                if (!vcpu_has_ptrauth(vcpu))
@@ -1553,7 +1554,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        ID_UNALLOCATED(4,2),
        ID_UNALLOCATED(4,3),
        ID_SANITISED(ID_AA64ZFR0_EL1),
-       ID_UNALLOCATED(4,5),
+       ID_HIDDEN(ID_AA64SMFR0_EL1),
        ID_UNALLOCATED(4,6),
        ID_UNALLOCATED(4,7),
 
@@ -1596,6 +1597,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
        { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
        { SYS_DESC(SYS_TRFCR_EL1), undef_access },
+       { SYS_DESC(SYS_SMPRI_EL1), undef_access },
+       { SYS_DESC(SYS_SMCR_EL1), undef_access },
        { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
        { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
        { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
@@ -1678,8 +1681,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
        { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
        { SYS_DESC(SYS_CLIDR_EL1), access_clidr },
+       { SYS_DESC(SYS_SMIDR_EL1), undef_access },
        { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
        { SYS_DESC(SYS_CTR_EL0), access_ctr },
+       { SYS_DESC(SYS_SVCR), undef_access },
 
        { PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr,
          .reset = reset_pmcr, .reg = PMCR_EL0 },
@@ -1719,6 +1724,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
        { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
        { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
+       { SYS_DESC(SYS_TPIDR2_EL0), undef_access },
 
        { SYS_DESC(SYS_SCXTNUM_EL0), undef_access },
 
@@ -2304,7 +2310,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
                            size_t nr_global)
 {
        struct sys_reg_params params;
-       u32 esr = kvm_vcpu_get_esr(vcpu);
+       u64 esr = kvm_vcpu_get_esr(vcpu);
        int Rt = kvm_vcpu_sys_get_rt(vcpu);
        int Rt2 = (esr >> 10) & 0x1f;
 
@@ -2354,7 +2360,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
                            size_t nr_global)
 {
        struct sys_reg_params params;
-       u32 esr = kvm_vcpu_get_esr(vcpu);
+       u64 esr = kvm_vcpu_get_esr(vcpu);
        int Rt  = kvm_vcpu_sys_get_rt(vcpu);
 
        params.CRm = (esr >> 1) & 0xf;
index 8590af3c98c0ba3eb516d994decd845dcf0e278f..eeb9e45bcce82d2d11636938d8641724b784fa5d 100644 (file)
@@ -93,7 +93,7 @@ SYM_FUNC_START(mte_copy_tags_from_user)
        mov     x3, x1
        cbz     x2, 2f
 1:
-       user_ldst 2f, ldtrb, w4, x1, 0
+USER(2f, ldtrb w4, [x1])
        lsl     x4, x4, #MTE_TAG_SHIFT
        stg     x4, [x0], #MTE_GRANULE_SIZE
        add     x1, x1, #1
@@ -120,7 +120,7 @@ SYM_FUNC_START(mte_copy_tags_to_user)
 1:
        ldg     x4, [x1]
        ubfx    x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
-       user_ldst 2f, sttrb, w4, x0, 0
+USER(2f, sttrb w4, [x0])
        add     x0, x0, #1
        add     x1, x1, #MTE_GRANULE_SIZE
        subs    x2, x2, #1
index b5447e53cd73ee48ae65ecb90be18b3529bc4d34..0dea80bf6de469e6ab3d5b584d451cf100e77e94 100644 (file)
@@ -16,8 +16,8 @@
 
 void copy_highpage(struct page *to, struct page *from)
 {
-       struct page *kto = page_address(to);
-       struct page *kfrom = page_address(from);
+       void *kto = page_address(to);
+       void *kfrom = page_address(from);
 
        copy_page(kto, kfrom);
 
index 77341b160aca7baecbccc637dda68295f0017f50..c5e11768e5c141ae854ef3b71cfbf8a1ca17d907 100644 (file)
@@ -43,7 +43,7 @@
 #include <asm/traps.h>
 
 struct fault_info {
-       int     (*fn)(unsigned long far, unsigned int esr,
+       int     (*fn)(unsigned long far, unsigned long esr,
                      struct pt_regs *regs);
        int     sig;
        int     code;
@@ -53,17 +53,17 @@ struct fault_info {
 static const struct fault_info fault_info[];
 static struct fault_info debug_fault_info[];
 
-static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
+static inline const struct fault_info *esr_to_fault_info(unsigned long esr)
 {
        return fault_info + (esr & ESR_ELx_FSC);
 }
 
-static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
+static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr)
 {
        return debug_fault_info + DBG_ESR_EVT(esr);
 }
 
-static void data_abort_decode(unsigned int esr)
+static void data_abort_decode(unsigned long esr)
 {
        pr_alert("Data abort info:\n");
 
@@ -85,11 +85,11 @@ static void data_abort_decode(unsigned int esr)
                 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
 }
 
-static void mem_abort_decode(unsigned int esr)
+static void mem_abort_decode(unsigned long esr)
 {
        pr_alert("Mem abort info:\n");
 
-       pr_alert("  ESR = 0x%08x\n", esr);
+       pr_alert("  ESR = 0x%016lx\n", esr);
        pr_alert("  EC = 0x%02lx: %s, IL = %u bits\n",
                 ESR_ELx_EC(esr), esr_get_class_string(esr),
                 (esr & ESR_ELx_IL) ? 32 : 16);
@@ -99,7 +99,7 @@ static void mem_abort_decode(unsigned int esr)
        pr_alert("  EA = %lu, S1PTW = %lu\n",
                 (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
                 (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
-       pr_alert("  FSC = 0x%02x: %s\n", (esr & ESR_ELx_FSC),
+       pr_alert("  FSC = 0x%02lx: %s\n", (esr & ESR_ELx_FSC),
                 esr_to_fault_info(esr)->name);
 
        if (esr_is_data_abort(esr))
@@ -229,20 +229,20 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
        return 1;
 }
 
-static bool is_el1_instruction_abort(unsigned int esr)
+static bool is_el1_instruction_abort(unsigned long esr)
 {
        return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
 }
 
-static bool is_el1_data_abort(unsigned int esr)
+static bool is_el1_data_abort(unsigned long esr)
 {
        return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR;
 }
 
-static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
+static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr,
                                           struct pt_regs *regs)
 {
-       unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+       unsigned long fsc_type = esr & ESR_ELx_FSC_TYPE;
 
        if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr))
                return false;
@@ -258,7 +258,7 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
 }
 
 static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
-                                                       unsigned int esr,
+                                                       unsigned long esr,
                                                        struct pt_regs *regs)
 {
        unsigned long flags;
@@ -290,7 +290,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
 }
 
 static void die_kernel_fault(const char *msg, unsigned long addr,
-                            unsigned int esr, struct pt_regs *regs)
+                            unsigned long esr, struct pt_regs *regs)
 {
        bust_spinlocks(1);
 
@@ -308,7 +308,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
 }
 
 #ifdef CONFIG_KASAN_HW_TAGS
-static void report_tag_fault(unsigned long addr, unsigned int esr,
+static void report_tag_fault(unsigned long addr, unsigned long esr,
                             struct pt_regs *regs)
 {
        /*
@@ -320,11 +320,11 @@ static void report_tag_fault(unsigned long addr, unsigned int esr,
 }
 #else
 /* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
-static inline void report_tag_fault(unsigned long addr, unsigned int esr,
+static inline void report_tag_fault(unsigned long addr, unsigned long esr,
                                    struct pt_regs *regs) { }
 #endif
 
-static void do_tag_recovery(unsigned long addr, unsigned int esr,
+static void do_tag_recovery(unsigned long addr, unsigned long esr,
                           struct pt_regs *regs)
 {
 
@@ -335,13 +335,14 @@ static void do_tag_recovery(unsigned long addr, unsigned int esr,
         * It will be done lazily on the other CPUs when they will hit a
         * tag fault.
         */
-       sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_NONE);
+       sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
+                        SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE));
        isb();
 }
 
-static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
+static bool is_el1_mte_sync_tag_check_fault(unsigned long esr)
 {
-       unsigned int fsc = esr & ESR_ELx_FSC;
+       unsigned long fsc = esr & ESR_ELx_FSC;
 
        if (!is_el1_data_abort(esr))
                return false;
@@ -352,7 +353,7 @@ static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
        return false;
 }
 
-static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+static void __do_kernel_fault(unsigned long addr, unsigned long esr,
                              struct pt_regs *regs)
 {
        const char *msg;
@@ -393,7 +394,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
        die_kernel_fault(msg, addr, esr, regs);
 }
 
-static void set_thread_esr(unsigned long address, unsigned int esr)
+static void set_thread_esr(unsigned long address, unsigned long esr)
 {
        current->thread.fault_address = address;
 
@@ -441,7 +442,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
                         * exception level). Fail safe by not providing an ESR
                         * context record at all.
                         */
-                       WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
+                       WARN(1, "ESR 0x%lx is not DABT or IABT from EL0\n", esr);
                        esr = 0;
                        break;
                }
@@ -450,7 +451,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
        current->thread.fault_code = esr;
 }
 
-static void do_bad_area(unsigned long far, unsigned int esr,
+static void do_bad_area(unsigned long far, unsigned long esr,
                        struct pt_regs *regs)
 {
        unsigned long addr = untagged_addr(far);
@@ -501,7 +502,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
        return handle_mm_fault(vma, addr, mm_flags, regs);
 }
 
-static bool is_el0_instruction_abort(unsigned int esr)
+static bool is_el0_instruction_abort(unsigned long esr)
 {
        return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
 }
@@ -510,12 +511,12 @@ static bool is_el0_instruction_abort(unsigned int esr)
  * Note: not valid for EL1 DC IVAC, but we never use that such that it
  * should fault. EL0 cannot issue DC IVAC (undef).
  */
-static bool is_write_abort(unsigned int esr)
+static bool is_write_abort(unsigned long esr)
 {
        return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
 }
 
-static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
+static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
                                   struct pt_regs *regs)
 {
        const struct fault_info *inf;
@@ -671,7 +672,7 @@ no_context:
 }
 
 static int __kprobes do_translation_fault(unsigned long far,
-                                         unsigned int esr,
+                                         unsigned long esr,
                                          struct pt_regs *regs)
 {
        unsigned long addr = untagged_addr(far);
@@ -683,19 +684,19 @@ static int __kprobes do_translation_fault(unsigned long far,
        return 0;
 }
 
-static int do_alignment_fault(unsigned long far, unsigned int esr,
+static int do_alignment_fault(unsigned long far, unsigned long esr,
                              struct pt_regs *regs)
 {
        do_bad_area(far, esr, regs);
        return 0;
 }
 
-static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs)
+static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
 {
        return 1; /* "fault" */
 }
 
-static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
+static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
 {
        const struct fault_info *inf;
        unsigned long siaddr;
@@ -725,7 +726,7 @@ static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
        return 0;
 }
 
-static int do_tag_check_fault(unsigned long far, unsigned int esr,
+static int do_tag_check_fault(unsigned long far, unsigned long esr,
                              struct pt_regs *regs)
 {
        /*
@@ -805,7 +806,7 @@ static const struct fault_info fault_info[] = {
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 63"                    },
 };
 
-void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
+void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs)
 {
        const struct fault_info *inf = esr_to_fault_info(esr);
        unsigned long addr = untagged_addr(far);
@@ -825,14 +826,14 @@ void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(do_mem_abort);
 
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs)
 {
        arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
                         addr, esr);
 }
 NOKPROBE_SYMBOL(do_sp_pc_abort);
 
-int __init early_brk64(unsigned long addr, unsigned int esr,
+int __init early_brk64(unsigned long addr, unsigned long esr,
                       struct pt_regs *regs);
 
 /*
@@ -852,7 +853,7 @@ static struct fault_info __refdata debug_fault_info[] = {
 };
 
 void __init hook_debug_fault_code(int nr,
-                                 int (*fn)(unsigned long, unsigned int, struct pt_regs *),
+                                 int (*fn)(unsigned long, unsigned long, struct pt_regs *),
                                  int sig, int code, const char *name)
 {
        BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
@@ -885,7 +886,7 @@ static void debug_exception_exit(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(debug_exception_exit);
 
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
                        struct pt_regs *regs)
 {
        const struct fault_info *inf = esr_to_debug_fault_info(esr);
index cbace1c9e1372450cc708744f874ac705aed7c31..64bb078e2e7b0800588976eb044b3269e2c33980 100644 (file)
@@ -158,6 +158,28 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
        return contig_ptes;
 }
 
+pte_t huge_ptep_get(pte_t *ptep)
+{
+       int ncontig, i;
+       size_t pgsize;
+       pte_t orig_pte = ptep_get(ptep);
+
+       if (!pte_present(orig_pte) || !pte_cont(orig_pte))
+               return orig_pte;
+
+       ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
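+       /*
+        * A contiguous hugepage spans multiple ptes: report the range
+        * as dirty or young if any constituent pte is.
+        */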
+       for (i = 0; i < ncontig; i++, ptep++) {
+               pte_t pte = ptep_get(ptep);
+
+               if (pte_dirty(pte))
+                       orig_pte = pte_mkdirty(orig_pte);
+
+               if (pte_young(pte))
+                       orig_pte = pte_mkyoung(orig_pte);
+       }
+       return orig_pte;
+}
+
 /*
  * Changing some bits of contiguous entries requires us to follow a
  * Break-Before-Make approach, breaking the whole contiguous set
@@ -166,15 +188,14 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
  *
  * This helper performs the break step.
  */
-static pte_t get_clear_flush(struct mm_struct *mm,
+static pte_t get_clear_contig(struct mm_struct *mm,
                             unsigned long addr,
                             pte_t *ptep,
                             unsigned long pgsize,
                             unsigned long ncontig)
 {
-       pte_t orig_pte = huge_ptep_get(ptep);
-       bool valid = pte_valid(orig_pte);
-       unsigned long i, saddr = addr;
+       pte_t orig_pte = ptep_get(ptep);
+       unsigned long i;
 
        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
                pte_t pte = ptep_get_and_clear(mm, addr, ptep);
@@ -190,11 +211,6 @@ static pte_t get_clear_flush(struct mm_struct *mm,
                if (pte_young(pte))
                        orig_pte = pte_mkyoung(orig_pte);
        }
-
-       if (valid) {
-               struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
-               flush_tlb_range(&vma, saddr, addr);
-       }
        return orig_pte;
 }
 
@@ -385,14 +401,14 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 {
        int ncontig;
        size_t pgsize;
-       pte_t orig_pte = huge_ptep_get(ptep);
+       pte_t orig_pte = ptep_get(ptep);
 
        if (!pte_cont(orig_pte))
                return ptep_get_and_clear(mm, addr, ptep);
 
        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
 
-       return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
+       return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
 }
 
 /*
@@ -408,11 +424,11 @@ static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
 {
        int i;
 
-       if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
+       if (pte_write(pte) != pte_write(ptep_get(ptep)))
                return 1;
 
        for (i = 0; i < ncontig; i++) {
-               pte_t orig_pte = huge_ptep_get(ptep + i);
+               pte_t orig_pte = ptep_get(ptep + i);
 
                if (pte_dirty(pte) != pte_dirty(orig_pte))
                        return 1;
@@ -443,7 +459,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
        if (!__cont_access_flags_changed(ptep, pte, ncontig))
                return 0;
 
-       orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
+       orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig);
 
        /* Make sure we don't lose the dirty or young state */
        if (pte_dirty(orig_pte))
@@ -476,7 +492,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
        dpfn = pgsize >> PAGE_SHIFT;
 
-       pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
+       pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
        pte = pte_wrprotect(pte);
 
        hugeprot = pte_pgprot(pte);
index 1e7b1550e2fcebbea89f1443cb8daa5f009b4498..a1410143ea62662974a1d36e42231e7dd41ce3b9 100644 (file)
@@ -90,6 +90,32 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit;
 phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
 #endif
 
+/* Current arm64 boot protocol requires 2MB alignment */
+#define CRASH_ALIGN                    SZ_2M
+
+#define CRASH_ADDR_LOW_MAX             arm64_dma_phys_limit
+#define CRASH_ADDR_HIGH_MAX            (PHYS_MASK + 1)
+
+static int __init reserve_crashkernel_low(unsigned long long low_size)
+{
+       unsigned long long low_base;
+
+       low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
+       if (!low_base) {
+               pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
+               return -ENOMEM;
+       }
+
+       pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
+               low_base, low_base + low_size, low_size >> 20);
+
+       crashk_low_res.start = low_base;
+       crashk_low_res.end   = low_base + low_size - 1;
+       insert_resource(&iomem_resource, &crashk_low_res);
+
+       return 0;
+}
+
 /*
  * reserve_crashkernel() - reserves memory for crash kernel
  *
@@ -100,17 +126,35 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
 static void __init reserve_crashkernel(void)
 {
        unsigned long long crash_base, crash_size;
-       unsigned long long crash_max = arm64_dma_phys_limit;
+       unsigned long long crash_low_size = 0;
+       unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
+       char *cmdline = boot_command_line;
        int ret;
 
        if (!IS_ENABLED(CONFIG_KEXEC_CORE))
                return;
 
-       ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+       /* crashkernel=X[@offset] */
+       ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
                                &crash_size, &crash_base);
-       /* no crashkernel= or invalid value specified */
-       if (ret || !crash_size)
+       if (ret == -ENOENT) {
+               ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
+               if (ret || !crash_size)
+                       return;
+
+               /*
+                * crashkernel=Y,low is optional, but if it is specified
+                * its value must be valid.
+                */
+               ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
+               if (ret && (ret != -ENOENT))
+                       return;
+
+               crash_max = CRASH_ADDR_HIGH_MAX;
+       } else if (ret || !crash_size) {
+               /* The specified value is invalid */
                return;
+       }
 
        crash_size = PAGE_ALIGN(crash_size);
 
@@ -118,8 +162,7 @@ static void __init reserve_crashkernel(void)
        if (crash_base)
                crash_max = crash_base + crash_size;
 
-       /* Current arm64 boot protocol requires 2MB alignment */
-       crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
+       crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
                                               crash_base, crash_max);
        if (!crash_base) {
                pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@@ -127,6 +170,12 @@ static void __init reserve_crashkernel(void)
                return;
        }
 
+       if ((crash_base >= CRASH_ADDR_LOW_MAX) &&
+            crash_low_size && reserve_crashkernel_low(crash_low_size)) {
+               memblock_phys_free(crash_base, crash_size);
+               return;
+       }
+
        pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
                crash_base, crash_base + crash_size, crash_size >> 20);
 
@@ -135,8 +184,12 @@ static void __init reserve_crashkernel(void)
         * map. Inform kmemleak so that it won't try to access it.
         */
        kmemleak_ignore_phys(crash_base);
+       if (crashk_low_res.end)
+               kmemleak_ignore_phys(crashk_low_res.start);
+
        crashk_res.start = crash_base;
        crashk_res.end = crash_base + crash_size - 1;
+       insert_resource(&iomem_resource, &crashk_res);
 }
 
 /*
@@ -157,7 +210,7 @@ static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
        return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
 }
 
-static void __init zone_sizes_init(unsigned long min, unsigned long max)
+static void __init zone_sizes_init(void)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
        unsigned int __maybe_unused acpi_zone_dma_bits;
@@ -176,7 +229,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
        if (!arm64_dma_phys_limit)
                arm64_dma_phys_limit = dma32_phys_limit;
 #endif
-       max_zone_pfns[ZONE_NORMAL] = max;
+       max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
        free_area_init(max_zone_pfns);
 }
@@ -374,7 +427,7 @@ void __init bootmem_init(void)
         * done after the fixed reservations
         */
        sparse_init();
-       zone_sizes_init(min, max);
+       zone_sizes_init();
 
        /*
         * Reserve the CMA area after arm64_dma_phys_limit was initialised.
index b7c81dacabf079f50d6d6225a43b15a2b4e621ab..b21f91cd830db4fdbcf4f52f2477c3c96a875e0f 100644 (file)
@@ -99,3 +99,11 @@ void __init early_ioremap_init(void)
 {
        early_ioremap_setup();
 }
+
+bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+                                unsigned long flags)
+{
+       unsigned long pfn = PHYS_PFN(offset);
+
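+       /* Only memory covered by the kernel linear map may be remapped as RAM. */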
+       return pfn_is_map_memory(pfn);
+}
index d7da8ca40d2e19dfe7f70bf100de143ea84bcba0..4ea2eefbc053f673ca1575cbf50cf6b61139319b 100644 (file)
@@ -238,7 +238,7 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
        int this_level, index, level_lsb, level_msb;
 
        dst_addr &= PAGE_MASK;
-       prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_EXEC));
+       prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_ROX));
 
        for (this_level = 3; this_level >= 0; this_level--) {
                levels[this_level] = trans_alloc(info);
index cf1307188150f5130be2fd2c829ee8d0bf9d87f0..07a93ab21a62b2fdcf56eb50f587ea3201028d33 100644 (file)
@@ -3,7 +3,7 @@
 gen := arch/$(ARCH)/include/generated
 kapi := $(gen)/asm
 
-kapi-hdrs-y := $(kapi)/cpucaps.h
+kapi-hdrs-y := $(kapi)/cpucaps.h $(kapi)/sysreg-defs.h
 
 targets += $(addprefix ../../../, $(kapi-hdrs-y))
 
@@ -14,5 +14,11 @@ kapi:   $(kapi-hdrs-y)
 quiet_cmd_gen_cpucaps = GEN     $@
       cmd_gen_cpucaps = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@
 
+quiet_cmd_gen_sysreg = GEN     $@
+      cmd_gen_sysreg = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@
+
 $(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
        $(call if_changed,gen_cpucaps)
+
+$(kapi)/sysreg-defs.h: $(src)/gen-sysreg.awk $(src)/sysreg FORCE
+       $(call if_changed,gen_sysreg)
index 3ed418f70e3bd2f65283dd5fa7c3098c0ac435dd..e52b289a27c2b7d82064fd8342d96c91ab5b914e 100644 (file)
@@ -43,6 +43,8 @@ KVM_PROTECTED_MODE
 MISMATCHED_CACHE_TYPE
 MTE
 MTE_ASYMM
+SME
+SME_FA64
 SPECTRE_V2
 SPECTRE_V3A
 SPECTRE_V4
diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk
new file mode 100755 (executable)
index 0000000..89bfb74
--- /dev/null
@@ -0,0 +1,268 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-sysreg.awk: arm64 sysreg header generator
+#
+# Usage: awk -f gen-sysreg.awk sysregs.txt
+
+# Log an error and terminate
+function fatal(msg) {
+       print "Error at " NR ": " msg > "/dev/stderr"
+       exit 1
+}
+
+# Sanity check that the start or end of a block makes sense at this point in
+# the file. If not, produce an error and terminate.
+#
+# @this - the $Block or $EndBlock
+# @prev - the only valid block to already be in (value of @block)
+# @new - the new value of @block
+function change_block(this, prev, new) {
+       if (block != prev)
+               fatal("unexpected " this " (inside " block ")")
+
+       block = new
+}
+
+# Sanity check that a record has the expected number of fields. If not,
+# produce an error and terminate.
+function expect_fields(nf) {
+       if (NF != nf)
+               fatal(NF " fields found where " nf " expected")
+}
+
+# Print a CPP macro definition, padded with spaces so that the macro bodies
+# line up in a column
+function define(name, val) {
+       printf "%-48s%s\n", "#define " name, val
+}
+
+# Print standard BITMASK/SHIFT/WIDTH CPP definitions for a field
+function define_field(reg, field, msb, lsb) {
+       define(reg "_" field, "GENMASK(" msb ", " lsb ")")
+       define(reg "_" field "_MASK", "GENMASK(" msb ", " lsb ")")
+       define(reg "_" field "_SHIFT", lsb)
+       define(reg "_" field "_WIDTH", msb - lsb + 1)
+}
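+#
+# e.g. "Field 3:0 LEN" within a block for SMCR_ELx emits (illustrative):
+#
+#   #define SMCR_ELx_LEN        GENMASK(3, 0)
+#   #define SMCR_ELx_LEN_MASK   GENMASK(3, 0)
+#   #define SMCR_ELx_LEN_SHIFT  0
+#   #define SMCR_ELx_LEN_WIDTH  4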
+
+# Parse a "<msb>[:<lsb>]" string into the global variables @msb and @lsb
+function parse_bitdef(reg, field, bitdef, _bits)
+{
+       if (bitdef ~ /^[0-9]+$/) {
+               msb = bitdef
+               lsb = bitdef
+       } else if (split(bitdef, _bits, ":") == 2) {
+               msb = _bits[1]
+               lsb = _bits[2]
+       } else {
+               fatal("invalid bit-range definition '" bitdef "'")
+       }
+
+       if (msb != next_bit)
+               fatal(reg "." field " starts at " msb " not " next_bit)
+       if (63 < msb || msb < 0)
+               fatal(reg "." field " invalid high bit in '" bitdef "'")
+       if (63 < lsb || lsb < 0)
+               fatal(reg "." field " invalid low bit in '" bitdef "'")
+       if (msb < lsb)
+               fatal(reg "." field " invalid bit-range '" bitdef "'")
+
+       next_bit = lsb - 1
+}
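+#
+# e.g. parse_bitdef(reg, "TTA", "28") sets msb = lsb = 28 and leaves
+# next_bit at 27 for the next field to be parsed.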
+
+BEGIN {
+       print "#ifndef __ASM_SYSREG_DEFS_H"
+       print "#define __ASM_SYSREG_DEFS_H"
+       print ""
+       print "/* Generated file - do not edit */"
+       print ""
+
+       block = "None"
+}
+
+END {
+       print "#endif /* __ASM_SYSREG_DEFS_H */"
+}
+
+# skip blank lines and comment lines
+/^$/ { next }
+/^#/ { next }
+
+/^SysregFields/ {
+       change_block("SysregFields", "None", "SysregFields")
+       expect_fields(2)
+
+       reg = $2
+
+       res0 = "UL(0)"
+       res1 = "UL(0)"
+
+       next_bit = 63
+
+       next
+}
+
+/^EndSysregFields/ {
+       if (next_bit > 0)
+               fatal("Unspecified bits in " reg)
+
+       change_block("EndSysregFields", "SysregFields", "None")
+
+       define(reg "_RES0", "(" res0 ")")
+       define(reg "_RES1", "(" res1 ")")
+       print ""
+
+       reg = null
+       res0 = null
+       res1 = null
+
+       next
+}
+
+/^Sysreg/ {
+       change_block("Sysreg", "None", "Sysreg")
+       expect_fields(7)
+
+       reg = $2
+       op0 = $3
+       op1 = $4
+       crn = $5
+       crm = $6
+       op2 = $7
+
+       res0 = "UL(0)"
+       res1 = "UL(0)"
+
+       define("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2)
+       define("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")")
+
+       define("SYS_" reg "_Op0", op0)
+       define("SYS_" reg "_Op1", op1)
+       define("SYS_" reg "_CRn", crn)
+       define("SYS_" reg "_CRm", crm)
+       define("SYS_" reg "_Op2", op2)
+
+       print ""
+
+       next_bit = 63
+
+       next
+}
+
+/^EndSysreg/ {
+       if (next_bit > 0)
+               fatal("Unspecified bits in " reg)
+
+       change_block("EndSysreg", "Sysreg", "None")
+
+       if (res0 != null)
+               define(reg "_RES0", "(" res0 ")")
+       if (res1 != null)
+               define(reg "_RES1", "(" res1 ")")
+       if (res0 != null || res1 != null)
+               print ""
+
+       reg = null
+       op0 = null
+       op1 = null
+       crn = null
+       crm = null
+       op2 = null
+       res0 = null
+       res1 = null
+
+       next
+}
+
+# Currently this is effectively a comment; in future we may want to emit
+# defines for the fields.
+/^Fields/ && (block == "Sysreg") {
+       expect_fields(2)
+
+       if (next_bit != 63)
+               fatal("Some fields already defined for " reg)
+
+       print "/* For " reg " fields see " $2 " */"
+       print ""
+
+       next_bit = 0
+       res0 = null
+       res1 = null
+
+       next
+}
+
+/^Res0/ && (block == "Sysreg" || block == "SysregFields") {
+       expect_fields(2)
+       parse_bitdef(reg, "RES0", $2)
+       field = "RES0_" msb "_" lsb
+
+       res0 = res0 " | GENMASK_ULL(" msb ", " lsb ")"
+
+       next
+}
+
+/^Res1/ && (block == "Sysreg" || block == "SysregFields") {
+       expect_fields(2)
+       parse_bitdef(reg, "RES1", $2)
+       field = "RES1_" msb "_" lsb
+
+       res1 = res1 " | GENMASK_ULL(" msb ", " lsb ")"
+
+       next
+}
+
+/^Field/ && (block == "Sysreg" || block == "SysregFields") {
+       expect_fields(3)
+       field = $3
+       parse_bitdef(reg, field, $2)
+
+       define_field(reg, field, msb, lsb)
+       print ""
+
+       next
+}
+
+/^Raz/ && (block == "Sysreg" || block == "SysregFields") {
+       expect_fields(2)
+       parse_bitdef(reg, "RAZ", $2)
+
+       next
+}
+
+/^Enum/ {
+       change_block("Enum", "Sysreg", "Enum")
+       expect_fields(3)
+       field = $3
+       parse_bitdef(reg, field, $2)
+
+       define_field(reg, field, msb, lsb)
+
+       next
+}
+
+/^EndEnum/ {
+       change_block("EndEnum", "Enum", "Sysreg")
+       field = null
+       msb = null
+       lsb = null
+       print ""
+       next
+}
+
+/0b[01]+/ && (block == "Enum") {
+       expect_fields(2)
+       val = $1
+       name = $2
+
+       define(reg "_" field "_" name, "UL(" val ")")
+       next
+}
+
+# Any lines not handled by previous rules are unexpected
+{
+       fatal("unhandled statement")
+}
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
new file mode 100644 (file)
index 0000000..ff5e552
--- /dev/null
@@ -0,0 +1,369 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# System register metadata
+
+# Each System register is described by a Sysreg block:
+
+# Sysreg       <name>  <op0>   <op1>   <crn>   <crm>   <op2>
+# <field>
+# ...
+# EndSysreg
+
+# Within a Sysreg block, each field can be described as one of:
+
+# Res0 <msb>[:<lsb>]
+
+# Res1 <msb>[:<lsb>]
+
+# Field        <msb>[:<lsb>]   <name>
+
+# Enum <msb>[:<lsb>]   <name>
+#      <enumval>       <enumname>
+#      ...
+# EndEnum
+
+# Alternatively if multiple registers share the same layout then
+# a SysregFields block can be used to describe the shared layout
+
+# SysregFields <fieldsname>
+# <field>
+# ...
+# EndSysregFields
+
+# and referenced from within the Sysreg:
+
+# Sysreg       <name>  <op0>   <op1>   <crn>   <crm>   <op2>
+# Fields       <fieldsname>
+# EndSysreg
+
+# For ID registers we adopt a few conventions for translating the
+# language in the Arm ARM (Architecture Reference Manual) into defines:
+#
+# NI  - Not implemented
+# IMP - Implemented
+#
+# In general it is recommended that new enumeration items be named for the
+# feature that introduces them (e.g. FEAT_LS64_ACCDATA introduces enumeration
+# item ACCDATA), though it may be more tasteful to do something else.
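+#
+# As an illustration (EXAMPLE_EL1 is a made-up register, not part of the
+# data below), a complete description looks like:
+#
+# Sysreg       EXAMPLE_EL1     3       0       0       0       7
+# Res0         63:1
+# Field        0       EN
+# EndSysreg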
+
+Sysreg ID_AA64ISAR0_EL1        3       0       0       6       0
+Enum   63:60   RNDR
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   59:56   TLB
+       0b0000  NI
+       0b0001  OS
+       0b0010  RANGE
+EndEnum
+Enum   55:52   TS
+       0b0000  NI
+       0b0001  FLAGM
+       0b0010  FLAGM2
+EndEnum
+Enum   51:48   FHM
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   47:44   DP
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   43:40   SM4
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   39:36   SM3
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   35:32   SHA3
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   31:28   RDM
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   27:24   TME
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   23:20   ATOMIC
+       0b0000  NI
+       0b0010  IMP
+EndEnum
+Enum   19:16   CRC32
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   15:12   SHA2
+       0b0000  NI
+       0b0001  SHA256
+       0b0010  SHA512
+EndEnum
+Enum   11:8    SHA1
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   7:4     AES
+       0b0000  NI
+       0b0001  AES
+       0b0010  PMULL
+EndEnum
+Res0   3:0
+EndSysreg
+
+Sysreg SCTLR_EL1       3       0       1       0       0
+Field  63      TIDCP
+Field  62      SPINMASK
+Field  61      NMI
+Field  60      EnTP2
+Res0   59:58
+Field  57      EPAN
+Field  56      EnALS
+Field  55      EnAS0
+Field  54      EnASR
+Field  53      TME
+Field  52      TME0
+Field  51      TMT
+Field  50      TMT0
+Field  49:46   TWEDEL
+Field  45      TWEDEn
+Field  44      DSSBS
+Field  43      ATA
+Field  42      ATA0
+Enum   41:40   TCF
+       0b00    NONE
+       0b01    SYNC
+       0b10    ASYNC
+       0b11    ASYMM
+EndEnum
+Enum   39:38   TCF0
+       0b00    NONE
+       0b01    SYNC
+       0b10    ASYNC
+       0b11    ASYMM
+EndEnum
+Field  37      ITFSB
+Field  36      BT1
+Field  35      BT0
+Res0   34
+Field  33      MSCEn
+Field  32      CMOW
+Field  31      EnIA
+Field  30      EnIB
+Field  29      LSMAOE
+Field  28      nTLSMD
+Field  27      EnDA
+Field  26      UCI
+Field  25      EE
+Field  24      E0E
+Field  23      SPAN
+Field  22      EIS
+Field  21      IESB
+Field  20      TSCXT
+Field  19      WXN
+Field  18      nTWE
+Res0   17
+Field  16      nTWI
+Field  15      UCT
+Field  14      DZE
+Field  13      EnDB
+Field  12      I
+Field  11      EOS
+Field  10      EnRCTX
+Field  9       UMA
+Field  8       SED
+Field  7       ITD
+Field  6       nAA
+Field  5       CP15BEN
+Field  4       SA0
+Field  3       SA
+Field  2       C
+Field  1       A
+Field  0       M
+EndSysreg
+
+SysregFields   CPACR_ELx
+Res0   63:29
+Field  28      TTA
+Res0   27:26
+Field  25:24   SMEN
+Res0   23:22
+Field  21:20   FPEN
+Res0   19:18
+Field  17:16   ZEN
+Res0   15:0
+EndSysregFields
+
+Sysreg CPACR_EL1       3       0       1       0       2
+Fields CPACR_ELx
+EndSysreg
+
+Sysreg SMPRI_EL1       3       0       1       2       4
+Res0   63:4
+Field  3:0     PRIORITY
+EndSysreg
+
+SysregFields   ZCR_ELx
+Res0   63:9
+Raz    8:4
+Field  3:0     LEN
+EndSysregFields
+
+Sysreg ZCR_EL1 3       0       1       2       0
+Fields ZCR_ELx
+EndSysreg
+
+SysregFields   SMCR_ELx
+Res0   63:32
+Field  31      FA64
+Res0   30:9
+Raz    8:4
+Field  3:0     LEN
+EndSysregFields
+
+Sysreg SMCR_EL1        3       0       1       2       6
+Fields SMCR_ELx
+EndSysreg
+
+Sysreg FAR_EL1 3       0       6       0       0
+Field  63:0    ADDR
+EndSysreg
+
+SysregFields   CONTEXTIDR_ELx
+Res0   63:32
+Field  31:0    PROCID
+EndSysregFields
+
+Sysreg CONTEXTIDR_EL1  3       0       13      0       1
+Fields CONTEXTIDR_ELx
+EndSysreg
+
+Sysreg CLIDR_EL1       3       1       0       0       1
+Res0   63:47
+Field  46:33   Ttypen
+Field  32:30   ICB
+Field  29:27   LoUU
+Field  26:24   LoC
+Field  23:21   LoUIS
+Field  20:18   Ctype7
+Field  17:15   Ctype6
+Field  14:12   Ctype5
+Field  11:9    Ctype4
+Field  8:6     Ctype3
+Field  5:3     Ctype2
+Field  2:0     Ctype1
+EndSysreg
+
+Sysreg SMIDR_EL1       3       1       0       0       6
+Res0   63:32
+Field  31:24   IMPLEMENTER
+Field  23:16   REVISION
+Field  15      SMPS
+Res0   14:12
+Field  11:0    AFFINITY
+EndSysreg
+
+Sysreg CSSELR_EL1      3       2       0       0       0
+Res0   63:5
+Field  4       TnD
+Field  3:1     Level
+Field  0       InD
+EndSysreg
+
+Sysreg SVCR    3       3       4       2       2
+Res0   63:2
+Field  1       ZA
+Field  0       SM
+EndSysreg
+
+Sysreg ZCR_EL2 3       4       1       2       0
+Fields ZCR_ELx
+EndSysreg
+
+Sysreg SMPRIMAP_EL2    3       4       1       2       5
+Field  63:60   P15
+Field  59:56   P14
+Field  55:52   P13
+Field  51:48   P12
+Field  47:44   P11
+Field  43:40   P10
+Field  39:36   P9
+Field  35:32   P8
+Field  31:28   P7
+Field  27:24   P6
+Field  23:20   P5
+Field  19:16   P4
+Field  15:12   P3
+Field  11:8    P2
+Field  7:4     P1
+Field  3:0     P0
+EndSysreg
+
+Sysreg SMCR_EL2        3       4       1       2       6
+Fields SMCR_ELx
+EndSysreg
+
+Sysreg DACR32_EL2      3       4       3       0       0
+Res0   63:32
+Field  31:30   D15
+Field  29:28   D14
+Field  27:26   D13
+Field  25:24   D12
+Field  23:22   D11
+Field  21:20   D10
+Field  19:18   D9
+Field  17:16   D8
+Field  15:14   D7
+Field  13:12   D6
+Field  11:10   D5
+Field  9:8     D4
+Field  7:6     D3
+Field  5:4     D2
+Field  3:2     D1
+Field  1:0     D0
+EndSysreg
+
+Sysreg FAR_EL2 3       4       6       0       0
+Field  63:0    ADDR
+EndSysreg
+
+Sysreg CONTEXTIDR_EL2  3       4       13      0       1
+Fields CONTEXTIDR_ELx
+EndSysreg
+
+Sysreg CPACR_EL12      3       5       1       0       2
+Fields CPACR_ELx
+EndSysreg
+
+Sysreg ZCR_EL12        3       5       1       2       0
+Fields ZCR_ELx
+EndSysreg
+
+Sysreg SMCR_EL12       3       5       1       2       6
+Fields SMCR_ELx
+EndSysreg
+
+Sysreg FAR_EL12        3       5       6       0       0
+Field  63:0    ADDR
+EndSysreg
+
+Sysreg CONTEXTIDR_EL12 3       5       13      0       1
+Fields CONTEXTIDR_ELx
+EndSysreg
+
+SysregFields TTBRx_EL1
+Field  63:48   ASID
+Field  47:1    BADDR
+Field  0       CnP
+EndSysregFields
+
+Sysreg TTBR0_EL1       3       0       2       0       0
+Fields TTBRx_EL1
+EndSysreg
+
+Sysreg TTBR1_EL1       3       0       2       0       1
+Fields TTBRx_EL1
+EndSysreg
index 4e39f7abdeb6dc90d4018a927f11f139515a4ce8..0621eaea41962b03801c3a4f26e6b02ccd59d9e1 100644 (file)
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
+obj-y += kernel/ mm/
+
 # for cleaning
 subdir- += boot
index 75ef86605d69ec9d4e04bca29d4b77803634c72f..21d72b078eefc95ac9277f203e07cc20029599ed 100644 (file)
@@ -320,6 +320,14 @@ config HOTPLUG_CPU
          controlled through /sys/devices/system/cpu/cpu1/hotplug/target.
 
          Say N if you want to disable CPU hotplug.
+
+config HAVE_EFFICIENT_UNALIGNED_STRING_OPS
+       bool "Enable EFFICIENT_UNALIGNED_STRING_OPS for abiv2"
+       depends on CPU_CK807 || CPU_CK810 || CPU_CK860
+       help
+         Say Y here to enable EFFICIENT_UNALIGNED_STRING_OPS. Some CPU models
+         can handle unaligned accesses in hardware.
+
 endmenu
 
 source "arch/csky/Kconfig.platforms"
index 86680507763647faafe6f436533f7c15ef90d0d9..4e1d619fd5c63d4e7f0342e3a9550d1bfb14da73 100644 (file)
@@ -61,15 +61,12 @@ KBUILD_AFLAGS += $(KBUILD_CFLAGS)
 
 head-y := arch/csky/kernel/head.o
 
-core-y += arch/csky/kernel/
-core-y += arch/csky/mm/
 core-y += arch/csky/$(CSKYABI)/
 
 libs-y += arch/csky/lib/ \
        $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
 
 boot := arch/csky/boot
-core-y += $(boot)/dts/
 
 all: zImage
 
index 601ce3b2fb85c2e81f67a629c6e2437192c849ab..a4b2ade0fc67666a1bc5eeb98b6e5c7edff99982 100644 (file)
@@ -4,5 +4,3 @@ obj-y                                   += bswapdi.o
 obj-y                                  += bswapsi.o
 obj-y                                  += cacheflush.o
 obj-y                                  += mmap.o
-obj-y                                  += memcpy.o
-obj-y                                  += strksyms.o
diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S
deleted file mode 100644 (file)
index 5078eb5..0000000
+++ /dev/null
@@ -1,347 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-
-#include <linux/linkage.h>
-
-.macro GET_FRONT_BITS rx y
-#ifdef __cskyLE__
-       lsri    \rx, \y
-#else
-       lsli    \rx, \y
-#endif
-.endm
-
-.macro GET_AFTER_BITS rx y
-#ifdef __cskyLE__
-       lsli    \rx, \y
-#else
-       lsri    \rx, \y
-#endif
-.endm
-
-/* void *memcpy(void *dest, const void *src, size_t n); */
-ENTRY(memcpy)
-       mov     r7, r2
-       cmplti  r4, 4
-       bt      .L_copy_by_byte
-       mov     r6, r2
-       andi    r6, 3
-       cmpnei  r6, 0
-       jbt     .L_dest_not_aligned
-       mov     r6, r3
-       andi    r6, 3
-       cmpnei  r6, 0
-       jbt     .L_dest_aligned_but_src_not_aligned
-.L0:
-       cmplti  r4, 16
-       jbt     .L_aligned_and_len_less_16bytes
-       subi    sp, 8
-       stw     r8, (sp, 0)
-.L_aligned_and_len_larger_16bytes:
-       ldw     r1, (r3, 0)
-       ldw     r5, (r3, 4)
-       ldw     r8, (r3, 8)
-       stw     r1, (r7, 0)
-       ldw     r1, (r3, 12)
-       stw     r5, (r7, 4)
-       stw     r8, (r7, 8)
-       stw     r1, (r7, 12)
-       subi    r4, 16
-       addi    r3, 16
-       addi    r7, 16
-       cmplti  r4, 16
-       jbf     .L_aligned_and_len_larger_16bytes
-       ldw     r8, (sp, 0)
-       addi    sp, 8
-       cmpnei  r4, 0
-       jbf     .L_return
-
-.L_aligned_and_len_less_16bytes:
-       cmplti  r4, 4
-       bt      .L_copy_by_byte
-.L1:
-       ldw     r1, (r3, 0)
-       stw     r1, (r7, 0)
-       subi    r4, 4
-       addi    r3, 4
-       addi    r7, 4
-       cmplti  r4, 4
-       jbf     .L1
-       br      .L_copy_by_byte
-
-.L_return:
-       rts
-
-.L_copy_by_byte:                      /* len less than 4 bytes */
-       cmpnei  r4, 0
-       jbf     .L_return
-.L4:
-       ldb     r1, (r3, 0)
-       stb     r1, (r7, 0)
-       addi    r3, 1
-       addi    r7, 1
-       decne   r4
-       jbt     .L4
-       rts
-
-/*
- * If dest is not aligned, just copying some bytes makes the dest align.
- * Afther that, we judge whether the src is aligned.
- */
-.L_dest_not_aligned:
-       mov     r5, r3
-       rsub    r5, r5, r7
-       abs     r5, r5
-       cmplt   r5, r4
-       bt      .L_copy_by_byte
-       mov     r5, r7
-       sub     r5, r3
-       cmphs   r5, r4
-       bf      .L_copy_by_byte
-       mov     r5, r6
-.L5:
-       ldb     r1, (r3, 0)              /* makes the dest align. */
-       stb     r1, (r7, 0)
-       addi    r5, 1
-       subi    r4, 1
-       addi    r3, 1
-       addi    r7, 1
-       cmpnei  r5, 4
-       jbt     .L5
-       cmplti  r4, 4
-       jbt     .L_copy_by_byte
-       mov     r6, r3                   /* judge whether the src is aligned. */
-       andi    r6, 3
-       cmpnei  r6, 0
-       jbf     .L0
-
-/* Judge the number of misaligned, 1, 2, 3? */
-.L_dest_aligned_but_src_not_aligned:
-       mov     r5, r3
-       rsub    r5, r5, r7
-       abs     r5, r5
-       cmplt   r5, r4
-       bt      .L_copy_by_byte
-       bclri   r3, 0
-       bclri   r3, 1
-       ldw     r1, (r3, 0)
-       addi    r3, 4
-       cmpnei  r6, 2
-       bf      .L_dest_aligned_but_src_not_aligned_2bytes
-       cmpnei  r6, 3
-       bf      .L_dest_aligned_but_src_not_aligned_3bytes
-
-.L_dest_aligned_but_src_not_aligned_1byte:
-       mov     r5, r7
-       sub     r5, r3
-       cmphs   r5, r4
-       bf      .L_copy_by_byte
-       cmplti  r4, 16
-       bf      .L11
-.L10:                                     /* If the len is less than 16 bytes */
-       GET_FRONT_BITS r1 8
-       mov     r5, r1
-       ldw     r6, (r3, 0)
-       mov     r1, r6
-       GET_AFTER_BITS r6 24
-       or      r5, r6
-       stw     r5, (r7, 0)
-       subi    r4, 4
-       addi    r3, 4
-       addi    r7, 4
-       cmplti  r4, 4
-       bf      .L10
-       subi    r3, 3
-       br      .L_copy_by_byte
-.L11:
-       subi    sp, 16
-       stw     r8, (sp, 0)
-       stw     r9, (sp, 4)
-       stw     r10, (sp, 8)
-       stw     r11, (sp, 12)
-.L12:
-       ldw     r5, (r3, 0)
-       ldw     r11, (r3, 4)
-       ldw     r8, (r3, 8)
-       ldw     r9, (r3, 12)
-
-       GET_FRONT_BITS r1 8               /* little or big endian? */
-       mov     r10, r5
-       GET_AFTER_BITS r5 24
-       or      r5, r1
-
-       GET_FRONT_BITS r10 8
-       mov     r1, r11
-       GET_AFTER_BITS r11 24
-       or      r11, r10
-
-       GET_FRONT_BITS r1 8
-       mov     r10, r8
-       GET_AFTER_BITS r8 24
-       or      r8, r1
-
-       GET_FRONT_BITS r10 8
-       mov     r1, r9
-       GET_AFTER_BITS r9 24
-       or      r9, r10
-
-       stw     r5, (r7, 0)
-       stw     r11, (r7, 4)
-       stw     r8, (r7, 8)
-       stw     r9, (r7, 12)
-       subi    r4, 16
-       addi    r3, 16
-       addi    r7, 16
-       cmplti  r4, 16
-       jbf     .L12
-       ldw     r8, (sp, 0)
-       ldw     r9, (sp, 4)
-       ldw     r10, (sp, 8)
-       ldw     r11, (sp, 12)
-       addi    sp , 16
-       cmplti  r4, 4
-       bf      .L10
-       subi    r3, 3
-       br      .L_copy_by_byte
-
-.L_dest_aligned_but_src_not_aligned_2bytes:
-       cmplti  r4, 16
-       bf      .L21
-.L20:
-       GET_FRONT_BITS r1 16
-       mov     r5, r1
-       ldw     r6, (r3, 0)
-       mov     r1, r6
-       GET_AFTER_BITS r6 16
-       or      r5, r6
-       stw     r5, (r7, 0)
-       subi    r4, 4
-       addi    r3, 4
-       addi    r7, 4
-       cmplti  r4, 4
-       bf      .L20
-       subi    r3, 2
-       br      .L_copy_by_byte
-       rts
-
-.L21:  /* n > 16 */
-       subi    sp, 16
-       stw     r8, (sp, 0)
-       stw     r9, (sp, 4)
-       stw     r10, (sp, 8)
-       stw     r11, (sp, 12)
-
-.L22:
-       ldw     r5, (r3, 0)
-       ldw     r11, (r3, 4)
-       ldw     r8, (r3, 8)
-       ldw     r9, (r3, 12)
-
-       GET_FRONT_BITS r1 16
-       mov     r10, r5
-       GET_AFTER_BITS r5 16
-       or      r5, r1
-
-       GET_FRONT_BITS r10 16
-       mov     r1, r11
-       GET_AFTER_BITS r11 16
-       or      r11, r10
-
-       GET_FRONT_BITS r1 16
-       mov     r10, r8
-       GET_AFTER_BITS r8 16
-       or      r8, r1
-
-       GET_FRONT_BITS r10 16
-       mov     r1, r9
-       GET_AFTER_BITS r9 16
-       or      r9, r10
-
-       stw     r5, (r7, 0)
-       stw     r11, (r7, 4)
-       stw     r8, (r7, 8)
-       stw     r9, (r7, 12)
-       subi    r4, 16
-       addi    r3, 16
-       addi    r7, 16
-       cmplti  r4, 16
-       jbf     .L22
-       ldw     r8, (sp, 0)
-       ldw     r9, (sp, 4)
-       ldw     r10, (sp, 8)
-       ldw     r11, (sp, 12)
-       addi    sp, 16
-       cmplti  r4, 4
-       bf      .L20
-       subi    r3, 2
-       br      .L_copy_by_byte
-
-
-.L_dest_aligned_but_src_not_aligned_3bytes:
-       cmplti  r4, 16
-       bf      .L31
-.L30:
-       GET_FRONT_BITS r1 24
-       mov     r5, r1
-       ldw     r6, (r3, 0)
-       mov     r1, r6
-       GET_AFTER_BITS r6 8
-       or      r5, r6
-       stw     r5, (r7, 0)
-       subi    r4, 4
-       addi    r3, 4
-       addi    r7, 4
-       cmplti  r4, 4
-       bf      .L30
-       subi    r3, 1
-       br      .L_copy_by_byte
-.L31:
-       subi    sp, 16
-       stw     r8, (sp, 0)
-       stw     r9, (sp, 4)
-       stw     r10, (sp, 8)
-       stw     r11, (sp, 12)
-.L32:
-       ldw     r5, (r3, 0)
-       ldw     r11, (r3, 4)
-       ldw     r8, (r3, 8)
-       ldw     r9, (r3, 12)
-
-       GET_FRONT_BITS r1 24
-       mov     r10, r5
-       GET_AFTER_BITS r5 8
-       or      r5, r1
-
-       GET_FRONT_BITS r10 24
-       mov     r1, r11
-       GET_AFTER_BITS r11 8
-       or      r11, r10
-
-       GET_FRONT_BITS r1 24
-       mov     r10, r8
-       GET_AFTER_BITS r8 8
-       or      r8, r1
-
-       GET_FRONT_BITS r10 24
-       mov     r1, r9
-       GET_AFTER_BITS r9 8
-       or      r9, r10
-
-       stw     r5, (r7, 0)
-       stw     r11, (r7, 4)
-       stw     r8, (r7, 8)
-       stw     r9, (r7, 12)
-       subi    r4, 16
-       addi    r3, 16
-       addi    r7, 16
-       cmplti  r4, 16
-       jbf     .L32
-       ldw     r8, (sp, 0)
-       ldw     r9, (sp, 4)
-       ldw     r10, (sp, 8)
-       ldw     r11, (sp, 12)
-       addi    sp, 16
-       cmplti  r4, 4
-       bf      .L30
-       subi    r3, 1
-       br      .L_copy_by_byte
diff --git a/arch/csky/abiv1/strksyms.c b/arch/csky/abiv1/strksyms.c
deleted file mode 100644 (file)
index c7ccbb2..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-
-#include <linux/module.h>
-
-EXPORT_SYMBOL(memcpy);
index c561efa5533c75bc957795f8ed7286c19bf937d7..ea8005fe01a82d9aa24ccb11af606da075fae3fb 100644 (file)
@@ -2,9 +2,11 @@
 obj-y                          += cacheflush.o
 obj-$(CONFIG_CPU_HAS_FPU)      += fpu.o
 obj-y                          += memcmp.o
+ifeq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y)
 obj-y                          += memcpy.o
 obj-y                          += memmove.o
 obj-y                          += memset.o
+endif
 obj-y                          += strcmp.o
 obj-y                          += strcpy.o
 obj-y                          += strlen.o
index 06da723d82024e683c2d9d50c814f178afb76334..8d1fd28c6cf901e49048c811734c22cb52ede1f1 100644 (file)
@@ -3,10 +3,12 @@
 
 #include <linux/module.h>
 
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcmp);
 EXPORT_SYMBOL(memmove);
+#endif
+EXPORT_SYMBOL(memcmp);
 EXPORT_SYMBOL(strcmp);
 EXPORT_SYMBOL(strcpy);
 EXPORT_SYMBOL(strlen);
index dbc9b1bd72f0b7d3c986194e9581ab9948810b7b..c3cfde28f8e603d38a9c7e1a84c0aafd39202967 100644 (file)
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 targets := Image zImage uImage
-targets += $(dtb-y)
 
 $(obj)/Image: vmlinux FORCE
        $(call if_changed,objcopy)
diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
new file mode 100644 (file)
index 0000000..60406ef
--- /dev/null
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_CSKY_ATOMIC_H
+#define __ASM_CSKY_ATOMIC_H
+
+#ifdef CONFIG_SMP
+#include <asm-generic/atomic64.h>
+
+#include <asm/cmpxchg.h>
+#include <asm/barrier.h>
+
+#define __atomic_acquire_fence()       __bar_brarw()
+
+#define __atomic_release_fence()       __bar_brwaw()
+
+static __always_inline int arch_atomic_read(const atomic_t *v)
+{
+       return READ_ONCE(v->counter);
+}
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
+{
+       WRITE_ONCE(v->counter, i);
+}
+
+#define ATOMIC_OP(op)                                                  \
+static __always_inline                                                 \
+void arch_atomic_##op(int i, atomic_t *v)                              \
+{                                                                      \
+       unsigned long tmp;                                              \
+       __asm__ __volatile__ (                                          \
+       "1:     ldex.w          %0, (%2)        \n"                     \
+       "       " #op "         %0, %1          \n"                     \
+       "       stex.w          %0, (%2)        \n"                     \
+       "       bez             %0, 1b          \n"                     \
+       : "=&r" (tmp)                                                   \
+       : "r" (i), "r" (&v->counter)                                    \
+       : "memory");                                                    \
+}
+
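+/*
+ * Each ATOMIC_OP(op) below defines arch_atomic_##op() as an
+ * ldex.w/stex.w (load-/store-exclusive) retry loop: stex.w leaves
+ * zero in its operand register on failure, so bez branches back to
+ * retry until the store succeeds.
+ */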
+ATOMIC_OP(add)
+ATOMIC_OP(sub)
+ATOMIC_OP(and)
+ATOMIC_OP( or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
+#define ATOMIC_FETCH_OP(op)                                            \
+static __always_inline                                                 \
+int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)               \
+{                                                                      \
+       register int ret, tmp;                                          \
+       __asm__ __volatile__ (                                          \
+       "1:     ldex.w          %0, (%3) \n"                            \
+       "       mov             %1, %0   \n"                            \
+       "       " #op "         %0, %2   \n"                            \
+       "       stex.w          %0, (%3) \n"                            \
+       "       bez             %0, 1b   \n"                            \
+               : "=&r" (tmp), "=&r" (ret)                              \
+               : "r" (i), "r"(&v->counter)                             \
+               : "memory");                                            \
+       return ret;                                                     \
+}
+
+#define ATOMIC_OP_RETURN(op, c_op)                                     \
+static __always_inline                                                 \
+int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)              \
+{                                                                      \
+       return arch_atomic_fetch_##op##_relaxed(i, v) c_op i;           \
+}
+
+#define ATOMIC_OPS(op, c_op)                                           \
+       ATOMIC_FETCH_OP(op)                                             \
+       ATOMIC_OP_RETURN(op, c_op)
+
+ATOMIC_OPS(add, +)
+ATOMIC_OPS(sub, -)
+
+#define arch_atomic_fetch_add_relaxed  arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed  arch_atomic_fetch_sub_relaxed
+
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP_RETURN
+
+#define ATOMIC_OPS(op)                                                 \
+       ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS( or)
+ATOMIC_OPS(xor)
+
+#define arch_atomic_fetch_and_relaxed  arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed   arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed  arch_atomic_fetch_xor_relaxed
+
+#undef ATOMIC_OPS
+
+#undef ATOMIC_FETCH_OP
+
+static __always_inline int
+arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+       int prev, tmp;
+
+       __asm__ __volatile__ (
+               RELEASE_FENCE
+               "1:     ldex.w          %0, (%3)        \n"
+               "       cmpne           %0, %4          \n"
+               "       bf              2f              \n"
+               "       mov             %1, %0          \n"
+               "       add             %1, %2          \n"
+               "       stex.w          %1, (%3)        \n"
+               "       bez             %1, 1b          \n"
+               FULL_FENCE
+               "2:\n"
+               : "=&r" (prev), "=&r" (tmp)
+               : "r" (a), "r" (&v->counter), "r" (u)
+               : "memory");
+
+       return prev;
+}
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
+
+static __always_inline bool
+arch_atomic_inc_unless_negative(atomic_t *v)
+{
+       int rc, tmp;
+
+       __asm__ __volatile__ (
+               RELEASE_FENCE
+               "1:     ldex.w          %0, (%2)        \n"
+               "       movi            %1, 0           \n"
+               "       blz             %0, 2f          \n"
+               "       movi            %1, 1           \n"
+               "       addi            %0, 1           \n"
+               "       stex.w          %0, (%2)        \n"
+               "       bez             %0, 1b          \n"
+               FULL_FENCE
+               "2:\n"
+               : "=&r" (tmp), "=&r" (rc)
+               : "r" (&v->counter)
+               : "memory");
+
+       return rc ? true : false;
+}
+#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
+
+static __always_inline bool
+arch_atomic_dec_unless_positive(atomic_t *v)
+{
+       int rc, tmp;
+
+       __asm__ __volatile__ (
+               RELEASE_FENCE
+               "1:     ldex.w          %0, (%2)        \n"
+               "       movi            %1, 0           \n"
+               "       bhz             %0, 2f          \n"
+               "       movi            %1, 1           \n"
+               "       subi            %0, 1           \n"
+               "       stex.w          %0, (%2)        \n"
+               "       bez             %0, 1b          \n"
+               FULL_FENCE
+               "2:\n"
+               : "=&r" (tmp), "=&r" (rc)
+               : "r" (&v->counter)
+               : "memory");
+
+       return rc ? true : false;
+}
+#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
+
+static __always_inline int
+arch_atomic_dec_if_positive(atomic_t *v)
+{
+       int dec, tmp;
+
+       __asm__ __volatile__ (
+               RELEASE_FENCE
+               "1:     ldex.w          %0, (%2)        \n"
+               "       subi            %1, %0, 1       \n"
+               "       blz             %1, 2f          \n"
+               "       stex.w          %1, (%2)        \n"
+               "       bez             %1, 1b          \n"
+               FULL_FENCE
+               "2:\n"
+               : "=&r" (dec), "=&r" (tmp)
+               : "r" (&v->counter)
+               : "memory");
+
+       return dec - 1;
+}
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
+
+#define ATOMIC_OP()                                                    \
+static __always_inline                                                 \
+int arch_atomic_xchg_relaxed(atomic_t *v, int n)                       \
+{                                                                      \
+       return __xchg_relaxed(n, &(v->counter), 4);                     \
+}                                                                      \
+static __always_inline                                                 \
+int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n)             \
+{                                                                      \
+       return __cmpxchg_relaxed(&(v->counter), o, n, 4);               \
+}                                                                      \
+static __always_inline                                                 \
+int arch_atomic_cmpxchg_acquire(atomic_t *v, int o, int n)             \
+{                                                                      \
+       return __cmpxchg_acquire(&(v->counter), o, n, 4);               \
+}                                                                      \
+static __always_inline                                                 \
+int arch_atomic_cmpxchg(atomic_t *v, int o, int n)                     \
+{                                                                      \
+       return __cmpxchg(&(v->counter), o, n, 4);                       \
+}
+
+#define ATOMIC_OPS()                                                   \
+       ATOMIC_OP()
+
+ATOMIC_OPS()
+
+#define arch_atomic_xchg_relaxed       arch_atomic_xchg_relaxed
+#define arch_atomic_cmpxchg_relaxed    arch_atomic_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_acquire    arch_atomic_cmpxchg_acquire
+#define arch_atomic_cmpxchg            arch_atomic_cmpxchg
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP
+
+#else
+#include <asm-generic/atomic.h>
+#endif
+
+#endif /* __ASM_CSKY_ATOMIC_H */
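
The file above defines only the relaxed fetch/return variants plus the two
fence hooks; the fully ordered and acquire/release forms are synthesized by
the generic atomic layer. A simplified sketch of that composition for the
acquire flavor, along the lines of the generated include/linux/atomic
fallback headers (not part of this patch):

static __always_inline int
arch_atomic_fetch_add_acquire(int i, atomic_t *v)
{
        int ret = arch_atomic_fetch_add_relaxed(i, v);

        __atomic_acquire_fence();       /* bar.brarw on csky, per the hook above */
        return ret;
}
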
index f4045dd53e176ce5ce5be83b7d5b0ca354508932..15de58b10aece7727bc73f7516b49e817f2fff9a 100644 (file)
  * bar.brar
  * bar.bwaw
  */
+#define FULL_FENCE             ".long 0x842fc000\n"
+#define ACQUIRE_FENCE          ".long 0x8427c000\n"
+#define RELEASE_FENCE          ".long 0x842ec000\n"
+
 #define __bar_brw()    asm volatile (".long 0x842cc000\n":::"memory")
 #define __bar_br()     asm volatile (".long 0x8424c000\n":::"memory")
 #define __bar_bw()     asm volatile (".long 0x8428c000\n":::"memory")
 #define __bar_arw()    asm volatile (".long 0x8423c000\n":::"memory")
 #define __bar_ar()     asm volatile (".long 0x8421c000\n":::"memory")
 #define __bar_aw()     asm volatile (".long 0x8422c000\n":::"memory")
-#define __bar_brwarw() asm volatile (".long 0x842fc000\n":::"memory")
-#define __bar_brarw()  asm volatile (".long 0x8427c000\n":::"memory")
+#define __bar_brwarw() asm volatile (FULL_FENCE:::"memory")
+#define __bar_brarw()  asm volatile (ACQUIRE_FENCE:::"memory")
 #define __bar_bwarw()  asm volatile (".long 0x842bc000\n":::"memory")
 #define __bar_brwar()  asm volatile (".long 0x842dc000\n":::"memory")
-#define __bar_brwaw()  asm volatile (".long 0x842ec000\n":::"memory")
+#define __bar_brwaw()  asm volatile (RELEASE_FENCE:::"memory")
 #define __bar_brar()   asm volatile (".long 0x8425c000\n":::"memory")
 #define __bar_bwaw()   asm volatile (".long 0x842ac000\n":::"memory")
@@ -56,7 +60,6 @@
 #define __smp_rmb()    __bar_brar()
 #define __smp_wmb()    __bar_bwaw()
 
-#define ACQUIRE_FENCE          ".long 0x8427c000\n"
 #define __smp_acquire_fence()  __bar_brarw()
 #define __smp_release_fence()  __bar_brwaw()
 
index d1bef11f8dc9710d2d36b47ec83f276bd6410656..5b8faccd65e477461e107c7f29a0766cbbf3cea3 100644 (file)
@@ -64,15 +64,71 @@ extern void __bad_xchg(void);
 #define arch_cmpxchg_relaxed(ptr, o, n) \
        (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 
-#define arch_cmpxchg(ptr, o, n)                                \
+#define __cmpxchg_acquire(ptr, old, new, size)                 \
 ({                                                             \
+       __typeof__(ptr) __ptr = (ptr);                          \
+       __typeof__(new) __new = (new);                          \
+       __typeof__(new) __tmp;                                  \
+       __typeof__(old) __old = (old);                          \
+       __typeof__(*(ptr)) __ret;                               \
+       switch (size) {                                         \
+       case 4:                                                 \
+               asm volatile (                                  \
+               "1:     ldex.w          %0, (%3) \n"            \
+               "       cmpne           %0, %4   \n"            \
+               "       bt              2f       \n"            \
+               "       mov             %1, %2   \n"            \
+               "       stex.w          %1, (%3) \n"            \
+               "       bez             %1, 1b   \n"            \
+               ACQUIRE_FENCE                                   \
+               "2:                              \n"            \
+                       : "=&r" (__ret), "=&r" (__tmp)          \
+                       : "r" (__new), "r"(__ptr), "r"(__old)   \
+                       :);                                     \
+               break;                                          \
+       default:                                                \
+               __bad_xchg();                                   \
+       }                                                       \
+       __ret;                                                  \
+})
+
+#define arch_cmpxchg_acquire(ptr, o, n) \
+       (__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr))))
+
+#define __cmpxchg(ptr, old, new, size)                         \
+({                                                             \
+       __typeof__(ptr) __ptr = (ptr);                          \
+       __typeof__(new) __new = (new);                          \
+       __typeof__(new) __tmp;                                  \
+       __typeof__(old) __old = (old);                          \
        __typeof__(*(ptr)) __ret;                               \
-       __smp_release_fence();                                  \
-       __ret = arch_cmpxchg_relaxed(ptr, o, n);                \
-       __smp_acquire_fence();                                  \
+       switch (size) {                                         \
+       case 4:                                                 \
+               asm volatile (                                  \
+               RELEASE_FENCE                                   \
+               "1:     ldex.w          %0, (%3) \n"            \
+               "       cmpne           %0, %4   \n"            \
+               "       bt              2f       \n"            \
+               "       mov             %1, %2   \n"            \
+               "       stex.w          %1, (%3) \n"            \
+               "       bez             %1, 1b   \n"            \
+               FULL_FENCE                                      \
+               "2:                              \n"            \
+                       : "=&r" (__ret), "=&r" (__tmp)          \
+                       : "r" (__new), "r"(__ptr), "r"(__old)   \
+                       :);                                     \
+               break;                                          \
+       default:                                                \
+               __bad_xchg();                                   \
+       }                                                       \
        __ret;                                                  \
 })
 
+#define arch_cmpxchg(ptr, o, n)                                        \
+       (__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))
+
+#define arch_cmpxchg_local(ptr, o, n)                          \
+       (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 #else
 #include <asm-generic/cmpxchg.h>
 #endif
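
Compared with the old definition, which bracketed a relaxed loop between two
standalone fences, the new variants embed RELEASE_FENCE ahead of the ll/sc
sequence and FULL_FENCE (or ACQUIRE_FENCE) on its success path, so the
trailing barrier is skipped entirely when the compare fails. A hypothetical
caller-level sketch using the acquire flavor from atomic.h above
(toy_trylock is a made-up name):

static inline bool toy_trylock(atomic_t *lock)
{
        /* 0 -> 1 transition; ACQUIRE_FENCE executes only if we won the lock */
        return arch_atomic_cmpxchg_acquire(lock, 0, 1) == 0;
}
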
index f82654053dc056da95f43c3402741cee9263c0b2..4725bb977b0f4ff5edc30fb46bf8e4ea265f145a 100644 (file)
@@ -5,7 +5,6 @@
 
 #include <linux/pgtable.h>
 #include <linux/types.h>
-#include <linux/version.h>
 
 /*
  * I/O memory access primitives. Reads are ordered relative to any
 #define writel(v,c)            ({ wmb(); writel_relaxed((v),(c)); mb(); })
 #endif
 
+/*
+ * String version of I/O memory access operations.
+ */
+extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
+extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
+extern void __memset_io(volatile void __iomem *, int, size_t);
+
+#define memset_io(c,v,l)        __memset_io((c),(v),(l))
+#define memcpy_fromio(a,c,l)    __memcpy_fromio((a),(c),(l))
+#define memcpy_toio(c,a,l)      __memcpy_toio((c),(a),(l))
+
 /*
  * I/O memory mapping functions.
  */
index 6c0f36010ed0509a8b1a89dbc0a518a36146da2b..4eb41421ca5b467205a09e5b926252ce93b497d1 100644 (file)
@@ -2,7 +2,7 @@
 extra-y := head.o vmlinux.lds
 
 obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
-obj-y += power.o syscall.o syscall_table.o setup.o
+obj-y += power.o syscall.o syscall_table.o setup.o io.o
 obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
 obj-y += probes/
 
diff --git a/arch/csky/kernel/io.c b/arch/csky/kernel/io.c
new file mode 100644 (file)
index 0000000..5883f13
--- /dev/null
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+/*
+ * Copy data from IO memory space to "real" memory space.
+ */
+void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+{
+       while (count && !IS_ALIGNED((unsigned long)from, 4)) {
+               *(u8 *)to = __raw_readb(from);
+               from++;
+               to++;
+               count--;
+       }
+
+       while (count >= 4) {
+               *(u32 *)to = __raw_readl(from);
+               from += 4;
+               to += 4;
+               count -= 4;
+       }
+
+       while (count) {
+               *(u8 *)to = __raw_readb(from);
+               from++;
+               to++;
+               count--;
+       }
+}
+EXPORT_SYMBOL(__memcpy_fromio);
+
+/*
+ * Copy data from "real" memory space to IO memory space.
+ */
+void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+{
+       while (count && !IS_ALIGNED((unsigned long)to, 4)) {
+               __raw_writeb(*(u8 *)from, to);
+               from++;
+               to++;
+               count--;
+       }
+
+       while (count >= 4) {
+               __raw_writel(*(u32 *)from, to);
+               from += 4;
+               to += 4;
+               count -= 4;
+       }
+
+       while (count) {
+               __raw_writeb(*(u8 *)from, to);
+               from++;
+               to++;
+               count--;
+       }
+}
+EXPORT_SYMBOL(__memcpy_toio);
+
+/*
+ * "memset" on IO memory space.
+ */
+void __memset_io(volatile void __iomem *dst, int c, size_t count)
+{
+       u32 qc = (u8)c;
+
+       qc |= qc << 8;
+       qc |= qc << 16;
+
+       while (count && !IS_ALIGNED((unsigned long)dst, 4)) {
+               __raw_writeb(c, dst);
+               dst++;
+               count--;
+       }
+
+       while (count >= 4) {
+               __raw_writel(qc, dst);
+               dst += 4;
+               count -= 4;
+       }
+
+       while (count) {
+               __raw_writeb(c, dst);
+               dst++;
+               count--;
+       }
+}
+EXPORT_SYMBOL(__memset_io);
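
Each helper byte-steps until the I/O address is 4-byte aligned, moves the
bulk as 32-bit accesses, then finishes the tail byte-wise. A hypothetical
usage sketch (dev_buf_phys and DEV_BUF_LEN are made-up names):

static int copy_dev_buffer(void *dst)
{
        void __iomem *regs = ioremap(dev_buf_phys, DEV_BUF_LEN);

        if (!regs)
                return -ENOMEM;
        memcpy_fromio(dst, regs, DEV_BUF_LEN);  /* __raw_readb until aligned, then __raw_readl */
        iounmap(regs);
        return 0;
}
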
index 6cd82d69c6551c1861f1c10ac180896624865c77..f11b3e5733448202e4c04fe131a84c898f8ce5f2 100644 (file)
@@ -68,7 +68,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
                        *location = rel[i].r_addend + sym->st_value;
                        break;
                case R_CSKY_PC32:
-                       /* Add the value, subtract its postition */
+                       /* Add the value, subtract its position */
                        *location = rel[i].r_addend + sym->st_value
                                                        - (uint32_t)location;
                        break;
index 42920f25e73c8875a9c58401cb8f922d52f5eb20..34ba684d5962b1067a4121f515b6761fea7c3aaf 100644 (file)
@@ -30,7 +30,7 @@ static int __kprobes patch_text_cb(void *priv)
        struct csky_insn_patch *param = priv;
        unsigned int addr = (unsigned int)param->addr;
 
-       if (atomic_inc_return(&param->cpu_count) == 1) {
+       if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
                *(u16 *) addr = cpu_to_le16(param->opcode);
                dcache_wb_range(addr, addr + 2);
                atomic_inc(&param->cpu_count);
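
The one-line change flips which CPU performs the patch under stop_machine():
previously the first CPU to arrive wrote the opcode while the others could
still be executing the old instruction; now the last CPU to arrive does the
write, by which point every other CPU is already parked in the callback. A
sketch of the complete callback after the fix, following the pattern of the
analogous riscv change (the waiting half is reconstructed here, not shown in
the hunk):

static int patch_text_cb_sketch(void *priv)
{
        struct csky_insn_patch *param = priv;
        unsigned int addr = (unsigned int)param->addr;

        if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
                *(u16 *)addr = cpu_to_le16(param->opcode);
                dcache_wb_range(addr, addr + 2);
                atomic_inc(&param->cpu_count);  /* release the waiters */
        } else {
                while (atomic_read(&param->cpu_count) <= num_online_cpus())
                        cpu_relax();
                smp_mb();
        }
        return 0;
}
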
index 1a9e0961b2b5b8a6ade8b583d6cbc299815c82a4..2d31a12e46cfee0bcb202957a4c4b8eb3bcca223 100644 (file)
@@ -102,7 +102,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
        struct uprobe_task *utask = current->utask;
 
        /*
-        * Task has received a fatal signal, so reset back to probbed
+        * Task has received a fatal signal, so reset back to probed
         * address.
         */
        instruction_pointer_set(regs, utask->vaddr);
index 3d0ca22cd0e2e6075c4bcdcb5d69d6e55f5131f1..5de04707aa07bc4ddf9ee17e59bf679dc5721bae 100644 (file)
@@ -2,7 +2,6 @@
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/debug.h>
index 7fbdbb2c4d12148abd1aeb1336f29358ed4d72a1..d0ce6e2d7ab25439042e14e823eb32b281e0984a 100644 (file)
@@ -1,3 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 lib-y  := usercopy.o delay.o
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y)
+lib-y  += string.o
+endif
diff --git a/arch/csky/lib/string.c b/arch/csky/lib/string.c
new file mode 100644 (file)
index 0000000..d65626f
--- /dev/null
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * String functions optimized for hardware which doesn't
+ * handle unaligned memory accesses efficiently.
+ *
+ * Copyright (C) 2021 Matteo Croce
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+
+/* Minimum size for a word copy to be convenient */
+#define BYTES_LONG     sizeof(long)
+#define WORD_MASK      (BYTES_LONG - 1)
+#define MIN_THRESHOLD  (BYTES_LONG * 2)
+
+/* convenience union to avoid casts between different pointer types */
+union types {
+       u8 *as_u8;
+       unsigned long *as_ulong;
+       uintptr_t as_uptr;
+};
+
+union const_types {
+       const u8 *as_u8;
+       unsigned long *as_ulong;
+       uintptr_t as_uptr;
+};
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+       union const_types s = { .as_u8 = src };
+       union types d = { .as_u8 = dest };
+       int distance = 0;
+
+       if (count < MIN_THRESHOLD)
+               goto copy_remainder;
+
+       /* Copy a byte at a time until the destination is aligned. */
+       for (; d.as_uptr & WORD_MASK; count--)
+               *d.as_u8++ = *s.as_u8++;
+
+       distance = s.as_uptr & WORD_MASK;
+
+       if (distance) {
+               unsigned long last, next;
+
+               /*
+                * s is distance bytes ahead of d, and d just reached
+                * the alignment boundary. Move s backward to word align it
+                * and shift data to compensate for distance, in order to do
+                * word-by-word copy.
+                */
+               s.as_u8 -= distance;
+
+               next = s.as_ulong[0];
+               for (; count >= BYTES_LONG; count -= BYTES_LONG) {
+                       last = next;
+                       next = s.as_ulong[1];
+
+                       d.as_ulong[0] = last >> (distance * 8) |
+                               next << ((BYTES_LONG - distance) * 8);
+
+                       d.as_ulong++;
+                       s.as_ulong++;
+               }
+
+               /* Restore s with the original offset. */
+               s.as_u8 += distance;
+       } else {
+               /*
+                * If the source and dest lower bits are the same, do a simple
+                * 32/64 bit wide copy.
+                */
+               for (; count >= BYTES_LONG; count -= BYTES_LONG)
+                       *d.as_ulong++ = *s.as_ulong++;
+       }
+
+copy_remainder:
+       while (count--)
+               *d.as_u8++ = *s.as_u8++;
+
+       return dest;
+}
+EXPORT_SYMBOL(memcpy);
+
+/*
+ * Check whether the buffers overlap and, when a forward copy is safe,
+ * just call memcpy(); otherwise do a simple one-byte-at-a-time backward copy.
+ */
+void *memmove(void *dest, const void *src, size_t count)
+{
+       if (dest < src || src + count <= dest)
+               return memcpy(dest, src, count);
+
+       if (dest > src) {
+               const char *s = src + count;
+               char *tmp = dest + count;
+
+               while (count--)
+                       *--tmp = *--s;
+       }
+       return dest;
+}
+EXPORT_SYMBOL(memmove);
+
+void *memset(void *s, int c, size_t count)
+{
+       union types dest = { .as_u8 = s };
+
+       if (count >= MIN_THRESHOLD) {
+               unsigned long cu = (unsigned long)c;
+
+               /* Compose an ulong with 'c' repeated 4/8 times */
+               cu |= cu << 8;
+               cu |= cu << 16;
+               /* Suppress warning on 32 bit machines */
+               cu |= (cu << 16) << 16;
+
+               for (; count && dest.as_uptr & WORD_MASK; count--)
+                       *dest.as_u8++ = c;
+
+               /* Copy using the largest size allowed */
+               for (; count >= BYTES_LONG; count -= BYTES_LONG)
+                       *dest.as_ulong++ = cu;
+       }
+
+       /* copy the remainder */
+       while (count--)
+               *dest.as_u8++ = c;
+
+       return s;
+}
+EXPORT_SYMBOL(memset);
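
The subtle path in memcpy() above is the mismatched-alignment case, where
each destination word is assembled from two adjacent source words with
complementary shifts. A standalone worked example of that merge
(hypothetical userspace test; the little-endian layout used by csky is
assumed):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* source bytes 11 22 33 44 55 66 77 88, with the copy starting
         * 1 byte past a word boundary, so distance == 1 */
        uint32_t last = 0x44332211, next = 0x88776655;
        unsigned int distance = 1;

        uint32_t merged = last >> (distance * 8) |
                          next << ((4 - distance) * 8);
        printf("%08x\n", merged);       /* prints 55443322: bytes 22 33 44 55 */
        return 0;
}
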
index c3a775a7e8f9d84c684cfd58e9cd54efe848931d..82447029feb485aef3554121e8d58b2840d12391 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <asm/cache.h>
 
 static inline void cache_op(phys_addr_t paddr, size_t size,
index 869a3ac6bf23a240497affb45f9dd878c3394d44..7ccc077a60bedd9c67d74fae235e4d8fc2cc5df2 100644 (file)
@@ -39,6 +39,7 @@ get_cycles (void)
        ret = ia64_getreg(_IA64_REG_AR_ITC);
        return ret;
 }
+#define get_cycles get_cycles
 
 extern void ia64_cpu_local_tick (void);
 extern unsigned long long ia64_native_sched_clock (void);
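
The self-referencing define is what generic code keys off: with it present,
the preprocessor can tell that the architecture supplies a real cycle
counter. A simplified sketch along the lines of the include/linux/timex.h
fallback introduced this cycle:

#ifndef random_get_entropy
#ifdef get_cycles
#define random_get_entropy()    ((unsigned long)get_cycles())
#else
#define random_get_entropy()    random_get_entropy_fallback()
#endif
#endif
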
index 18abb35c26a153297380b231c948f1e842edabad..7762af9f6defec5e235665c0cf18701bd7d0227a 100644 (file)
@@ -17,3 +17,4 @@ obj-$(CONFIG_M68060)          += ifpsp060/
 obj-$(CONFIG_M68KFPU_EMU)      += math-emu/
 obj-$(CONFIG_M68000)           += 68000/
 obj-$(CONFIG_COLDFIRE)         += coldfire/
+obj-$(CONFIG_VIRT)             += virt/
index 16ea9a67723c09dcb820d933c10a146bb0d1631a..3d5da25c73b5af8aa70f5dbe745557bd6298b49b 100644 (file)
@@ -327,7 +327,7 @@ comment "Processor Specific Options"
 
 config M68KFPU_EMU
        bool "Math emulation support"
-       depends on MMU
+       depends on M68KCLASSIC && FPU
        help
          At some point in the future, this will cause floating-point math
          instructions to be emulated by the kernel on machines that lack a
index eeab4f3e6c197dbdfd14f67776e4f85ce4834cd7..188a8f8a0104ae545dfed5e268df88fd2765a5e7 100644 (file)
@@ -149,6 +149,23 @@ config SUN3
 
          If you don't want to compile a kernel exclusively for a Sun 3, say N.
 
+config VIRT
+       bool "Virtual M68k Machine support"
+       depends on MMU
+       select GENERIC_CLOCKEVENTS
+       select GOLDFISH
+       select GOLDFISH_TIMER
+       select GOLDFISH_TTY
+       select M68040
+       select MMU_MOTOROLA if MMU
+       select RTC_CLASS
+       select RTC_DRV_GOLDFISH
+       select TTY
+       select VIRTIO_MMIO
+       help
+         This option enables a pure virtual machine based on m68k,
+         VIRTIO MMIO devices and GOLDFISH interfaces (TTY, RTC, PIC).
+
 config PILOT
        bool
 
index 114aaa3f955a387d9fa553d089ba8cb1fd7cade2..c181030218bf29db12d3c4616a24a7ae1f2fff30 100644 (file)
@@ -42,7 +42,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -581,6 +580,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -613,7 +613,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 CONFIG_GLOB_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -638,7 +637,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -659,6 +657,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index 30b9d932b930bc2740b6dd9d31df28a4a0884d2c..40755648fb6ce7a70d30a82793ab0ec395b62f52 100644 (file)
@@ -38,7 +38,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -538,6 +537,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
@@ -594,7 +593,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -615,6 +613,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index 51ff3180e69d605ff0661202b88f8c8aa0ff6c9c..be0d9155fc5b8a34f94363d692f69ffcc8780cac 100644 (file)
@@ -45,7 +45,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -558,6 +557,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -590,7 +590,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 CONFIG_GLOB_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -615,7 +614,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -636,6 +634,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index 7d95ca4366e4d30d864ad9f9d43f3f7d411b9314..9af0e2d0d153be633821262e1f5a0b5ec3eeafb5 100644 (file)
@@ -35,7 +35,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -530,6 +529,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -562,7 +562,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
@@ -586,7 +585,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -607,6 +605,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index e306e38136071962ac3e22cde47ce649083c1046..49341d66feb6a6b552fc3da55c4275f38afb1e51 100644 (file)
@@ -37,7 +37,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -540,6 +539,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -572,7 +572,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
@@ -596,7 +595,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -617,6 +615,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index 41316cf0244121aa0e190ba22a334bcacb31ba96..92b33d5ffab190f742d2b0a5fdc3b7902d7406ed 100644 (file)
@@ -36,7 +36,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -560,6 +559,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -592,7 +592,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 CONFIG_GLOB_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -617,7 +616,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -638,6 +636,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index 2fc3f0df6d434f2ff26c6f43cc0ecc025a8b96bf..6aaa947bc8491a58e6116dc20faf2e9e1d869963 100644 (file)
@@ -56,7 +56,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -646,6 +645,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -678,7 +678,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 CONFIG_GLOB_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -703,7 +702,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -724,6 +722,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index 9603f4396469d19871bf9a53b6f81e708482d9d3..b62d65e5993838aa0f1a4c4c252c36a5e5f67906 100644 (file)
@@ -34,7 +34,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -529,6 +528,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -561,7 +561,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
@@ -585,7 +584,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -606,6 +604,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index c9cabd3344df8537d8e8e1f7d3d8fcde396d4e0d..8ecf261487d4262e1864f478302b0365b525b1cc 100644 (file)
@@ -35,7 +35,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -530,6 +529,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -562,7 +562,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
@@ -586,7 +585,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -607,6 +605,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index 5f994bf44fb8353ba4463f17ded3b8b1e9502759..7540d908897bca8310f6557c0fe0383e00b29175 100644 (file)
@@ -36,7 +36,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -547,6 +546,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -579,7 +579,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 CONFIG_GLOB_SELFTEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
@@ -604,7 +603,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -625,6 +623,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index 183e33f7d4a07b74689742606665743690465c14..832b45944617c6c3948e40b9dd818c68ec419c5d 100644 (file)
@@ -32,7 +32,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -529,6 +528,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -561,7 +561,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
@@ -584,7 +583,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -605,6 +603,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
index 8214263b9ab8dc4b63add5482ae2fde12b4ae866..9171b687e565e2f11a278fd0ede62e55b9574476 100644 (file)
@@ -32,7 +32,6 @@ CONFIG_MQ_IOSCHED_DEADLINE=m
 CONFIG_MQ_IOSCHED_KYBER=m
 CONFIG_IOSCHED_BFQ=m
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
 CONFIG_ZPOOL=m
@@ -528,6 +527,7 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_AES_TI=m
@@ -560,7 +560,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
-CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_MAGIC_SYSRQ=y
@@ -584,7 +583,6 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
-CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
@@ -605,6 +603,5 @@ CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_TEST_KMOD=m
 CONFIG_TEST_MEMCAT_P=m
-CONFIG_TEST_STACKINIT=m
 CONFIG_TEST_MEMINIT=m
 CONFIG_TEST_FREE_PAGES=m
diff --git a/arch/m68k/configs/virt_defconfig b/arch/m68k/configs/virt_defconfig
new file mode 100644 (file)
index 0000000..8059bd6
--- /dev/null
@@ -0,0 +1,68 @@
+CONFIG_LOCALVERSION="-virt"
+CONFIG_SYSVIPC=y
+CONFIG_CGROUPS=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_VIRT=y
+CONFIG_PROC_HARDWARE=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_ATARI_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_LDM_PARTITION=y
+CONFIG_LDM_DEBUG=y
+CONFIG_SUN_PARTITION=y
+CONFIG_SYSV68_PARTITION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_HW_RANDOM_VIRTIO=y
+CONFIG_DRM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FB=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_VIRTIO=y
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_9P_FS=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_EARLY_PRINTK=y
index e73ffa23c4f567efe0eccc063935158b74d62cf3..9bb888ab50094b22992591be3688103d5229c11a 100644 (file)
@@ -17,6 +17,7 @@ extern int mac_parse_bootinfo(const struct bi_record *record);
 extern int mvme147_parse_bootinfo(const struct bi_record *record);
 extern int mvme16x_parse_bootinfo(const struct bi_record *record);
 extern int q40_parse_bootinfo(const struct bi_record *record);
+extern int virt_parse_bootinfo(const struct bi_record *record);
 
 extern void config_amiga(void);
 extern void config_apollo(void);
@@ -29,5 +30,6 @@ extern void config_mvme16x(void);
 extern void config_q40(void);
 extern void config_sun3(void);
 extern void config_sun3x(void);
+extern void config_virt(void);
 
 #endif /* _M68K_CONFIG_H */
index aabe6420ead2a5996d233a00af822d5dff0eac87..aaeabc65e63c1c785c61c95628328c833d64a4b6 100644 (file)
@@ -8,6 +8,9 @@
 #include <asm/io_mm.h>
 #endif
 
+#define gf_ioread32 ioread32be
+#define gf_iowrite32 iowrite32be
+
 #include <asm-generic/io.h>
 
 #endif /* _M68K_IO_H */
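
The Goldfish devices on this machine are wired big-endian, so the shared
Goldfish drivers reach their registers through gf_ioread32()/gf_iowrite32()
and each architecture maps those onto the right accessor, as above. A
hypothetical driver-side sketch (the GF_STATUS offset is made up):

#define GF_STATUS       0x00    /* hypothetical register offset */

static u32 gf_read_status(void __iomem *base)
{
        return gf_ioread32(base + GF_STATUS);   /* ioread32be() on m68k */
}
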
index 91dd493791d7469fad2281e8bb5973128a3669ff..7829e955ca0446e5fe6c589bb76fa8e458892fa3 100644 (file)
@@ -12,7 +12,8 @@
  */
 #if defined(CONFIG_COLDFIRE)
 #define NR_IRQS 256
-#elif defined(CONFIG_VME) || defined(CONFIG_SUN3) || defined(CONFIG_SUN3X)
+#elif defined(CONFIG_VME) || defined(CONFIG_SUN3) || \
+      defined(CONFIG_SUN3X) || defined(CONFIG_VIRT)
 #define NR_IRQS 200
 #elif defined(CONFIG_ATARI)
 #define NR_IRQS 141
index 143ba7de9bda1ce18993e5ddb3626566e6483294..9b4e2fe2ac821671ee5615801cbe791fbee4a8f1 100644 (file)
@@ -80,6 +80,9 @@
 #elif defined(CONFIG_COLDFIRE)
 #define KMAP_START     0xe0000000
 #define KMAP_END       0xf0000000
+#elif defined(CONFIG_VIRT)
+#define        KMAP_START      0xdf000000
+#define        KMAP_END        0xff000000
 #else
 #define        KMAP_START      0xd0000000
 #define        KMAP_END        0xf0000000
@@ -92,6 +95,10 @@ extern unsigned long m68k_vmalloc_end;
 #elif defined(CONFIG_COLDFIRE)
 #define VMALLOC_START  0xd0000000
 #define VMALLOC_END    0xe0000000
+#elif defined(CONFIG_VIRT)
+#define VMALLOC_OFFSET PAGE_SIZE
+#define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
+#define VMALLOC_END     KMAP_START
 #else
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
index 80eb2396d01ebf61895f8471c33b0e9728f25bcd..3ba40bc1dfaa947113a3b7cc2c769736ed214054 100644 (file)
        ({ u16 __v = le16_to_cpu(*(__force volatile u16 *) (addr)); __v; })
 
 #define rom_out_8(addr, b)     \
-       ({u8 __maybe_unused __w, __v = (b);  u32 _addr = ((u32) (addr)); \
+       (void)({u8 __maybe_unused __w, __v = (b);  u32 _addr = ((u32) (addr)); \
        __w = ((*(__force volatile u8 *)  ((_addr | 0x10000) + (__v<<1)))); })
 #define rom_out_be16(addr, w)  \
-       ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
+       (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
        __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v & 0xFF)<<1)))); \
        __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v >> 8)<<1)))); })
 #define rom_out_le16(addr, w)  \
-       ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
+       (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
        __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v >> 8)<<1)))); \
        __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v & 0xFF)<<1)))); })
 
index 8f2023f8c1c45727a6904d0567fb070e9f8b7364..2c99477aaf89aca3cca37f071f71c0ef0c5ffb31 100644 (file)
@@ -37,7 +37,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_ATARI) || defined(CONFIG_MAC) || defined(CONFIG_APOLLO) \
        || defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000)               \
        || defined(CONFIG_HP300) || defined(CONFIG_Q40)                      \
-       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)                  \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_AMIGA (m68k_machtype == MACH_AMIGA)
 #else
 #  define MACH_AMIGA_ONLY
@@ -50,7 +51,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_APOLLO) \
        || defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000)               \
        || defined(CONFIG_HP300) || defined(CONFIG_Q40)                      \
-       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)                  \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_ATARI (m68k_machtype == MACH_ATARI)
 #else
 #  define MACH_ATARI_ONLY
@@ -63,7 +65,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_AMIGA) || defined(CONFIG_ATARI) || defined(CONFIG_APOLLO) \
        || defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000)                 \
        || defined(CONFIG_HP300) || defined(CONFIG_Q40)                        \
-       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)                    \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_MAC (m68k_machtype == MACH_MAC)
 #else
 #  define MACH_MAC_ONLY
@@ -84,7 +87,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
        || defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000)              \
        || defined(CONFIG_HP300) || defined(CONFIG_Q40)                     \
-       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)                 \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_APOLLO (m68k_machtype == MACH_APOLLO)
 #else
 #  define MACH_APOLLO_ONLY
@@ -97,7 +101,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
        || defined(CONFIG_APOLLO) || defined(CONFIG_BVME6000)               \
        || defined(CONFIG_HP300) || defined(CONFIG_Q40)                     \
-       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME16x)
+       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME16x)                 \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_MVME147 (m68k_machtype == MACH_MVME147)
 #else
 #  define MACH_MVME147_ONLY
@@ -110,7 +115,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
        || defined(CONFIG_APOLLO) || defined(CONFIG_BVME6000)               \
        || defined(CONFIG_HP300) || defined(CONFIG_Q40)                     \
-       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)                 \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_MVME16x (m68k_machtype == MACH_MVME16x)
 #else
 #  define MACH_MVME16x_ONLY
@@ -123,7 +129,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
        || defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x)                \
        || defined(CONFIG_HP300) || defined(CONFIG_Q40)                     \
-       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)                 \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_BVME6000 (m68k_machtype == MACH_BVME6000)
 #else
 #  define MACH_BVME6000_ONLY
@@ -136,7 +143,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
        || defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x) \
        || defined(CONFIG_BVME6000) || defined(CONFIG_Q40) \
-       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147) \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_HP300 (m68k_machtype == MACH_HP300)
 #else
 #  define MACH_HP300_ONLY
@@ -149,7 +157,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
        || defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x)                \
        || defined(CONFIG_BVME6000) || defined(CONFIG_HP300)                \
-       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)
+       || defined(CONFIG_SUN3X) || defined(CONFIG_MVME147)                 \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_Q40 (m68k_machtype == MACH_Q40)
 #else
 #  define MACH_Q40_ONLY
@@ -162,7 +171,8 @@ extern unsigned long m68k_machtype;
 #elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
        || defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x)                \
        || defined(CONFIG_BVME6000) || defined(CONFIG_HP300)                \
-       || defined(CONFIG_Q40) || defined(CONFIG_MVME147)
+       || defined(CONFIG_Q40) || defined(CONFIG_MVME147)                   \
+       || defined(CONFIG_VIRT)
 #  define MACH_IS_SUN3X (m68k_machtype == MACH_SUN3X)
 #else
 #  define CONFIG_SUN3X_ONLY
@@ -170,6 +180,20 @@ extern unsigned long m68k_machtype;
 #  define MACH_TYPE (MACH_SUN3X)
 #endif
 
+#if !defined(CONFIG_VIRT)
+#  define MACH_IS_VIRT (0)
+#elif defined(CONFIG_AMIGA) || defined(CONFIG_MAC) || defined(CONFIG_ATARI) \
+       || defined(CONFIG_APOLLO) || defined(CONFIG_MVME16x)                \
+       || defined(CONFIG_BVME6000) || defined(CONFIG_HP300)                \
+       || defined(CONFIG_Q40) || defined(CONFIG_SUN3X)                     \
+       || defined(CONFIG_MVME147)
+#  define MACH_IS_VIRT (m68k_machtype == MACH_VIRT)
+#else
+#  define MACH_VIRT_ONLY
+#  define MACH_IS_VIRT (1)
+#  define MACH_TYPE (MACH_VIRT)
+#endif
+
 #ifndef MACH_TYPE
 #  define MACH_TYPE (m68k_machtype)
 #endif
index 6a21d93582805540bdaba6b7699c157a9f73cc31..f4a7a340f4cae6da2e5738ca1c15746976f12cc1 100644 (file)
@@ -35,7 +35,7 @@ static inline unsigned long random_get_entropy(void)
 {
        if (mach_random_get_entropy)
                return mach_random_get_entropy();
-       return 0;
+       return random_get_entropy_fallback();
 }
 #define random_get_entropy     random_get_entropy
 
diff --git a/arch/m68k/include/asm/virt.h b/arch/m68k/include/asm/virt.h
new file mode 100644 (file)
index 0000000..d3320c9
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VIRT_H
+#define __ASM_VIRT_H
+
+#define NUM_VIRT_SOURCES 200
+
+struct virt_booter_device_data {
+       u32 mmio;
+       u32 irq;
+};
+
+struct virt_booter_data {
+       u32 qemu_version;
+       struct virt_booter_device_data pic;
+       struct virt_booter_device_data rtc;
+       struct virt_booter_device_data tty;
+       struct virt_booter_device_data ctrl;
+       struct virt_booter_device_data virtio;
+};
+
+extern struct virt_booter_data virt_bi_data;
+
+extern void __init virt_init_IRQ(void);
+
+#endif
diff --git a/arch/m68k/include/uapi/asm/bootinfo-virt.h b/arch/m68k/include/uapi/asm/bootinfo-virt.h
new file mode 100644 (file)
index 0000000..e4db7e2
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * asm/bootinfo-virt.h -- Virtual-m68k-specific boot information definitions
+ */
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_VIRT_H
+#define _UAPI_ASM_M68K_BOOTINFO_VIRT_H
+
+#define BI_VIRT_QEMU_VERSION   0x8000
+#define BI_VIRT_GF_PIC_BASE    0x8001
+#define BI_VIRT_GF_RTC_BASE    0x8002
+#define BI_VIRT_GF_TTY_BASE    0x8003
+#define BI_VIRT_VIRTIO_BASE    0x8004
+#define BI_VIRT_CTRL_BASE      0x8005
+
+#define VIRT_BOOTI_VERSION     MK_BI_VERSION(2, 0)
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_VIRT_H */
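
These tags are consumed by virt_parse_bootinfo(), wired up in setup.c below.
A hedged sketch of how such a parser might decode the records (the real one
lives in arch/m68k/virt/config.c and may differ in detail; each BI_VIRT_GF_*
payload is assumed to be an mmio/irq pair of big-endian 32-bit words):

int __init virt_parse_bootinfo_sketch(const struct bi_record *record)
{
        const __be32 *data = record->data;

        switch (be16_to_cpu(record->tag)) {
        case BI_VIRT_QEMU_VERSION:
                virt_bi_data.qemu_version = be32_to_cpup(data);
                break;
        case BI_VIRT_GF_PIC_BASE:
                virt_bi_data.pic.mmio = be32_to_cpup(data);
                virt_bi_data.pic.irq  = be32_to_cpup(data + 1);
                break;
        default:
                return 1;       /* not a record we know */
        }
        return 0;
}
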
index 38d3140381fa8e4355c71456c71199c760351cf8..203d9cbf9630289364acab750b0dcb265570b3e2 100644 (file)
@@ -83,6 +83,7 @@ struct mem_info {
 #define MACH_SUN3X             11
 #define MACH_M54XX             12
 #define MACH_M5441X            13
+#define MACH_VIRT              14
 
 
     /*
index dbac7f8743fcc44df067d6d7137f283d14ec5866..c0833da6a2ca6b8f608267ad6aa41c363faccca6 100644 (file)
@@ -11,6 +11,7 @@ extra-$(CONFIG_VME)   := head.o
 extra-$(CONFIG_HP300)  := head.o
 extra-$(CONFIG_Q40)    := head.o
 extra-$(CONFIG_SUN3X)  := head.o
+extra-$(CONFIG_VIRT)   := head.o
 extra-$(CONFIG_SUN3)   := sun3-head.o
 extra-y                        += vmlinux.lds
 
index 9434fca68de5d018b9f301f9b8813f7b6826c667..18f278bdbd218ed36983feb0b46d2d56899fa839 100644 (file)
@@ -181,7 +181,7 @@ do_trace_entry:
        movel   #-ENOSYS,%sp@(PT_OFF_D0)| needed for strace
        subql   #4,%sp
        SAVE_SWITCH_STACK
-       jbsr    syscall_trace
+       jbsr    syscall_trace_enter
        RESTORE_SWITCH_STACK
        addql   #4,%sp
        movel   %sp@(PT_OFF_ORIG_D0),%d0
@@ -194,7 +194,7 @@ badsys:
 do_trace_exit:
        subql   #4,%sp
        SAVE_SWITCH_STACK
-       jbsr    syscall_trace
+       jbsr    syscall_trace_leave
        RESTORE_SWITCH_STACK
        addql   #4,%sp
        jra     .Lret_from_exception
index 493c95db0e512d2c2ee0ff4c675179d970a0b010..9e812d8606be830189339eb02cf2dafe1c42a92e 100644 (file)
 #include <asm/bootinfo-hp300.h>
 #include <asm/bootinfo-mac.h>
 #include <asm/bootinfo-q40.h>
+#include <asm/bootinfo-virt.h>
 #include <asm/bootinfo-vme.h>
 #include <asm/setup.h>
 #include <asm/entry.h>
@@ -534,6 +535,7 @@ func_define putn,1
 #define is_not_apollo(lab) cmpl &MACH_APOLLO,%pc@(m68k_machtype); jne lab
 #define is_not_q40(lab) cmpl &MACH_Q40,%pc@(m68k_machtype); jne lab
 #define is_not_sun3x(lab) cmpl &MACH_SUN3X,%pc@(m68k_machtype); jne lab
+#define is_not_virt(lab) cmpl &MACH_VIRT,%pc@(m68k_machtype); jne lab
 
 #define hasnt_leds(lab) cmpl &MACH_HP300,%pc@(m68k_machtype); \
                        jeq 42f; \
@@ -647,6 +649,14 @@ ENTRY(__start)
 L(test_notmac):
 #endif /* CONFIG_MAC */
 
+#ifdef CONFIG_VIRT
+       is_not_virt(L(test_notvirt))
+
+       get_bi_record BI_VIRT_GF_TTY_BASE
+       lea     %pc@(L(virt_gf_tty_base)),%a1
+       movel   %a0@,%a1@
+L(test_notvirt):
+#endif /* CONFIG_VIRT */
 
 /*
  * There are ultimately two pieces of information we want for all kinds of
@@ -1237,6 +1247,13 @@ L(mmu_init_not_mac):
 L(notsun3x):
 #endif
 
+#ifdef CONFIG_VIRT
+       is_not_virt(L(novirt))
+       mmu_map_tt      #1,#0xFF000000,#0x01000000,#_PAGE_NOCACHE_S
+       jbra    L(mmu_init_done)
+L(novirt):
+#endif
+
 #ifdef CONFIG_APOLLO
        is_not_apollo(L(notapollo))
 
@@ -3186,6 +3203,14 @@ func_start       serial_putc,%d0/%d1/%a0/%a1
 3:
 #endif
 
+#ifdef CONFIG_VIRT
+       is_not_virt(1f)
+
+       movel L(virt_gf_tty_base),%a1
+       movel %d0,%a1@(GF_PUT_CHAR)
+1:
+#endif
+
 L(serial_putc_done):
 func_return    serial_putc
 
@@ -3865,3 +3890,9 @@ q40_mem_cptr:
 L(q40_do_debug):
        .long   0
 #endif
+
+#if defined(CONFIG_VIRT)
+GF_PUT_CHAR = 0x00
+L(virt_gf_tty_base):
+       .long 0
+#endif /* CONFIG_VIRT */
index 6342ff4d2073f58e5d82959ae075c1379013e3b9..daebccdd2c09398d8ebe61b46b830f1fe0602c94 100644 (file)
@@ -270,12 +270,6 @@ out_eio:
        return -EIO;
 }
 
-asmlinkage void syscall_trace(void)
-{
-       ptrace_report_syscall(0);
-}
-
-#if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
 asmlinkage int syscall_trace_enter(void)
 {
        int ret = 0;
@@ -290,4 +284,3 @@ asmlinkage void syscall_trace_leave(void)
        if (test_thread_flag(TIF_SYSCALL_TRACE))
                ptrace_report_syscall_exit(task_pt_regs(current), 0);
 }
-#endif /* CONFIG_COLDFIRE */
index 8f94feed969c41ba8c66055ea907321574a892ca..78ab562beb315f87b9abb7aed46db92056c1074a 100644 (file)
@@ -181,6 +181,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
                                unknown = hp300_parse_bootinfo(record);
                        else if (MACH_IS_APOLLO)
                                unknown = apollo_parse_bootinfo(record);
+                       else if (MACH_IS_VIRT)
+                               unknown = virt_parse_bootinfo(record);
                        else
                                unknown = 1;
                }
@@ -311,6 +313,11 @@ void __init setup_arch(char **cmdline_p)
                cf_mmu_context_init();
                config_BSP(NULL, 0);
                break;
+#endif
+#ifdef CONFIG_VIRT
+       case MACH_VIRT:
+               config_virt();
+               break;
 #endif
        default:
                panic("No configuration setup");
index 49533f65958a64a9710fd3d3c44edef6b63865a5..b9f6908a31bc39f94c0418a298aed87470755170 100644 (file)
@@ -625,6 +625,7 @@ static inline void siginfo_build_tests(void)
        /* _sigfault._perf */
        BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10);
        BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_perf_flags) != 0x18);
 
        /* _sigpoll */
        BUILD_BUG_ON(offsetof(siginfo_t, si_band)   != 0x0c);
index d9033238d09703078c4f0f1fece8405405000f6a..f4a06492cd7a118a78d912dc82281214cbb44aa4 100644 (file)
@@ -243,7 +243,7 @@ fp_fdiv(struct fp_ext *dest, struct fp_ext *src)
                /* infinity / infinity = NaN (quiet, as always) */
                if (IS_INF(src))
                        fp_set_nan(dest);
-               /* infinity / anything else = infinity (with approprate sign) */
+               /* infinity / anything else = infinity (with appropriate sign) */
                return dest;
        }
        if (IS_INF(src)) {
index 20ddf71b43d05a2c42819acbce516d2bc5a9444a..7594a945732b11668616817cd3cb4156b00da7fe 100644 (file)
@@ -179,6 +179,12 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
                        return (void __iomem *)physaddr;
        }
 #endif
+#ifdef CONFIG_VIRT
+       if (MACH_IS_VIRT) {
+               if (physaddr >= 0xff000000 && cacheflag == IOMAP_NOCACHE_SER)
+                       return (void __iomem *)physaddr;
+       }
+#endif
 #ifdef CONFIG_COLDFIRE
        if (__cf_internalio(physaddr))
                return (void __iomem *) physaddr;
@@ -293,17 +299,20 @@ EXPORT_SYMBOL(__ioremap);
 void iounmap(void __iomem *addr)
 {
 #ifdef CONFIG_AMIGA
-       if ((!MACH_IS_AMIGA) ||
-           (((unsigned long)addr < 0x40000000) ||
-            ((unsigned long)addr > 0x60000000)))
-                       free_io_area((__force void *)addr);
-#else
+       if (MACH_IS_AMIGA &&
+           ((unsigned long)addr >= 0x40000000) &&
+           ((unsigned long)addr < 0x60000000))
+               return;
+#endif
+#ifdef CONFIG_VIRT
+       if (MACH_IS_VIRT && (unsigned long)addr >= 0xff000000)
+               return;
+#endif
 #ifdef CONFIG_COLDFIRE
        if (cf_internalio(addr))
                return;
 #endif
        free_io_area((__force void *)addr);
-#endif
 }
 EXPORT_SYMBOL(iounmap);
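
The restructured iounmap() above replaces nested negative tests with early returns. As a minimal sketch of the rule it encodes (helper name hypothetical; the MACH_IS_AMIGA/MACH_IS_VIRT machine checks are omitted here for brevity): statically translated windows were never allocated by __ioremap(), so unmapping them must be a no-op, and only everything else goes back to the io-area allocator.

static bool m68k_is_static_io(unsigned long addr)
{
	/* Amiga Zorro II/III window, mapped without get_io_area() */
	if (addr >= 0x40000000 && addr < 0x60000000)
		return true;
	/* virt machine MMIO window, mapped 1:1 at 0xff000000 and up */
	if (addr >= 0xff000000)
		return true;
	return false;
}
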
 
diff --git a/arch/m68k/virt/Makefile b/arch/m68k/virt/Makefile
new file mode 100644 (file)
index 0000000..54b9b28
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for Linux arch/m68k/virt source directory
+#
+
+obj-y          := config.o ints.o platform.o
diff --git a/arch/m68k/virt/config.c b/arch/m68k/virt/config.c
new file mode 100644 (file)
index 0000000..68d29c8
--- /dev/null
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/serial_core.h>
+#include <clocksource/timer-goldfish.h>
+
+#include <asm/bootinfo.h>
+#include <asm/bootinfo-virt.h>
+#include <asm/byteorder.h>
+#include <asm/machdep.h>
+#include <asm/virt.h>
+#include <asm/config.h>
+
+struct virt_booter_data virt_bi_data;
+
+#define VIRT_CTRL_REG_FEATURES 0x00
+#define VIRT_CTRL_REG_CMD      0x04
+
+static struct resource ctrlres;
+
+enum {
+       CMD_NOOP,
+       CMD_RESET,
+       CMD_HALT,
+       CMD_PANIC,
+};
+
+static void virt_get_model(char *str)
+{
+       /* str is 80 characters long */
+       sprintf(str, "QEMU Virtual M68K Machine (%u.%u.%u)",
+               (u8)(virt_bi_data.qemu_version >> 24),
+               (u8)(virt_bi_data.qemu_version >> 16),
+               (u8)(virt_bi_data.qemu_version >> 8));
+}
+
+static void virt_halt(void)
+{
+       void __iomem *base = (void __iomem *)virt_bi_data.ctrl.mmio;
+
+       iowrite32be(CMD_HALT, base + VIRT_CTRL_REG_CMD);
+       local_irq_disable();
+       while (1)
+               ;
+}
+
+static void virt_reset(void)
+{
+       void __iomem *base = (void __iomem *)virt_bi_data.ctrl.mmio;
+
+       iowrite32be(CMD_RESET, base + VIRT_CTRL_REG_CMD);
+       local_irq_disable();
+       while (1)
+               ;
+}
+
+/*
+ * Parse a virtual-m68k-specific record in the bootinfo
+ */
+
+int __init virt_parse_bootinfo(const struct bi_record *record)
+{
+       int unknown = 0;
+       const void *data = record->data;
+
+       switch (be16_to_cpu(record->tag)) {
+       case BI_VIRT_QEMU_VERSION:
+               virt_bi_data.qemu_version = be32_to_cpup(data);
+               break;
+       case BI_VIRT_GF_PIC_BASE:
+               virt_bi_data.pic.mmio = be32_to_cpup(data);
+               data += 4;
+               virt_bi_data.pic.irq = be32_to_cpup(data);
+               break;
+       case BI_VIRT_GF_RTC_BASE:
+               virt_bi_data.rtc.mmio = be32_to_cpup(data);
+               data += 4;
+               virt_bi_data.rtc.irq = be32_to_cpup(data);
+               break;
+       case BI_VIRT_GF_TTY_BASE:
+               virt_bi_data.tty.mmio = be32_to_cpup(data);
+               data += 4;
+               virt_bi_data.tty.irq = be32_to_cpup(data);
+               break;
+       case BI_VIRT_CTRL_BASE:
+               virt_bi_data.ctrl.mmio = be32_to_cpup(data);
+               data += 4;
+               virt_bi_data.ctrl.irq = be32_to_cpup(data);
+               break;
+       case BI_VIRT_VIRTIO_BASE:
+               virt_bi_data.virtio.mmio = be32_to_cpup(data);
+               data += 4;
+               virt_bi_data.virtio.irq = be32_to_cpup(data);
+               break;
+       default:
+               unknown = 1;
+               break;
+       }
+       return unknown;
+}
+
+static void __init virt_sched_init(void)
+{
+       goldfish_timer_init(virt_bi_data.rtc.irq,
+                           (void __iomem *)virt_bi_data.rtc.mmio);
+}
+
+void __init config_virt(void)
+{
+       char earlycon[24];
+
+       snprintf(earlycon, sizeof(earlycon), "early_gf_tty,0x%08x",
+                virt_bi_data.tty.mmio);
+       setup_earlycon(earlycon);
+
+       ctrlres = (struct resource)
+                  DEFINE_RES_MEM_NAMED(virt_bi_data.ctrl.mmio, 0x100,
+                                       "virtctrl");
+
+       if (request_resource(&iomem_resource, &ctrlres)) {
+               pr_err("Cannot allocate virt controller resource\n");
+               return;
+       }
+
+       mach_init_IRQ = virt_init_IRQ;
+       mach_sched_init = virt_sched_init;
+       mach_get_model = virt_get_model;
+       mach_reset = virt_reset;
+       mach_halt = virt_halt;
+       mach_power_off = virt_halt;
+}
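
Every BI_VIRT_* device case in virt_parse_bootinfo() above decodes the same two-word payload. A minimal sketch of that shared shape, assuming the record layout used by the cases above (virt_decode_dev() and struct virt_dev_info are hypothetical):

struct virt_dev_info {
	u32 mmio;	/* first big-endian word: MMIO base */
	u32 irq;	/* second big-endian word: first IRQ */
};

static void virt_decode_dev(const void *data, struct virt_dev_info *dev)
{
	dev->mmio = be32_to_cpup(data);
	dev->irq  = be32_to_cpup(data + 4);
}
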
diff --git a/arch/m68k/virt/ints.c b/arch/m68k/virt/ints.c
new file mode 100644 (file)
index 0000000..95818f9
--- /dev/null
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+#include <asm/hwtest.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/virt.h>
+
+#define GFPIC_REG_IRQ_PENDING           0x04
+#define GFPIC_REG_IRQ_DISABLE_ALL       0x08
+#define GFPIC_REG_IRQ_DISABLE           0x0c
+#define GFPIC_REG_IRQ_ENABLE            0x10
+
+extern void show_registers(struct pt_regs *regs);
+
+static struct resource picres[6];
+static const char *picname[6] = {
+       "goldfish_pic.0",
+       "goldfish_pic.1",
+       "goldfish_pic.2",
+       "goldfish_pic.3",
+       "goldfish_pic.4",
+       "goldfish_pic.5"
+};
+
+/*
+ * 6 goldfish-pic for CPU IRQ #1 to IRQ #6
+ * CPU IRQ #1 -> PIC #1
+ *               IRQ #1 to IRQ #31 -> unused
+ *               IRQ #32 -> goldfish-tty
+ * CPU IRQ #2 -> PIC #2
+ *               IRQ #1 to IRQ #32 -> virtio-mmio from 1 to 32
+ * CPU IRQ #3 -> PIC #3
+ *               IRQ #1 to IRQ #32 -> virtio-mmio from 33 to 64
+ * CPU IRQ #4 -> PIC #4
+ *               IRQ #1 to IRQ #32 -> virtio-mmio from 65 to 96
+ * CPU IRQ #5 -> PIC #5
+ *               IRQ #1 to IRQ #32 -> virtio-mmio from 97 to 128
+ * CPU IRQ #6 -> PIC #6
+ *               IRQ #1 -> goldfish-timer
+ *               IRQ #2 -> goldfish-rtc
+ *               IRQ #3 to IRQ #32 -> unused
+ * CPU IRQ #7 -> NMI
+ */
+
+static u32 gfpic_read(int pic, int reg)
+{
+       void __iomem *base = (void __iomem *)(virt_bi_data.pic.mmio +
+                                             pic * 0x1000);
+
+       return ioread32be(base + reg);
+}
+
+static void gfpic_write(u32 value, int pic, int reg)
+{
+       void __iomem *base = (void __iomem *)(virt_bi_data.pic.mmio +
+                                             pic * 0x1000);
+
+       iowrite32be(value, base + reg);
+}
+
+#define GF_PIC(irq) ((irq - IRQ_USER) / 32)
+#define GF_IRQ(irq) ((irq - IRQ_USER) % 32)
+
+static void virt_irq_enable(struct irq_data *data)
+{
+       gfpic_write(BIT(GF_IRQ(data->irq)), GF_PIC(data->irq),
+                   GFPIC_REG_IRQ_ENABLE);
+}
+
+static void virt_irq_disable(struct irq_data *data)
+{
+       gfpic_write(BIT(GF_IRQ(data->irq)), GF_PIC(data->irq),
+                   GFPIC_REG_IRQ_DISABLE);
+}
+
+static unsigned int virt_irq_startup(struct irq_data *data)
+{
+       virt_irq_enable(data);
+       return 0;
+}
+
+static irqreturn_t virt_nmi_handler(int irq, void *dev_id)
+{
+       static int in_nmi;
+
+       if (READ_ONCE(in_nmi))
+               return IRQ_HANDLED;
+       WRITE_ONCE(in_nmi, 1);
+
+       pr_warn("Non-Maskable Interrupt\n");
+       show_registers(get_irq_regs());
+
+       WRITE_ONCE(in_nmi, 0);
+       return IRQ_HANDLED;
+}
+
+static struct irq_chip virt_irq_chip = {
+       .name           = "virt",
+       .irq_enable     = virt_irq_enable,
+       .irq_disable    = virt_irq_disable,
+       .irq_startup    = virt_irq_startup,
+       .irq_shutdown   = virt_irq_disable,
+};
+
+static void goldfish_pic_irq(struct irq_desc *desc)
+{
+       u32 irq_pending;
+       unsigned int irq_num;
+       unsigned int pic = desc->irq_data.irq - 1;
+
+       irq_pending = gfpic_read(pic, GFPIC_REG_IRQ_PENDING);
+       irq_num = IRQ_USER + pic * 32;
+
+       do {
+               if (irq_pending & 1)
+                       generic_handle_irq(irq_num);
+               ++irq_num;
+               irq_pending >>= 1;
+       } while (irq_pending);
+}
+
+void __init virt_init_IRQ(void)
+{
+       unsigned int i;
+
+       m68k_setup_irq_controller(&virt_irq_chip, handle_simple_irq, IRQ_USER,
+                                 NUM_VIRT_SOURCES - IRQ_USER);
+
+       for (i = 0; i < 6; i++) {
+
+               picres[i] = (struct resource)
+                   DEFINE_RES_MEM_NAMED(virt_bi_data.pic.mmio + i * 0x1000,
+                                        0x1000, picname[i]);
+               if (request_resource(&iomem_resource, &picres[i])) {
+                       pr_err("Cannot allocate %s resource\n", picname[i]);
+                       return;
+               }
+
+               irq_set_chained_handler(virt_bi_data.pic.irq + i,
+                                       goldfish_pic_irq);
+       }
+
+       if (request_irq(IRQ_AUTO_7, virt_nmi_handler, 0, "NMI",
+                       virt_nmi_handler))
+               pr_err("Couldn't register NMI\n");
+}
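
The comment block above fixes the numbering that GF_PIC()/GF_IRQ() and goldfish_pic_irq() rely on: the Linux IRQ for pending bit s (0-31) on goldfish PIC p (0-5) is IRQ_USER + p * 32 + s. A sketch of the inverse mapping (virt_source_to_irq() is a hypothetical helper, not part of the patch):

static inline unsigned int virt_source_to_irq(unsigned int pic,
					      unsigned int source)
{
	/* inverse of GF_PIC()/GF_IRQ(); source is the 0-based pending bit */
	return IRQ_USER + pic * 32 + source;
}
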
diff --git a/arch/m68k/virt/platform.c b/arch/m68k/virt/platform.c
new file mode 100644 (file)
index 0000000..cb820f1
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/memblock.h>
+#include <asm/virt.h>
+#include <asm/irq.h>
+
+#define VIRTIO_BUS_NB  128
+
+static int __init virt_virtio_init(unsigned int id)
+{
+       const struct resource res[] = {
+               DEFINE_RES_MEM(virt_bi_data.virtio.mmio + id * 0x200, 0x200),
+               DEFINE_RES_IRQ(virt_bi_data.virtio.irq + id),
+       };
+       struct platform_device *pdev;
+
+       pdev = platform_device_register_simple("virtio-mmio", id,
+                                              res, ARRAY_SIZE(res));
+       if (IS_ERR(pdev))
+               return PTR_ERR(pdev);
+
+       return 0;
+}
+
+static int __init virt_platform_init(void)
+{
+       const struct resource goldfish_tty_res[] = {
+               DEFINE_RES_MEM(virt_bi_data.tty.mmio, 1),
+               DEFINE_RES_IRQ(virt_bi_data.tty.irq),
+       };
+       /* this is the second gf-rtc, the first one is used by the scheduler */
+       const struct resource goldfish_rtc_res[] = {
+               DEFINE_RES_MEM(virt_bi_data.rtc.mmio + 0x1000, 0x1000),
+               DEFINE_RES_IRQ(virt_bi_data.rtc.irq + 1),
+       };
+       struct platform_device *pdev;
+       unsigned int i;
+
+       if (!MACH_IS_VIRT)
+               return -ENODEV;
+
+       /* We need this to have DMA'able memory provided to goldfish-tty */
+       min_low_pfn = 0;
+
+       pdev = platform_device_register_simple("goldfish_tty",
+                                              PLATFORM_DEVID_NONE,
+                                              goldfish_tty_res,
+                                              ARRAY_SIZE(goldfish_tty_res));
+       if (IS_ERR(pdev))
+               return PTR_ERR(pdev);
+
+       pdev = platform_device_register_simple("goldfish_rtc",
+                                              PLATFORM_DEVID_NONE,
+                                              goldfish_rtc_res,
+                                              ARRAY_SIZE(goldfish_rtc_res));
+       if (IS_ERR(pdev))
+               return PTR_ERR(pdev);
+
+       for (i = 0; i < VIRTIO_BUS_NB; i++) {
+               int err;
+
+               err = virt_virtio_init(i);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+arch_initcall(virt_platform_init);
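
Taken together, the loop above registers a contiguous bank of virtio-mmio transports: 128 slots of 0x200 bytes each, i.e. a 0x10000-byte window, with one IRQ per slot. A back-of-the-envelope sketch of the layout it assumes (helper name hypothetical):

static inline unsigned long virt_virtio_slot_base(unsigned int id)
{
	/* valid for id < VIRTIO_BUS_NB (128); IRQ is virtio.irq + id */
	return virt_bi_data.virtio.mmio + id * 0x200;
}
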
index 8026baf46e729262065571c785635bd6ed876795..2e107886f97ac73d38c6ad70c5fb56e7d2a9cd73 100644 (file)
@@ -76,25 +76,24 @@ static inline cycles_t get_cycles(void)
        else
                return 0;       /* no usable counter */
 }
+#define get_cycles get_cycles
 
 /*
  * Like get_cycles - but where c0_count is not available we desperately
  * use c0_random in an attempt to get at least a little bit of entropy.
- *
- * R6000 and R6000A neither have a count register nor a random register.
- * That leaves no entropy source in the CPU itself.
  */
 static inline unsigned long random_get_entropy(void)
 {
-       unsigned int prid = read_c0_prid();
-       unsigned int imp = prid & PRID_IMP_MASK;
+       unsigned int c0_random;
 
-       if (can_use_mips_counter(prid))
+       if (can_use_mips_counter(read_c0_prid()))
                return read_c0_count();
-       else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A))
-               return read_c0_random();
+
+       if (cpu_has_3kex)
+               c0_random = (read_c0_random() >> 8) & 0x3f;
        else
-               return 0;       /* no usable register */
+               c0_random = read_c0_random() & 0x3f;
+       return (random_get_entropy_fallback() << 6) | (0x3f - c0_random);
 }
 #define random_get_entropy random_get_entropy
 
index a769f871b28d91bef6f14157f0249b4cbcf8a67e..40a1adc9bd03e9a62c05911bf30befa10b75b0e2 100644 (file)
@@ -8,5 +8,8 @@
 typedef unsigned long cycles_t;
 
 extern cycles_t get_cycles(void);
+#define get_cycles get_cycles
+
+#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
 
 #endif
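
The GNU "x ?: y" extension used in the random_get_entropy() definition above evaluates x once and yields it when non-zero, otherwise y. An equivalent expanded sketch (function name illustrative):

static inline unsigned long random_get_entropy_expanded(void)
{
	unsigned long c = (unsigned long)get_cycles();

	return c ? c : random_get_entropy_fallback();
}
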
index d52b4e536e3f905e2e20ffa7060d474f4d03ee44..5487fa93dd9becb2ccfdc589fd20c2e17310dcf2 100644 (file)
@@ -23,6 +23,7 @@ static inline cycles_t get_cycles(void)
 {
        return mfspr(SPR_TTCR);
 }
+#define get_cycles get_cycles
 
 /* This isn't really used any more */
 #define CLOCK_TICK_RATE 1000
index 15f1b38dfe03b70d6c6d129081b3ea3469017962..2fa6cefa62ca756d9aec84cb1ada88e484474a43 100644 (file)
@@ -521,6 +521,15 @@ _start:
        l.ori   r3,r0,0x1
        l.mtspr r0,r3,SPR_SR
 
+       /*
+        * Start the TTCR as early as possible, so that the RNG can make use of
+        * measurements of boot time from the earliest opportunity. Especially
+        * important is that the TTCR does not return zero by the time we reach
+        * random_init().
+        */
+       l.movhi r3,hi(SPR_TTMR_CR)
+       l.mtspr r0,r3,SPR_TTMR
+
        CLEAR_GPR(r1)
        CLEAR_GPR(r2)
        CLEAR_GPR(r3)
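
In C terms, the two new instructions above amount to a single SPR write that puts the tick timer into continuous-run mode so TTCR starts counting immediately; a sketch using the generic mtspr() helper (the boot code necessarily does this in assembly, before any C runs):

static inline void start_tick_counter(void)
{
	/* SPR_TTMR_CR sets the continuous-run mode bits in TTMR */
	mtspr(SPR_TTMR, SPR_TTMR_CR);
}
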
index e8b4a03343d393f6baa6f44e1a1b05b4d0cdca66..8d03b3b26229e7d057076999fc02e77145494b1e 100644 (file)
@@ -59,20 +59,12 @@ void flush_dcache_page(struct page *page);
        flush_kernel_icache_range_asm(s,e);             \
 } while (0)
 
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { \
-       flush_cache_page(vma, vaddr, page_to_pfn(page)); \
-       memcpy(dst, src, len); \
-       flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); \
-} while (0)
-
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-do { \
-       flush_cache_page(vma, vaddr, page_to_pfn(page)); \
-       memcpy(dst, src, len); \
-} while (0)
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn);
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+               unsigned long user_vaddr, void *dst, void *src, int len);
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+               unsigned long user_vaddr, void *dst, void *src, int len);
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
+               unsigned long pfn);
 void flush_cache_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end);
 
@@ -80,16 +72,7 @@ void flush_cache_range(struct vm_area_struct *vma,
 void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 
 #define ARCH_HAS_FLUSH_ANON_PAGE
-static inline void
-flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
-{
-       if (PageAnon(page)) {
-               flush_tlb_page(vma, vmaddr);
-               preempt_disable();
-               flush_dcache_page_asm(page_to_phys(page), vmaddr);
-               preempt_enable();
-       }
-}
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
 
 #define ARCH_HAS_FLUSH_ON_KUNMAP
 static inline void kunmap_flush_on_unmap(void *addr)
index 0561568f7b4898d4959b97576f25ca08cde37ebe..6faaaa3ebe9b8f84fab3f34471a6d049b34291fa 100644 (file)
 #define copy_page(to, from)    copy_page_asm((void *)(to), (void *)(from))
 
 struct page;
+struct vm_area_struct;
 
 void clear_page_asm(void *page);
 void copy_page_asm(void *to, void *from);
 #define clear_user_page(vto, vaddr, page) clear_page_asm(vto)
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-                       struct page *pg);
+void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr,
+               struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
 
 /*
  * These are used to make use of C type-checking..
index 06b510f8172e3f429c90e73410d1b574c59345a8..b4622cb06a75e36a45e368d5d7d82bb787a553a9 100644 (file)
 
 typedef unsigned long cycles_t;
 
-static inline cycles_t get_cycles (void)
+static inline cycles_t get_cycles(void)
 {
        return mfctl(16);
 }
+#define get_cycles get_cycles
 
 #endif
index e7911225a4f898e023f6879ee7bc9706d4f1a5ac..0fd04073d4b685e5fea2b2ef8389f722db5c6793 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/shmparam.h>
+#include <asm/mmu_context.h>
 
 int split_tlb __ro_after_init;
 int dcache_stride __ro_after_init;
@@ -91,7 +92,7 @@ static inline void flush_data_cache(void)
 }
 
 
-/* Virtual address of pfn.  */
+/* Kernel virtual address of pfn.  */
 #define pfn_va(pfn)    __va(PFN_PHYS(pfn))
 
 void
@@ -124,11 +125,13 @@ show_cache_info(struct seq_file *m)
                cache_info.ic_size/1024 );
        if (cache_info.dc_loop != 1)
                snprintf(buf, 32, "%lu-way associative", cache_info.dc_loop);
-       seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s)\n",
+       seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s, alias=%d)\n",
                cache_info.dc_size/1024,
                (cache_info.dc_conf.cc_wt ? "WT":"WB"),
                (cache_info.dc_conf.cc_sh ? ", shared I/D":""),
-               ((cache_info.dc_loop == 1) ? "direct mapped" : buf));
+               ((cache_info.dc_loop == 1) ? "direct mapped" : buf),
+               cache_info.dc_conf.cc_alias
+       );
        seq_printf(m, "ITLB entries\t: %ld\n" "DTLB entries\t: %ld%s\n",
                cache_info.it_size,
                cache_info.dt_size,
@@ -324,25 +327,81 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
        preempt_enable();
 }
 
-static inline void
-__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
-                  unsigned long physaddr)
+static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
-       if (!static_branch_likely(&parisc_has_cache))
-               return;
+       unsigned long flags, space, pgd, prot;
+#ifdef CONFIG_TLB_PTLOCK
+       unsigned long pgd_lock;
+#endif
+
+       vmaddr &= PAGE_MASK;
+
        preempt_disable();
-       purge_dcache_page_asm(physaddr, vmaddr);
+
+       /* Set context for flush */
+       local_irq_save(flags);
+       prot = mfctl(8);
+       space = mfsp(SR_USER);
+       pgd = mfctl(25);
+#ifdef CONFIG_TLB_PTLOCK
+       pgd_lock = mfctl(28);
+#endif
+       switch_mm_irqs_off(NULL, vma->vm_mm, NULL);
+       local_irq_restore(flags);
+
+       flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
        if (vma->vm_flags & VM_EXEC)
-               flush_icache_page_asm(physaddr, vmaddr);
+               flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
+       flush_tlb_page(vma, vmaddr);
+
+       /* Restore previous context */
+       local_irq_save(flags);
+#ifdef CONFIG_TLB_PTLOCK
+       mtctl(pgd_lock, 28);
+#endif
+       mtctl(pgd, 25);
+       mtsp(space, SR_USER);
+       mtctl(prot, 8);
+       local_irq_restore(flags);
+
        preempt_enable();
 }
 
+static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr)
+{
+       pte_t *ptep = NULL;
+       pgd_t *pgd = mm->pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       if (!pgd_none(*pgd)) {
+               p4d = p4d_offset(pgd, addr);
+               if (!p4d_none(*p4d)) {
+                       pud = pud_offset(p4d, addr);
+                       if (!pud_none(*pud)) {
+                               pmd = pmd_offset(pud, addr);
+                               if (!pmd_none(*pmd))
+                                       ptep = pte_offset_map(pmd, addr);
+                       }
+               }
+       }
+       return ptep;
+}
+
+static inline bool pte_needs_flush(pte_t pte)
+{
+       return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_NO_CACHE))
+               == (_PAGE_PRESENT | _PAGE_ACCESSED);
+}
+
 void flush_dcache_page(struct page *page)
 {
        struct address_space *mapping = page_mapping_file(page);
        struct vm_area_struct *mpnt;
        unsigned long offset;
        unsigned long addr, old_addr = 0;
+       unsigned long count = 0;
        pgoff_t pgoff;
 
        if (mapping && !mapping_mapped(mapping)) {
@@ -357,33 +416,52 @@ void flush_dcache_page(struct page *page)
 
        pgoff = page->index;
 
-       /* We have carefully arranged in arch_get_unmapped_area() that
+       /*
+        * We have carefully arranged in arch_get_unmapped_area() that
         * *any* mappings of a file are always congruently mapped (whether
         * declared as MAP_PRIVATE or MAP_SHARED), so we only need
-        * to flush one address here for them all to become coherent */
-
+        * to flush one address here for them all to become coherent
+        * on machines that support equivalent aliasing
+        */
        flush_dcache_mmap_lock(mapping);
        vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
                offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
                addr = mpnt->vm_start + offset;
+               if (parisc_requires_coherency()) {
+                       pte_t *ptep;
 
-               /* The TLB is the engine of coherence on parisc: The
-                * CPU is entitled to speculate any page with a TLB
-                * mapping, so here we kill the mapping then flush the
-                * page along a special flush only alias mapping.
-                * This guarantees that the page is no-longer in the
-                * cache for any process and nor may it be
-                * speculatively read in (until the user or kernel
-                * specifically accesses it, of course) */
-
-               flush_tlb_page(mpnt, addr);
-               if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
-                                     != (addr & (SHM_COLOUR - 1))) {
-                       __flush_cache_page(mpnt, addr, page_to_phys(page));
-                       if (parisc_requires_coherency() && old_addr)
-                               printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", old_addr, addr, mpnt->vm_file);
-                       old_addr = addr;
+                       ptep = get_ptep(mpnt->vm_mm, addr);
+                       if (ptep && pte_needs_flush(*ptep))
+                               flush_user_cache_page(mpnt, addr);
+               } else {
+                       /*
+                        * The TLB is the engine of coherence on parisc:
+                        * The CPU is entitled to speculate any page
+                        * with a TLB mapping, so here we kill the
+                        * mapping then flush the page along a special
+                        * flush only alias mapping. This guarantees that
+                        * the page is no-longer in the cache for any
+                        * process and nor may it be speculatively read
+                        * in (until the user or kernel specifically
+                        * accesses it, of course)
+                        */
+                       flush_tlb_page(mpnt, addr);
+                       if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
+                                       != (addr & (SHM_COLOUR - 1))) {
+                               __flush_cache_page(mpnt, addr, page_to_phys(page));
+                               /*
+                                * Software is allowed to have any number
+                                * of private mappings to a page.
+                                */
+                               if (!(mpnt->vm_flags & VM_SHARED))
+                                       continue;
+                               if (old_addr)
+                                       pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n",
+                                               old_addr, addr, mpnt->vm_file);
+                               old_addr = addr;
+                       }
                }
+               WARN_ON(++count == 4096);
        }
        flush_dcache_mmap_unlock(mapping);
 }
@@ -403,7 +481,7 @@ void __init parisc_setup_cache_timing(void)
 {
        unsigned long rangetime, alltime;
        unsigned long size;
-       unsigned long threshold;
+       unsigned long threshold, threshold2;
 
        alltime = mfctl(16);
        flush_data_cache();
@@ -417,11 +495,16 @@ void __init parisc_setup_cache_timing(void)
        printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n",
                alltime, size, rangetime);
 
-       threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
-       if (threshold > cache_info.dc_size)
-               threshold = cache_info.dc_size;
-       if (threshold)
-               parisc_cache_flush_threshold = threshold;
+       threshold = L1_CACHE_ALIGN((unsigned long)((uint64_t)size * alltime / rangetime));
+       pr_info("Calculated flush threshold is %lu KiB\n",
+               threshold/1024);
+
+       /*
+        * The threshold computed above isn't very reliable. The following
+        * heuristic works reasonably well on c8000/rp3440.
+        */
+       threshold2 = cache_info.dc_size * num_online_cpus();
+       parisc_cache_flush_threshold = threshold2;
        printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
                parisc_cache_flush_threshold/1024);
 
@@ -477,19 +560,47 @@ void flush_kernel_dcache_page_addr(void *addr)
 }
 EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
 
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-       struct page *pg)
+static void flush_cache_page_if_present(struct vm_area_struct *vma,
+       unsigned long vmaddr, unsigned long pfn)
 {
-       /* Copy using kernel mapping.  No coherency is needed (all in
-         kunmap) for the `to' page.  However, the `from' page needs to
-         be flushed through a mapping equivalent to the user mapping
-         before it can be accessed through the kernel mapping. */
-       preempt_disable();
-       flush_dcache_page_asm(__pa(vfrom), vaddr);
-       copy_page_asm(vto, vfrom);
-       preempt_enable();
+       pte_t *ptep = get_ptep(vma->vm_mm, vmaddr);
+
+       /*
+        * The pte check is racy and sometimes the flush will trigger
+        * a non-access TLB miss. Hopefully, the page has already been
+        * flushed.
+        */
+       if (ptep && pte_needs_flush(*ptep))
+               flush_cache_page(vma, vmaddr, pfn);
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+       unsigned long vaddr, struct vm_area_struct *vma)
+{
+       void *kto, *kfrom;
+
+       kfrom = kmap_local_page(from);
+       kto = kmap_local_page(to);
+       flush_cache_page_if_present(vma, vaddr, page_to_pfn(from));
+       copy_page_asm(kto, kfrom);
+       kunmap_local(kto);
+       kunmap_local(kfrom);
+}
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+               unsigned long user_vaddr, void *dst, void *src, int len)
+{
+       flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+       memcpy(dst, src, len);
+       flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len);
+}
+
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+               unsigned long user_vaddr, void *dst, void *src, int len)
+{
+       flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+       memcpy(dst, src, len);
 }
-EXPORT_SYMBOL(copy_user_page);
 
 /* __flush_tlb_range()
  *
@@ -520,92 +631,105 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
        return 0;
 }
 
-static inline unsigned long mm_total_size(struct mm_struct *mm)
+static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-       struct vm_area_struct *vma;
-       unsigned long usize = 0;
-
-       for (vma = mm->mmap; vma; vma = vma->vm_next)
-               usize += vma->vm_end - vma->vm_start;
-       return usize;
-}
-
-static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr)
-{
-       pte_t *ptep = NULL;
+       unsigned long addr, pfn;
+       pte_t *ptep;
 
-       if (!pgd_none(*pgd)) {
-               p4d_t *p4d = p4d_offset(pgd, addr);
-               if (!p4d_none(*p4d)) {
-                       pud_t *pud = pud_offset(p4d, addr);
-                       if (!pud_none(*pud)) {
-                               pmd_t *pmd = pmd_offset(pud, addr);
-                               if (!pmd_none(*pmd))
-                                       ptep = pte_offset_map(pmd, addr);
+       for (addr = start; addr < end; addr += PAGE_SIZE) {
+               /*
+                * The vma can contain pages that aren't present. Although
+                * the pte search is expensive, we need the pte to find the
+                * page pfn and to check whether the page should be flushed.
+                */
+               ptep = get_ptep(vma->vm_mm, addr);
+               if (ptep && pte_needs_flush(*ptep)) {
+                       if (parisc_requires_coherency()) {
+                               flush_user_cache_page(vma, addr);
+                       } else {
+                               pfn = pte_pfn(*ptep);
+                               if (WARN_ON(!pfn_valid(pfn)))
+                                       return;
+                               __flush_cache_page(vma, addr, PFN_PHYS(pfn));
                        }
                }
        }
-       return ptep;
 }
 
-static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm,
-                             unsigned long start, unsigned long end)
+static inline unsigned long mm_total_size(struct mm_struct *mm)
 {
-       unsigned long addr, pfn;
-       pte_t *ptep;
+       struct vm_area_struct *vma;
+       unsigned long usize = 0;
 
-       for (addr = start; addr < end; addr += PAGE_SIZE) {
-               ptep = get_ptep(mm->pgd, addr);
-               if (ptep) {
-                       pfn = pte_pfn(*ptep);
-                       flush_cache_page(vma, addr, pfn);
-               }
-       }
+       for (vma = mm->mmap; vma && usize < parisc_cache_flush_threshold; vma = vma->vm_next)
+               usize += vma->vm_end - vma->vm_start;
+       return usize;
 }
 
 void flush_cache_mm(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
 
-       /* Flushing the whole cache on each cpu takes forever on
-          rp3440, etc.  So, avoid it if the mm isn't too big.  */
-       if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
-           mm_total_size(mm) >= parisc_cache_flush_threshold) {
-               if (mm->context.space_id)
-                       flush_tlb_all();
+       /*
+        * Flushing the whole cache on each cpu takes forever on
+        * rp3440, etc. So, avoid it if the mm isn't too big.
+        *
+        * Note that we must flush the entire cache on machines
+        * with aliasing caches to prevent random segmentation
+        * faults.
+        */
+       if (!parisc_requires_coherency()
+           ||  mm_total_size(mm) >= parisc_cache_flush_threshold) {
+               if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+                       return;
+               flush_tlb_all();
                flush_cache_all();
                return;
        }
 
+       /* Flush mm */
        for (vma = mm->mmap; vma; vma = vma->vm_next)
-               flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end);
+               flush_cache_pages(vma, vma->vm_start, vma->vm_end);
 }
 
-void flush_cache_range(struct vm_area_struct *vma,
-               unsigned long start, unsigned long end)
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-       if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
-           end - start >= parisc_cache_flush_threshold) {
-               if (vma->vm_mm->context.space_id)
-                       flush_tlb_range(vma, start, end);
+       if (!parisc_requires_coherency()
+           || end - start >= parisc_cache_flush_threshold) {
+               if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+                       return;
+               flush_tlb_range(vma, start, end);
                flush_cache_all();
                return;
        }
 
-       flush_cache_pages(vma, vma->vm_mm, start, end);
+       flush_cache_pages(vma, start, end);
 }
 
-void
-flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
 {
-       if (pfn_valid(pfn)) {
-               if (likely(vma->vm_mm->context.space_id)) {
-                       flush_tlb_page(vma, vmaddr);
-                       __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
-               } else {
-                       __purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
-               }
+       if (WARN_ON(!pfn_valid(pfn)))
+               return;
+       if (parisc_requires_coherency())
+               flush_user_cache_page(vma, vmaddr);
+       else
+               __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
+}
+
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+{
+       if (!PageAnon(page))
+               return;
+
+       if (parisc_requires_coherency()) {
+               flush_user_cache_page(vma, vmaddr);
+               return;
        }
+
+       flush_tlb_page(vma, vmaddr);
+       preempt_disable();
+       flush_dcache_page_asm(page_to_phys(page), vmaddr);
+       preempt_enable();
 }
 
 void flush_kernel_vmap_range(void *vaddr, int size)
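
The SHM_COLOUR test in flush_dcache_page() above is the aliasing rule the whole file leans on: two user mappings are cache-congruent when their offsets within the SHM_COLOUR window match, so only one flush per colour is needed on machines with equivalent aliasing. As a sketch (helper hypothetical):

static inline bool same_cache_colour(unsigned long a, unsigned long b)
{
	/* congruent mappings share one flush along the alias mapping */
	return (a & (SHM_COLOUR - 1)) == (b & (SHM_COLOUR - 1));
}
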
index 80a0ab372802db148a1ccb0867bc0eb8ae3b7b69..e59574f65e641a09cbedb2e0ca7fa5e6045f3650 100644 (file)
@@ -40,10 +40,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags,
 
        *need_unmap = 1;
        set_fixmap(fixmap, page_to_phys(page));
-       if (flags)
-               raw_spin_lock_irqsave(&patch_lock, *flags);
-       else
-               __acquire(&patch_lock);
+       raw_spin_lock_irqsave(&patch_lock, *flags);
 
        return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
 }
@@ -52,10 +49,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
 {
        clear_fixmap(fixmap);
 
-       if (flags)
-               raw_spin_unlock_irqrestore(&patch_lock, *flags);
-       else
-               __release(&patch_lock);
+       raw_spin_unlock_irqrestore(&patch_lock, *flags);
 }
 
 void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
@@ -67,8 +61,9 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
        int mapped;
 
        /* Make sure we don't have any aliases in cache */
-       flush_kernel_vmap_range(addr, len);
-       flush_icache_range(start, end);
+       flush_kernel_dcache_range_asm(start, end);
+       flush_kernel_icache_range_asm(start, end);
+       flush_tlb_kernel_range(start, end);
 
        p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped);
 
@@ -81,8 +76,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
                         * We're crossing a page boundary, so
                         * need to remap
                         */
-                       flush_kernel_vmap_range((void *)fixmap,
-                                               (p-fixmap) * sizeof(*p));
+                       flush_kernel_dcache_range_asm((unsigned long)fixmap,
+                                                     (unsigned long)p);
+                       flush_tlb_kernel_range((unsigned long)fixmap,
+                                              (unsigned long)p);
                        if (mapped)
                                patch_unmap(FIX_TEXT_POKE0, &flags);
                        p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags,
@@ -90,10 +87,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
                }
        }
 
-       flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p));
+       flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p);
+       flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p);
        if (mapped)
                patch_unmap(FIX_TEXT_POKE0, &flags);
-       flush_icache_range(start, end);
 }
 
 void __kprobes __patch_text(void *addr, u32 insn)
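
With the NULL-flags branch gone, every caller of the helpers above follows one pattern: flags is now mandatory, so patch_lock is always held across the poke window. A minimal usage sketch for a single-word poke (function and variable names illustrative; the cache and TLB flushes that __patch_text_multiple() performs are omitted here):

static void poke_word(u32 *addr, u32 insn)
{
	unsigned long flags;
	int mapped;
	u32 *p;

	p = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped);	/* takes patch_lock */
	*p = insn;						/* write via fixmap alias */
	if (mapped)
		patch_unmap(FIX_TEXT_POKE0, &flags);		/* drops patch_lock */
}
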
index f114e102aaf213ab69c5f39665faf9a5737693a9..84bc437be5cd1f4d5efea94d024389650d6bd46a 100644 (file)
@@ -22,6 +22,8 @@
 
 #include <asm/traps.h>
 
+#define DEBUG_NATLB 0
+
 /* Various important other fields */
 #define bit22set(x)            (x & 0x00000200)
 #define bits23_25set(x)                (x & 0x000001c0)
@@ -450,8 +452,8 @@ handle_nadtlb_fault(struct pt_regs *regs)
                fallthrough;
        case 0x380:
                /* PDC and FIC instructions */
-               if (printk_ratelimit()) {
-                       pr_warn("BUG: nullifying cache flush/purge instruction\n");
+               if (DEBUG_NATLB && printk_ratelimit()) {
+                       pr_warn("WARNING: nullifying cache flush/purge instruction\n");
                        show_regs(regs);
                }
                if (insn & 0x20) {
index ecbae1832de31ffd540ccaaf085d288d478c83ab..61a4736355c244448104080e144631a3cb8839b2 100644 (file)
@@ -13,7 +13,8 @@
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 .macro __EMIT_BUG_ENTRY addr,file,line,flags
         .section __bug_table,"aw"
-5001:   .4byte \addr - 5001b, 5002f - 5001b
+5001:   .4byte \addr - .
+        .4byte 5002f - .
         .short \line, \flags
         .org 5001b+BUG_ENTRY_SIZE
         .previous
@@ -24,7 +25,7 @@
 #else
 .macro __EMIT_BUG_ENTRY addr,file,line,flags
         .section __bug_table,"aw"
-5001:   .4byte \addr - 5001b
+5001:   .4byte \addr - .
         .short \flags
         .org 5001b+BUG_ENTRY_SIZE
         .previous
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 #define _EMIT_BUG_ENTRY                                \
        ".section __bug_table,\"aw\"\n"         \
-       "2:\t.4byte 1b - 2b, %0 - 2b\n"         \
-       "\t.short %1, %2\n"                     \
+       "2:     .4byte 1b - .\n"                \
+       "       .4byte %0 - .\n"                \
+       "       .short %1, %2\n"                \
        ".org 2b+%3\n"                          \
        ".previous\n"
 #else
 #define _EMIT_BUG_ENTRY                                \
        ".section __bug_table,\"aw\"\n"         \
-       "2:\t.4byte 1b - 2b\n"                  \
-       "\t.short %2\n"                         \
+       "2:     .4byte 1b - .\n"                \
+       "       .short %2\n"                    \
        ".org 2b+%3\n"                          \
        ".previous\n"
 #endif
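
Entries emitted as ".4byte \addr - ." are self-relative: the stored word is the target minus the address of the field itself, so a consumer recovers the absolute address by adding the field's own location. A sketch, assuming the usual GENERIC_BUG_RELATIVE_POINTERS decoding (bug_addr() is hypothetical):

static inline unsigned long bug_addr(const s32 *field)
{
	/* stored word = target - &field, so add the field's own address */
	return (unsigned long)field + *field;
}
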
index fa2e76e4093a366d7ed75f45fbe8315cf84c76ba..14b4489de52c539922fe9f547076087c85f2af38 100644 (file)
@@ -19,6 +19,7 @@ static inline cycles_t get_cycles(void)
 {
        return mftb();
 }
+#define get_cycles get_cycles
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_TIMEX_H */
index 65562c4a0a690b7672eddacfe81c2ecda083fcd7..4c09c6688ac6685665ddfc82aef3376a52b366d9 100644 (file)
@@ -752,7 +752,7 @@ u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
         * FIXME: How do i get PID? Do I really need it?
         * prstatus.pr_pid = ????
         */
-       elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+       elf_core_copy_regs(&prstatus.pr_reg, regs);
        buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
                              &prstatus, sizeof(prstatus));
        return buf;
index e3ab9df6cf199c1151464ac08cc62460d08e210f..6cfcd20d466862dd8bdb9c4ee63ccc9496a25f84 100644 (file)
 
        /* 0x0 - 0xb */
 
-       /* 'current->mm' needs to be in r4 */
-       tophys(r4, r2)
-       lwz     r4, MM(r4)
-       tophys(r4, r4)
-       /* This only clobbers r0, r3, r4 and r5 */
+       /* switch_mmu_context() needs paging, let's enable it */
+       mfmsr   r9
+       ori     r11, r9, MSR_DR
+       mtmsr   r11
+       sync
+
+       /* switch_mmu_context() clobbers r12, rescue it */
+       SAVE_GPR(12, r1)
+
+       /* Calling switch_mmu_context(<inv>, current->mm, <inv>); */
+       lwz     r4, MM(r2)
        bl      switch_mmu_context
 
+       /* restore r12 */
+       REST_GPR(12, r1)
+
+       /* Disable paging again */
+       mfmsr   r9
+       li      r6, MSR_DR
+       andc    r9, r9, r6
+       mtmsr   r9
+       sync
+
 .endm
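
What the macro body above does, sketched in C under the assumption of 32-bit Book3S mfmsr()/mtmsr() semantics; the NULL arguments follow the "<inv>" markers in the comment, and the r12 save/restore has no C-level equivalent:

static void call_switch_mmu_context(void)
{
	unsigned long msr = mfmsr();

	mtmsr(msr | MSR_DR);			/* data translation on for the call */
	switch_mmu_context(NULL, current->mm, NULL);
	mtmsr(msr & ~MSR_DR);			/* back to real-mode data access */
}
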
index b97bc179f65ad25f5018060aed91f0a8ef13a914..adcb1a1a2bfe805519e1774101ecdc5c5eb913c9 100644 (file)
@@ -112,7 +112,7 @@ static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir,
                          struct pt_regs *regs)
 {
        memset(prstatus, 0, sizeof(struct elf_prstatus));
-       elf_core_copy_kernel_regs(&(prstatus->pr_reg), regs);
+       elf_core_copy_regs(&(prstatus->pr_reg), regs);
 
        /*
         * Overload PID with PIR value.
index 00fd9c548f2631c6fc537f3e3dfdd28780c3e82a..3ac2a81a55ebc0a281cc1ac967a526a667d4a245 100644 (file)
@@ -468,7 +468,7 @@ config CC_HAVE_STACKPROTECTOR_TLS
 
 config STACKPROTECTOR_PER_TASK
        def_bool y
-       depends on !GCC_PLUGIN_RANDSTRUCT
+       depends on !RANDSTRUCT
        depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS
 
 config PHYS_RAM_BASE_FIXED
index 746c4d4e7686689963bb6af62ff99c8dd7da1caa..cf2f55e1dcb679776bf0e1aa84e67fab4064577d 100644 (file)
 
                gpio1: gpio@20121000 {
                        compatible = "microchip,mpfs-gpio";
-                       reg = <000 0x20121000 0x0 0x1000>;
+                       reg = <0x0 0x20121000 0x0 0x1000>;
                        interrupt-parent = <&plic>;
                        interrupt-controller;
                        #interrupt-cells = <1>;
index aad45d7f498fd90f8ddc851827a31e2265e0a67b..5c638fd5b35c78fb0e914e0f8724c01056f43d6a 100644 (file)
                        clocks = <&prci FU540_PRCI_CLK_TLCLK>;
                        status = "disabled";
                };
-               dma: dma@3000000 {
+               dma: dma-controller@3000000 {
                        compatible = "sifive,fu540-c000-pdma";
                        reg = <0x0 0x3000000 0x0 0x8000>;
                        interrupt-parent = <&plic0>;
index d3804a2f9aad33fb8089691ca5f5ccd847452b51..1aaea81fb1413eeb87dc23cbd9346055fb6f6ef8 100644 (file)
@@ -30,8 +30,8 @@
 typedef u32 bug_insn_t;
 
 #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-#define __BUG_ENTRY_ADDR       RISCV_INT " 1b - 2b"
-#define __BUG_ENTRY_FILE       RISCV_INT " %0 - 2b"
+#define __BUG_ENTRY_ADDR       RISCV_INT " 1b - ."
+#define __BUG_ENTRY_FILE       RISCV_INT " %0 - ."
 #else
 #define __BUG_ENTRY_ADDR       RISCV_PTR " 1b"
 #define __BUG_ENTRY_FILE       RISCV_PTR " %0"
index 507cae273bc62cec5ff052cc78a7b67e4854aa87..d6a7428f6248d4251f6ec9121cc373d101e994a5 100644 (file)
@@ -41,7 +41,7 @@ static inline u32 get_cycles_hi(void)
 static inline unsigned long random_get_entropy(void)
 {
        if (unlikely(clint_time_val == NULL))
-               return 0;
+               return random_get_entropy_fallback();
        return get_cycles();
 }
 #define random_get_entropy()   random_get_entropy()
index df325eacf62d261b8b0f97711120cf4ae7acc73f..80eb3ee84ff170b0081ed7fd1f18e27bcb70d931 100644 (file)
@@ -20,7 +20,9 @@ LDFLAGS_vmlinux       := -pie
 endif
 aflags_dwarf   := -Wa,-gdwarf-2
 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
+ifndef CONFIG_AS_IS_LLVM
 KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
+endif
 KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
index b265bfede188acbb7c67717ac22f85f5c8a40cf4..f56591bc089754fee1a4f67f19f38a0bd22696ba 100644 (file)
@@ -2,3 +2,6 @@
 image
 bzImage
 section_cmp.*
+vmlinux
+vmlinux.lds
+vmlinux.syms
index 0ba646899131625ab29ab3afdf84336957870353..883357a211a3bd67ebe771bbc51d1d16582de50a 100644 (file)
@@ -37,14 +37,21 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
 
 obj-y  := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
 obj-y  += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y  += version.o pgm_check_info.o ctype.o
+obj-y  += version.o pgm_check_info.o ctype.o ipl_data.o
 obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE))  += uv.o
 obj-$(CONFIG_RELOCATABLE)      += machine_kexec_reloc.o
 obj-$(CONFIG_RANDOMIZE_BASE)   += kaslr.o
-targets        := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
-subdir-        := compressed
+obj-y  += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
+obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
+obj-all := $(obj-y) piggy.o syms.o
+
+targets        := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
+targets        += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
 
 OBJECTS := $(addprefix $(obj)/,$(obj-y))
+OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
 
 quiet_cmd_section_cmp = SECTCMP $*
 define cmd_section_cmp
@@ -59,14 +66,67 @@ define cmd_section_cmp
        touch $@
 endef
 
-$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
+$(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
        $(call if_changed,objcopy)
 
-$(obj)/section_cmp%: vmlinux $(obj)/compressed/vmlinux FORCE
+$(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE
        $(call if_changed,section_cmp)
 
-$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE
-       $(Q)$(MAKE) $(build)=$(obj)/compressed $@
+LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE
+       $(call if_changed,ld)
+
+LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
+$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(OBJECTS) FORCE
+       $(call if_changed,ld)
+
+quiet_cmd_dumpsyms = DUMPSYMS $<
+define cmd_dumpsyms
+       $(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@"
+endef
+
+$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE
+       $(call if_changed,dumpsyms)
+
+OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms
+$(obj)/syms.o: $(obj)/syms.bin FORCE
+       $(call if_changed,objcopy)
+
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
+$(obj)/info.bin: vmlinux FORCE
+       $(call if_changed,objcopy)
+
+OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info
+$(obj)/info.o: $(obj)/info.bin FORCE
+       $(call if_changed,objcopy)
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S
+$(obj)/vmlinux.bin: vmlinux FORCE
+       $(call if_changed,objcopy)
+
+suffix-$(CONFIG_KERNEL_GZIP)  := .gz
+suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
+suffix-$(CONFIG_KERNEL_LZ4)  := .lz4
+suffix-$(CONFIG_KERNEL_LZMA)  := .lzma
+suffix-$(CONFIG_KERNEL_LZO)  := .lzo
+suffix-$(CONFIG_KERNEL_XZ)  := .xz
+suffix-$(CONFIG_KERNEL_ZSTD)  := .zst
 
-$(obj)/startup.a: $(OBJECTS) FORCE
-       $(call if_changed,ar)
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,bzip2_with_size)
+$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,lz4_with_size)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,lzma_with_size)
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,lzo_with_size)
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,xzkern_with_size)
+$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,zstd22_with_size)
+
+OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
+$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
+       $(call if_changed,objcopy)
index 641ce0fc5c3eb0d2028028bbdad48c18f8e7c5c3..70418389414d305874d0c3f77e9112ae58f0e670 100644 (file)
@@ -2,9 +2,12 @@
 #ifndef BOOT_BOOT_H
 #define BOOT_BOOT_H
 
-#include <asm/extable.h>
 #include <linux/types.h>
 
+#define IPL_START      0x200
+
+#ifndef __ASSEMBLY__
+
 void startup_kernel(void);
 unsigned long detect_memory(void);
 bool is_ipl_block_dump(void);
@@ -31,4 +34,5 @@ extern char _stack_start[], _stack_end[];
 
 unsigned long read_ipl_report(unsigned long safe_offset);
 
+#endif /* __ASSEMBLY__ */
 #endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
deleted file mode 100644 (file)
index d04e0e7..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# linux/arch/s390/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-KCOV_INSTRUMENT := n
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
-obj-y  := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
-obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
-obj-all := $(obj-y) piggy.o syms.o
-targets        := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
-targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
-targets += vmlinux.bin.zst
-targets += info.bin syms.bin vmlinux.syms $(obj-all)
-
-KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
-KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
-OBJCOPYFLAGS :=
-
-OBJECTS := $(addprefix $(obj)/,$(obj-y))
-OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
-
-LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS_ALL) FORCE
-       $(call if_changed,ld)
-
-LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
-$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE
-       $(call if_changed,ld)
-
-quiet_cmd_dumpsyms = DUMPSYMS $<
-define cmd_dumpsyms
-       $(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@"
-endef
-
-$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE
-       $(call if_changed,dumpsyms)
-
-OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms
-$(obj)/syms.o: $(obj)/syms.bin FORCE
-       $(call if_changed,objcopy)
-
-OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
-$(obj)/info.bin: vmlinux FORCE
-       $(call if_changed,objcopy)
-
-OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info
-$(obj)/info.o: $(obj)/info.bin FORCE
-       $(call if_changed,objcopy)
-
-OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S
-$(obj)/vmlinux.bin: vmlinux FORCE
-       $(call if_changed,objcopy)
-
-suffix-$(CONFIG_KERNEL_GZIP)  := .gz
-suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
-suffix-$(CONFIG_KERNEL_LZ4)  := .lz4
-suffix-$(CONFIG_KERNEL_LZMA)  := .lzma
-suffix-$(CONFIG_KERNEL_LZO)  := .lzo
-suffix-$(CONFIG_KERNEL_XZ)  := .xz
-suffix-$(CONFIG_KERNEL_ZSTD)  := .zst
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,bzip2_with_size)
-$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,lz4_with_size)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,lzma_with_size)
-$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,lzo_with_size)
-$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,xzkern_with_size)
-$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,zstd22_with_size)
-
-OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
-$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
-       $(call if_changed,objcopy)
index 666692429db0a4b78e3906214672151d4fa940c3..3f79b9efb8034901a0a697187f5cd6f40a3e48d7 100644 (file)
 #include <asm/page.h>
 #include <asm/ptrace.h>
 #include <asm/sclp.h>
-
-#define ARCH_OFFSET    4
+#include "boot.h"
 
 #define EP_OFFSET      0x10008
 #define EP_STRING      "S390EP"
+#define IPL_BS         0x730
 
 __HEAD
-
-#define IPL_BS 0x730
-       .org    0
-       .long   0x00080000,0x80000000+iplstart  # The first 24 bytes are loaded
-       .long   0x02000018,0x60000050           # by ipl to addresses 0-23.
-       .long   0x02000068,0x60000050           # (a PSW and two CCWs).
-       .fill   80-24,1,0x40                    # bytes 24-79 are discarded !!
-       .long   0x020000f0,0x60000050           # The next 160 bytes are loaded
-       .long   0x02000140,0x60000050           # to addresses 0x18-0xb7
-       .long   0x02000190,0x60000050           # They form the continuation
-       .long   0x020001e0,0x60000050           # of the CCW program started
-       .long   0x02000230,0x60000050           # by ipl and load the range
-       .long   0x02000280,0x60000050           # 0x0f0-0x730 from the image
-       .long   0x020002d0,0x60000050           # to the range 0x0f0-0x730
-       .long   0x02000320,0x60000050           # in memory. At the end of
-       .long   0x02000370,0x60000050           # the channel program the PSW
-       .long   0x020003c0,0x60000050           # at location 0 is loaded.
-       .long   0x02000410,0x60000050           # Initial processing starts
-       .long   0x02000460,0x60000050           # at 0x200 = iplstart.
-       .long   0x020004b0,0x60000050
-       .long   0x02000500,0x60000050
-       .long   0x02000550,0x60000050
-       .long   0x020005a0,0x60000050
-       .long   0x020005f0,0x60000050
-       .long   0x02000640,0x60000050
-       .long   0x02000690,0x60000050
-       .long   0x020006e0,0x20000050
-
-       .org    __LC_RST_NEW_PSW                # 0x1a0
-       .quad   0,iplstart
-       .org    __LC_EXT_NEW_PSW                # 0x1b0
-       .quad   0x0002000180000000,0x1b0        # disabled wait
-       .org    __LC_PGM_NEW_PSW                # 0x1d0
-       .quad   0x0000000180000000,startup_pgm_check_handler
-       .org    __LC_IO_NEW_PSW                 # 0x1f0
-       .quad   0x0002000180000000,0x1f0        # disabled wait
-
-       .org    0x200
-
+ipl_start:
+       mvi     __LC_AR_MODE_ID,1       # set esame flag
+       slr     %r0,%r0                 # set cpuid to zero
+       lhi     %r1,2                   # mode 2 = esame (dump)
+       sigp    %r1,%r0,0x12            # switch to esame mode
+       sam64                           # switch to 64 bit addressing mode
+       lgh     %r1,__LC_SUBCHANNEL_ID  # test if subchannel number
+       brctg   %r1,.Lnoload            #  is valid
+       llgf    %r1,__LC_SUBCHANNEL_ID  # load ipl subchannel number
+       lghi    %r2,IPL_BS              # load start address
+       bras    %r14,.Lloader           # load rest of ipl image
+       larl    %r12,parmarea           # pointer to parameter area
+       stg     %r1,IPL_DEVICE-PARMAREA(%r12) # save ipl device number
+#
+# load parameter file from ipl device
+#
+.Lagain1:
+       larl    %r2,_end                # ramdisk loc. is temp
+       bras    %r14,.Lloader           # load parameter file
+       ltgr    %r2,%r2                 # got anything ?
+       jz      .Lnopf
+       lg      %r3,MAX_COMMAND_LINE_SIZE-PARMAREA(%r12)
+       aghi    %r3,-1
+       clgr    %r2,%r3
+       jl      .Lnotrunc
+       lgr     %r2,%r3
+.Lnotrunc:
+       larl    %r4,_end
+       larl    %r13,.L_hdr
+       clc     0(3,%r4),0(%r13)        # if it is HDRx
+       jz      .Lagain1                # skip dataset header
+       larl    %r13,.L_eof
+       clc     0(3,%r4),0(%r13)        # if it is EOFx
+       jz      .Lagain1                # skip dataset trailer
+       lgr     %r5,%r2
+       la      %r6,COMMAND_LINE-PARMAREA(%r12)
+       lgr     %r7,%r2
+       aghi    %r7,1
+       mvcl    %r6,%r4
+.Lnopf:
+#
+# load ramdisk from ipl device
+#
+.Lagain2:
+       larl    %r2,_end                # addr of ramdisk
+       stg     %r2,INITRD_START-PARMAREA(%r12)
+       bras    %r14,.Lloader           # load ramdisk
+       stg     %r2,INITRD_SIZE-PARMAREA(%r12) # store size of rd
+       ltgr    %r2,%r2
+       jnz     .Lrdcont
+       stg     %r2,INITRD_START-PARMAREA(%r12) # no ramdisk found
+.Lrdcont:
+       larl    %r2,_end
+       larl    %r13,.L_hdr             # skip HDRx and EOFx
+       clc     0(3,%r2),0(%r13)
+       jz      .Lagain2
+       larl    %r13,.L_eof
+       clc     0(3,%r2),0(%r13)
+       jz      .Lagain2
+#
+# reset files in VM reader
+#
+       larl    %r13,.Lcpuid
+       stidp   0(%r13)                 # store cpuid
+       tm      0(%r13),0xff            # running VM ?
+       jno     .Lnoreset
+       larl    %r2,.Lreset
+       lghi    %r3,26
+       diag    %r2,%r3,8
+       larl    %r5,.Lirb
+       stsch   0(%r5)                  # check if irq is pending
+       tm      30(%r5),0x0f            # by verifying if any of the
+       jnz     .Lwaitforirq            # activity or status control
+       tm      31(%r5),0xff            # bits is set in the schib
+       jz      .Lnoreset
+.Lwaitforirq:
+       bras    %r14,.Lirqwait          # wait for IO interrupt
+       c       %r1,__LC_SUBCHANNEL_ID  # compare subchannel number
+       jne     .Lwaitforirq
+       larl    %r5,.Lirb
+       tsch    0(%r5)
+.Lnoreset:
+       j       .Lnoload
+#
+# everything loaded, go for it
+#
+.Lnoload:
+       jg      startup
 #
 # subroutine to wait for end I/O
 #
 .Lirqwait:
-       mvc     __LC_IO_NEW_PSW(16),.Lnewpsw    # set up IO interrupt psw
-       lpsw    .Lwaitpsw
+       larl    %r13,.Lnewpswmask       # set up IO interrupt psw
+       mvc     __LC_IO_NEW_PSW(8),0(%r13)
+       stg     %r14,__LC_IO_NEW_PSW+8
+       larl    %r13,.Lwaitpsw
+       lpswe   0(%r13)
 .Lioint:
-       br      %r14
-       .align  8
-.Lnewpsw:
-       .quad   0x0000000080000000,.Lioint
-.Lwaitpsw:
-       .long   0x020a0000,0x80000000+.Lioint
-
 #
 # subroutine for loading cards from the reader
 #
 .Lloader:
-       la      %r4,0(%r14)
-       la      %r3,.Lorb               # r2 = address of orb into r2
-       la      %r5,.Lirb               # r4 = address of irb
-       la      %r6,.Lccws
-       la      %r7,20
+       lgr     %r4,%r14
+       larl    %r3,.Lorb               # r3 = address of orb
+       larl    %r5,.Lirb               # r5 = address of irb
+       larl    %r6,.Lccws
+       lghi    %r7,20
 .Linit:
        st      %r2,4(%r6)              # initialize CCW data addresses
        la      %r2,0x50(%r2)
        la      %r6,8(%r6)
-       bct     7,.Linit
-
-       lctl    %c6,%c6,.Lcr6           # set IO subclass mask
-       slr     %r2,%r2
+       brctg   %r7,.Linit
+       larl    %r13,.Lcr6
+       lctlg   %c6,%c6,0(%r13)
+       xgr     %r2,%r2
 .Lldlp:
        ssch    0(%r3)                  # load chunk of 1600 bytes
-       bnz     .Llderr
+       jnz     .Llderr
 .Lwait4irq:
-       bas     %r14,.Lirqwait
+       bras    %r14,.Lirqwait
        c       %r1,__LC_SUBCHANNEL_ID  # compare subchannel number
-       bne     .Lwait4irq
+       jne     .Lwait4irq
        tsch    0(%r5)
-
-       slr     %r0,%r0
+       xgr     %r0,%r0
        ic      %r0,8(%r5)              # get device status
-       chi     %r0,8                   # channel end ?
-       be      .Lcont
-       chi     %r0,12                  # channel end + device end ?
-       be      .Lcont
-
-       l       %r0,4(%r5)
-       s       %r0,8(%r3)              # r0/8 = number of ccws executed
-       mhi     %r0,10                  # *10 = number of bytes in ccws
-       lh      %r3,10(%r5)             # get residual count
-       sr      %r0,%r3                 # #ccws*80-residual=#bytes read
-       ar      %r2,%r0
-
+       cghi    %r0,8                   # channel end ?
+       je      .Lcont
+       cghi    %r0,12                  # channel end + device end ?
+       je      .Lcont
+       llgf    %r0,4(%r5)
+       sgf     %r0,8(%r3)              # r0/8 = number of ccws executed
+       mghi    %r0,10                  # *10 = number of bytes in ccws
+       llgh    %r3,10(%r5)             # get residual count
+       sgr     %r0,%r3                 # #ccws*80-residual=#bytes read
+       agr     %r2,%r0
        br      %r4                     # r2 contains the total size
-
 .Lcont:
-       ahi     %r2,0x640               # add 0x640 to total size
-       la      %r6,.Lccws
-       la      %r7,20
+       aghi    %r2,0x640               # add 0x640 to total size
+       larl    %r6,.Lccws
+       lghi    %r7,20
 .Lincr:
        l       %r0,4(%r6)              # update CCW data addresses
-       ahi     %r0,0x640
+       aghi    %r0,0x640
        st      %r0,4(%r6)
-       ahi     %r6,8
-       bct     7,.Lincr
-
-       b       .Lldlp
+       aghi    %r6,8
+       brctg   %r7,.Lincr
+       j       .Lldlp
 .Llderr:
-       lpsw    .Lcrash
+       larl    %r13,.Lcrash
+       lpsw    0(%r13)
 
+       .align  8
+.Lwaitpsw:
+       .quad   0x0202000180000000,.Lioint
+.Lnewpswmask:
+       .quad   0x0000000180000000
        .align  8
 .Lorb: .long   0x00000000,0x0080ff00,.Lccws
 .Lirb: .long   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-.Lcr6: .long   0xff000000
-.Lloadp:.long  0,0
+       .align  8
+.Lcr6: .quad   0x00000000ff000000
        .align  8
 .Lcrash:.long  0x000a0000,0x00000000
-
        .align  8
 .Lccws: .rept  19
        .long   0x02600050,0x00000000
        .endr
        .long   0x02200050,0x00000000
-
-iplstart:
-       mvi     __LC_AR_MODE_ID,1       # set esame flag
-       slr     %r0,%r0                 # set cpuid to zero
-       lhi     %r1,2                   # mode 2 = esame (dump)
-       sigp    %r1,%r0,0x12            # switch to esame mode
-       bras    %r13,0f
-       .fill   16,4,0x0
-0:     lmh     %r0,%r15,0(%r13)        # clear high-order half of gprs
-       sam31                           # switch to 31 bit addressing mode
-       lh      %r1,__LC_SUBCHANNEL_ID  # test if subchannel number
-       bct     %r1,.Lnoload            #  is valid
-       l       %r1,__LC_SUBCHANNEL_ID  # load ipl subchannel number
-       la      %r2,IPL_BS              # load start address
-       bas     %r14,.Lloader           # load rest of ipl image
-       l       %r12,.Lparm             # pointer to parameter area
-       st      %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number
-
-#
-# load parameter file from ipl device
-#
-.Lagain1:
-       l       %r2,.Linitrd            # ramdisk loc. is temp
-       bas     %r14,.Lloader           # load parameter file
-       ltr     %r2,%r2                 # got anything ?
-       bz      .Lnopf
-       l       %r3,MAX_COMMAND_LINE_SIZE+ARCH_OFFSET-PARMAREA(%r12)
-       ahi     %r3,-1
-       clr     %r2,%r3
-       bl      .Lnotrunc
-       lr      %r2,%r3
-.Lnotrunc:
-       l       %r4,.Linitrd
-       clc     0(3,%r4),.L_hdr         # if it is HDRx
-       bz      .Lagain1                # skip dataset header
-       clc     0(3,%r4),.L_eof         # if it is EOFx
-       bz      .Lagain1                # skip dataset trailer
-
-       lr      %r5,%r2
-       la      %r6,COMMAND_LINE-PARMAREA(%r12)
-       lr      %r7,%r2
-       ahi     %r7,1
-       mvcl    %r6,%r4
-.Lnopf:
-
-#
-# load ramdisk from ipl device
-#
-.Lagain2:
-       l       %r2,.Linitrd            # addr of ramdisk
-       st      %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
-       bas     %r14,.Lloader           # load ramdisk
-       st      %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd
-       ltr     %r2,%r2
-       bnz     .Lrdcont
-       st      %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found
-.Lrdcont:
-       l       %r2,.Linitrd
-
-       clc     0(3,%r2),.L_hdr         # skip HDRx and EOFx
-       bz      .Lagain2
-       clc     0(3,%r2),.L_eof
-       bz      .Lagain2
-
-#
-# reset files in VM reader
-#
-       stidp   .Lcpuid                 # store cpuid
-       tm      .Lcpuid,0xff            # running VM ?
-       bno     .Lnoreset
-       la      %r2,.Lreset
-       lhi     %r3,26
-       diag    %r2,%r3,8
-       la      %r5,.Lirb
-       stsch   0(%r5)                  # check if irq is pending
-       tm      30(%r5),0x0f            # by verifying if any of the
-       bnz     .Lwaitforirq            # activity or status control
-       tm      31(%r5),0xff            # bits is set in the schib
-       bz      .Lnoreset
-.Lwaitforirq:
-       bas     %r14,.Lirqwait          # wait for IO interrupt
-       c       %r1,__LC_SUBCHANNEL_ID  # compare subchannel number
-       bne     .Lwaitforirq
-       la      %r5,.Lirb
-       tsch    0(%r5)
-.Lnoreset:
-       b       .Lnoload
-
-#
-# everything loaded, go for it
-#
-.Lnoload:
-       l       %r1,.Lstartup
-       br      %r1
-
-.Linitrd:.long _end                    # default address of initrd
-.Lparm:        .long  PARMAREA
-.Lstartup: .long startup
 .Lreset:.byte  0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40
        .byte   0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6
        .byte   0xc8,0xd6,0xd3,0xc4     # "change rdr all keep nohold"
@@ -268,10 +215,10 @@ iplstart:
 # this is called either by the ipl loader or directly by PSW restart
 # or linload or SALIPL
 #
-       .org    STARTUP_NORMAL_OFFSET
+       .org    STARTUP_NORMAL_OFFSET - IPL_START
 SYM_CODE_START(startup)
        j       startup_normal
-       .org    EP_OFFSET
+       .org    EP_OFFSET - IPL_START
 #
 # This is a list of s390 kernel entry points. At address 0x1000f the number of
 # valid entry points is stored.
@@ -283,7 +230,7 @@ SYM_CODE_START(startup)
 #
 # kdump startup-code, running in 64 bit absolute addressing mode
 #
-       .org    STARTUP_KDUMP_OFFSET
+       .org    STARTUP_KDUMP_OFFSET - IPL_START
        j       startup_kdump
 SYM_CODE_END(startup)
 SYM_CODE_START_LOCAL(startup_normal)
@@ -295,20 +242,23 @@ SYM_CODE_START_LOCAL(startup_normal)
        .fill   16,4,0x0
 0:     lmh     %r0,%r15,0(%r13)        # clear high-order half of gprs
        sam64                           # switch to 64 bit addressing mode
-       basr    %r13,0                  # get base
-.LPG0:
-       mvc     __LC_EXT_NEW_PSW(16),.Lext_new_psw-.LPG0(%r13)
-       mvc     __LC_PGM_NEW_PSW(16),.Lpgm_new_psw-.LPG0(%r13)
-       mvc     __LC_IO_NEW_PSW(16),.Lio_new_psw-.LPG0(%r13)
+       larl    %r13,.Lext_new_psw
+       mvc     __LC_EXT_NEW_PSW(16),0(%r13)
+       larl    %r13,.Lpgm_new_psw
+       mvc     __LC_PGM_NEW_PSW(16),0(%r13)
+       larl    %r13,.Lio_new_psw
+       mvc     __LC_IO_NEW_PSW(16),0(%r13)
        xc      0x200(256),0x200        # partially clear lowcore
        xc      0x300(256),0x300
        xc      0xe00(256),0xe00
        xc      0xf00(256),0xf00
-       lctlg   %c0,%c15,.Lctl-.LPG0(%r13)      # load control registers
+       larl    %r13,.Lctl
+       lctlg   %c0,%c15,0(%r13)        # load control registers
        stcke   __LC_BOOT_CLOCK
        mvc     __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
-       spt     6f-.LPG0(%r13)
-       mvc     __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
+       larl    %r13,6f
+       spt     0(%r13)
+       mvc     __LC_LAST_UPDATE_TIMER(8),0(%r13)
        larl    %r15,_stack_end-STACK_FRAME_OVERHEAD
        brasl   %r14,sclp_early_setup_buffer
        brasl   %r14,verify_facilities
@@ -368,23 +318,3 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler)
        lmg     %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
        lpswe   __LC_RETURN_PSW         # disabled wait
 SYM_CODE_END(startup_pgm_check_handler)
-
-#
-# params at 10400 (setup.h)
-# Must be kept in sync with struct parmarea in setup.h
-#
-       .org    PARMAREA
-SYM_DATA_START(parmarea)
-       .quad   0                       # IPL_DEVICE
-       .quad   0                       # INITRD_START
-       .quad   0                       # INITRD_SIZE
-       .quad   0                       # OLDMEM_BASE
-       .quad   0                       # OLDMEM_SIZE
-       .quad   kernel_version          # points to kernel version string
-       .quad   COMMAND_LINE_SIZE
-
-       .org    COMMAND_LINE
-       .byte   "root=/dev/ram0 ro"
-       .byte   0
-       .org    PARMAREA+__PARMAREA_SIZE
-SYM_DATA_END(parmarea)
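For context on the EP_OFFSET/EP_STRING pair defined above: "S390EP" at 0x10008 is what lets external tools identify an s390 kernel image. A stand-alone userspace sketch of such a check (a hypothetical tool, assuming the string really is emitted at EP_OFFSET in the final image):

#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
	char buf[6];
	FILE *f;

	if (argc != 2 || !(f = fopen(argv[1], "rb")))
		return 1;
	/* EP_STRING "S390EP" is expected at EP_OFFSET == 0x10008 */
	if (fseek(f, 0x10008, SEEK_SET) != 0 || fread(buf, 1, 6, f) != 6)
		return 1;
	/* exit status 0 when the marker is found */
	return memcmp(buf, "S390EP", 6) ? 1 : 0;
}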
diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c
new file mode 100644 (file)
index 0000000..0846e2b
--- /dev/null
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/compat.h>
+#include <linux/ptrace.h>
+#include <asm/cio.h>
+#include <asm/asm-offsets.h>
+#include "boot.h"
+
+#define CCW0(cmd, addr, cnt, flg) \
+       { .cmd_code = cmd, .cda = addr, .count = cnt, .flags = flg, }
+
+#define PSW_MASK_DISABLED (PSW_MASK_WAIT | PSW_MASK_EA | PSW_MASK_BA)
+
+struct ipl_lowcore {
+       psw_t32         ipl_psw;                        /* 0x0000 */
+       struct ccw0     ccwpgm[2];                      /* 0x0008 */
+       u8              fill[56];                       /* 0x0018 */
+       struct ccw0     ccwpgmcc[20];                   /* 0x0050 */
+       u8              pad_0xf0[0x01a0-0x00f0];        /* 0x00f0 */
+       psw_t           restart_psw;                    /* 0x01a0 */
+       psw_t           external_new_psw;               /* 0x01b0 */
+       psw_t           svc_new_psw;                    /* 0x01c0 */
+       psw_t           program_new_psw;                /* 0x01d0 */
+       psw_t           mcck_new_psw;                   /* 0x01e0 */
+       psw_t           io_new_psw;                     /* 0x01f0 */
+};
+
+/*
+ * Initial lowcore for IPL: the first 24 bytes are loaded by IPL to
+ * addresses 0-23 (a PSW and two CCWs). Bytes 24-79 are discarded.
+ * The next 160 bytes are loaded to addresses 0x18-0xb7. They form
+ * the continuation of the CCW program started by IPL and load the
+ * range 0x0f0-0x730 from the image to the range 0x0f0-0x730 in
+ * memory. At the end of the channel program the PSW at location 0 is
+ * loaded.
+ * Initial processing starts at 0x200 = iplstart.
+ *
+ * The restart psw points to iplstart, which makes it possible to
+ * load a kernel image into memory and start it with a psw restart
+ * on any cpu. All other default new-psw locations contain a
+ * disabled wait psw whose address indicates which psw was loaded.
+ *
+ * Note that the 'file' utility can detect s390 kernel images. For
+ * that to succeed, the two initial CCWs and the 0x40 fill bytes
+ * must be present.
+ */
+static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = {
+       .ipl_psw = { .mask = PSW32_MASK_BASE, .addr = PSW32_ADDR_AMODE | IPL_START },
+       .ccwpgm = {
+               [ 0] = CCW0(CCW_CMD_READ_IPL, 0x018, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 1] = CCW0(CCW_CMD_READ_IPL, 0x068, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+       },
+       .fill = {
+               [ 0 ... 55] = 0x40,
+       },
+       .ccwpgmcc = {
+               [ 0] = CCW0(CCW_CMD_READ_IPL, 0x0f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 1] = CCW0(CCW_CMD_READ_IPL, 0x140, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 2] = CCW0(CCW_CMD_READ_IPL, 0x190, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 3] = CCW0(CCW_CMD_READ_IPL, 0x1e0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 4] = CCW0(CCW_CMD_READ_IPL, 0x230, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 5] = CCW0(CCW_CMD_READ_IPL, 0x280, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 6] = CCW0(CCW_CMD_READ_IPL, 0x2d0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 7] = CCW0(CCW_CMD_READ_IPL, 0x320, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 8] = CCW0(CCW_CMD_READ_IPL, 0x370, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [ 9] = CCW0(CCW_CMD_READ_IPL, 0x3c0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [10] = CCW0(CCW_CMD_READ_IPL, 0x410, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [11] = CCW0(CCW_CMD_READ_IPL, 0x460, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [12] = CCW0(CCW_CMD_READ_IPL, 0x4b0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [13] = CCW0(CCW_CMD_READ_IPL, 0x500, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [14] = CCW0(CCW_CMD_READ_IPL, 0x550, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [15] = CCW0(CCW_CMD_READ_IPL, 0x5a0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [16] = CCW0(CCW_CMD_READ_IPL, 0x5f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [17] = CCW0(CCW_CMD_READ_IPL, 0x640, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+               [19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI),
+       },
+       .restart_psw      = { .mask = 0, .addr = IPL_START, },
+       .external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, },
+       .svc_new_psw      = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, },
+       .program_new_psw  = { .mask = PSW_MASK_DISABLED, .addr = __LC_PGM_NEW_PSW, },
+       .mcck_new_psw     = { .mask = PSW_MASK_DISABLED, .addr = __LC_MCK_NEW_PSW, },
+       .io_new_psw       = { .mask = PSW_MASK_DISABLED, .addr = __LC_IO_NEW_PSW, },
+};
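A quick cross-check of the comment in ipl_data.c: the 20 ccwpgmcc entries each read 0x50 bytes starting at 0x0f0, so the chain ends exactly at 0x730, which is the IPL_BS constant head.S continues loading from. The arithmetic as a stand-alone sketch:

#include <assert.h>

int main(void)
{
	unsigned int addr = 0x0f0;	/* first continuation CCW */
	int i;

	for (i = 0; i < 20; i++)
		addr += 0x50;		/* each CCW reads 0x50 bytes */
	assert(addr == 0x730);		/* == IPL_BS in head.S */
	return 0;
}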
index 9ed7e29c81d9a0ec9e167f9b2e9d6fcf067c6c74..ca78d6162245a6d1a2324e24ac51fe95bd81aef2 100644 (file)
@@ -8,9 +8,16 @@
 #include <asm/sections.h>
 #include <asm/boot_data.h>
 #include <asm/facility.h>
+#include <asm/setup.h>
 #include <asm/uv.h>
 #include "boot.h"
 
+struct parmarea parmarea __section(".parmarea") = {
+       .kernel_version         = (unsigned long)kernel_version,
+       .max_command_line_size  = COMMAND_LINE_SIZE,
+       .command_line           = "root=/dev/ram0 ro",
+};
+
 char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
 int __bootdata(noexec_disabled);
 
index d8984462071ffc8d6f09622eafa85d7c9ab4c303..e8d74d4f62aa5ff0cd2284dc66dffca5a5e8ed56 100644 (file)
@@ -8,7 +8,7 @@
 #include <asm/timex.h>
 #include <asm/sclp.h>
 #include <asm/kasan.h>
-#include "compressed/decompressor.h"
+#include "decompressor.h"
 #include "boot.h"
 
 #define PRNG_MODE_TDES  1
index 2f949cd9076b81dbc4b1536d25229e49e7515a8f..7fa1a32ea0f3f095612917617573e9bd3d8af4ed 100644 (file)
@@ -7,7 +7,7 @@
 #include <asm/sections.h>
 #include <asm/mem_detect.h>
 #include <asm/sparsemem.h>
-#include "compressed/decompressor.h"
+#include "decompressor.h"
 #include "boot.h"
 
 struct mem_detect_info __bootdata(mem_detect);
index 1aa11a8f57dd827432c5e439feb0a299e3ece7d7..863e6bcaa5a188f1971b04ebeda51ff5f0bb8829 100644 (file)
@@ -10,7 +10,7 @@
 #include <asm/sclp.h>
 #include <asm/diag.h>
 #include <asm/uv.h>
-#include "compressed/decompressor.h"
+#include "decompressor.h"
 #include "boot.h"
 #include "uv.h"
 
similarity index 94%
rename from arch/s390/boot/compressed/vmlinux.lds.S
rename to arch/s390/boot/vmlinux.lds.S
index 918e05137d4c6ca935cbb0e4cb510e24e86aa52a..af5c6860e0a119d3984f2b8e85db90d3acdf2772 100644 (file)
@@ -4,6 +4,7 @@
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/sclp.h>
+#include "boot.h"
 
 OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
 OUTPUT_ARCH(s390:64-bit)
@@ -13,11 +14,19 @@ ENTRY(startup)
 SECTIONS
 {
        . = 0;
+       .ipldata : {
+               *(.ipldata)
+       }
+       . = IPL_START;
        .head.text : {
                _head = . ;
                HEAD_TEXT
                _ehead = . ;
        }
+       . = PARMAREA;
+       .parmarea : {
+               *(.parmarea)
+       }
        .text : {
                _text = .;      /* Text */
                *(.text)
index bfbafd35bcbd24af676e5bb99472b6d91649b8b7..e013088b511550a03f52644c3fa8ff4f0df1473a 100644 (file)
@@ -194,7 +194,7 @@ static struct skcipher_alg cbc_des_alg = {
  *   same as DES.  Implementers MUST reject keys that exhibit this
  *   property.
  *
- *   In fips mode additinally check for all 3 keys are unique.
+ *   In fips mode additionally check that all 3 keys are unique.
  *
  */
 static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
index 234d791ca59d550869d424d42866de178fd27310..ae382bafc77252d6676a39be80c5fc66cab2ba6f 100644 (file)
@@ -528,7 +528,7 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
                        /* give mutex free before calling schedule() */
                        mutex_unlock(&prng_data->mutex);
                        schedule();
-                       /* occopy mutex again */
+                       /* occupy mutex again */
                        if (mutex_lock_interruptible(&prng_data->mutex)) {
                                if (ret == 0)
                                        ret = -ERESTARTSYS;
index 3765c2d81df5596c493cbe5d1daac6ae6475230f..a3d881ca0a98d14421acae675a19757431be74fb 100644 (file)
@@ -190,7 +190,7 @@ int hypfs_vm_create_files(struct dentry *root)
        if (IS_ERR(data))
                return PTR_ERR(data);
 
-       /* Hpervisor Info */
+       /* Hypervisor Info */
        dir = hypfs_mkdir(root, "hyp");
        if (IS_ERR(dir)) {
                rc = PTR_ERR(dir);
index bb3837d7387ce037cff3d8026b253ee462fb8fb7..7db046596b93e81e1b45a0e2537f602556747d39 100644 (file)
@@ -4,19 +4,6 @@
 
 #ifdef __ASSEMBLY__
 
-/*
- * Check the length of an instruction sequence. The length may not be larger
- * than 254 bytes and it has to be divisible by 2.
- */
-.macro alt_len_check start,end
-       .if ( \end - \start ) > 254
-       .error "cpu alternatives does not support instructions blocks > 254 bytes\n"
-       .endif
-       .if ( \end - \start ) % 2
-       .error "cpu alternatives instructions length is odd\n"
-       .endif
-.endm
-
 /*
  * Issue one struct alt_instr descriptor entry (need to put it into
  * the section .altinstructions, see below). This entry contains
        .long   \alt_start - .
        .word   \feature
        .byte   \orig_end - \orig_start
-       .byte   \alt_end - \alt_start
-.endm
-
-/*
- * Fill up @bytes with nops. The macro emits 6-byte nop instructions
- * for the bulk of the area, possibly followed by a 4-byte and/or
- * a 2-byte nop if the size of the area is not divisible by 6.
- */
-.macro alt_pad_fill bytes
-       .rept   ( \bytes ) / 6
-       brcl    0,0
-       .endr
-       .rept   ( \bytes ) % 6 / 4
-       nop
-       .endr
-       .rept   ( \bytes ) % 6 % 4 / 2
-       nopr
-       .endr
-.endm
-
-/*
- * Fill up @bytes with nops. If the number of bytes is larger
- * than 6, emit a jg instruction to branch over all nops, then
- * fill an area of size (@bytes - 6) with nop instructions.
- */
-.macro alt_pad bytes
-       .if ( \bytes > 0 )
-       .if ( \bytes > 6 )
-       jg      . + \bytes
-       alt_pad_fill \bytes - 6
-       .else
-       alt_pad_fill \bytes
-       .endif
-       .endif
+       .org    . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
+       .org    . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
 .endm
 
 /*
  * Define an alternative between two instructions. If @feature is
  * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
+ * @newinstr.
  */
 .macro ALTERNATIVE oldinstr, newinstr, feature
        .pushsection .altinstr_replacement,"ax"
 770:   \newinstr
 771:   .popsection
 772:   \oldinstr
-773:   alt_len_check 770b, 771b
-       alt_len_check 772b, 773b
-       alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) )
-774:   .pushsection .altinstructions,"a"
-       alt_entry 772b, 774b, 770b, 771b, \feature
+773:   .pushsection .altinstructions,"a"
+       alt_entry 772b, 773b, 770b, 771b, \feature
        .popsection
 .endm
 
 /*
  * Define an alternative between two instructions. If @feature is
  * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
+ * @newinstr.
  */
 .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
        .pushsection .altinstr_replacement,"ax"
 771:   \newinstr2
 772:   .popsection
 773:   \oldinstr
-774:   alt_len_check 770b, 771b
-       alt_len_check 771b, 772b
-       alt_len_check 773b, 774b
-       .if ( 771b - 770b > 772b - 771b )
-       alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) )
-       .else
-       alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) )
-       .endif
-775:   .pushsection .altinstructions,"a"
-       alt_entry 773b, 775b, 770b, 771b,\feature1
-       alt_entry 773b, 775b, 771b, 772b,\feature2
+774:   .pushsection .altinstructions,"a"
+       alt_entry 773b, 774b, 770b, 771b,\feature1
+       alt_entry 773b, 774b, 771b, 772b,\feature2
        .popsection
 .endm
 
index 3f2856ed680831f2f574425dc9ed19672b75f7eb..904dd049f954767e4c5812358d6d403884638008 100644 (file)
@@ -13,32 +13,25 @@ struct alt_instr {
        s32 repl_offset;        /* offset to replacement instruction */
        u16 facility;           /* facility bit set for replacement */
        u8  instrlen;           /* length of original instruction */
-       u8  replacementlen;     /* length of new instruction */
 } __packed;
 
 void apply_alternative_instructions(void);
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 
 /*
- * |661:       |662:     |6620      |663:
- * +-----------+---------------------+
- * | oldinstr  | oldinstr_padding    |
- * |          +----------+----------+
- * |          |          |          |
- * |          | >6 bytes |6/4/2 nops|
- * |          |6 bytes jg----------->
- * +-----------+---------------------+
- *              ^^ static padding ^^
+ * +---------------------------------+
+ * |661:                            |662:
+ * | oldinstr                       |
+ * +---------------------------------+
  *
  * .altinstr_replacement section
- * +---------------------+-----------+
+ * +---------------------------------+
  * |6641:                           |6651:
  * | alternative instr 1            |
- * +-----------+---------+- - - - - -+
- * |6642:               |6652:      |
- * | alternative instr 2 | padding
- * +---------------------+- - - - - -+
- *                       ^ runtime ^
+ * +---------------------------------+
+ * |6642:                           |6652:
+ * | alternative instr 2            |
+ * +---------------------------------+
  *
  * .altinstructions section
  * +---------------------------------+
@@ -47,77 +40,31 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
  * +---------------------------------+
  */
 
-#define b_altinstr(num)        "664"#num
-#define e_altinstr(num)        "665"#num
-
-#define e_oldinstr_pad_end     "663"
+#define b_altinstr(num)                "664"#num
+#define e_altinstr(num)                "665"#num
 #define oldinstr_len           "662b-661b"
-#define oldinstr_total_len     e_oldinstr_pad_end"b-661b"
 #define altinstr_len(num)      e_altinstr(num)"b-"b_altinstr(num)"b"
-#define oldinstr_pad_len(num) \
-       "-(((" altinstr_len(num) ")-(" oldinstr_len ")) > 0) * " \
-       "((" altinstr_len(num) ")-(" oldinstr_len "))"
-
-#define INSTR_LEN_SANITY_CHECK(len)                                    \
-       ".if " len " > 254\n"                                           \
-       "\t.error \"cpu alternatives does not support instructions "    \
-               "blocks > 254 bytes\"\n"                                \
-       ".endif\n"                                                      \
-       ".if (" len ") %% 2\n"                                          \
-       "\t.error \"cpu alternatives instructions length is odd\"\n"    \
-       ".endif\n"
-
-#define OLDINSTR_PADDING(oldinstr, num)                                        \
-       ".if " oldinstr_pad_len(num) " > 6\n"                           \
-       "\tjg " e_oldinstr_pad_end "f\n"                                \
-       "6620:\n"                                                       \
-       "\t.rept (" oldinstr_pad_len(num) " - (6620b-662b)) / 2\n"      \
-       "\tnopr\n"                                                      \
-       ".else\n"                                                       \
-       "\t.rept " oldinstr_pad_len(num) " / 6\n"                       \
-       "\t.brcl 0,0\n"                                                 \
-       "\t.endr\n"                                                     \
-       "\t.rept " oldinstr_pad_len(num) " %% 6 / 4\n"                  \
-       "\tnop\n"                                                       \
-       "\t.endr\n"                                                     \
-       "\t.rept " oldinstr_pad_len(num) " %% 6 %% 4 / 2\n"             \
-       "\tnopr\n"                                                      \
-       ".endr\n"                                                       \
-       ".endif\n"
-
-#define OLDINSTR(oldinstr, num)                                                \
-       "661:\n\t" oldinstr "\n662:\n"                                  \
-       OLDINSTR_PADDING(oldinstr, num)                                 \
-       e_oldinstr_pad_end ":\n"                                        \
-       INSTR_LEN_SANITY_CHECK(oldinstr_len)
-
-#define OLDINSTR_2(oldinstr, num1, num2)                               \
-       "661:\n\t" oldinstr "\n662:\n"                                  \
-       ".if " altinstr_len(num1) " < " altinstr_len(num2) "\n"         \
-       OLDINSTR_PADDING(oldinstr, num2)                                \
-       ".else\n"                                                       \
-       OLDINSTR_PADDING(oldinstr, num1)                                \
-       ".endif\n"                                                      \
-       e_oldinstr_pad_end ":\n"                                        \
-       INSTR_LEN_SANITY_CHECK(oldinstr_len)
+
+#define OLDINSTR(oldinstr) \
+       "661:\n\t" oldinstr "\n662:\n"
 
 #define ALTINSTR_ENTRY(facility, num)                                  \
        "\t.long 661b - .\n"                    /* old instruction */   \
        "\t.long " b_altinstr(num)"b - .\n"     /* alt instruction */   \
        "\t.word " __stringify(facility) "\n"   /* facility bit    */   \
-       "\t.byte " oldinstr_total_len "\n"      /* source len      */   \
-       "\t.byte " altinstr_len(num) "\n"       /* alt instruction len */
+       "\t.byte " oldinstr_len "\n"            /* instruction len */   \
+       "\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n"     \
+       "\t.org . - (" altinstr_len(num) ") + (" oldinstr_len ")\n"
 
 #define ALTINSTR_REPLACEMENT(altinstr, num)    /* replacement */       \
-       b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"      \
-       INSTR_LEN_SANITY_CHECK(altinstr_len(num))
+       b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"
 
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, altinstr, facility) \
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
        ALTINSTR_REPLACEMENT(altinstr, 1)                               \
        ".popsection\n"                                                 \
-       OLDINSTR(oldinstr, 1)                                           \
+       OLDINSTR(oldinstr)                                              \
        ".pushsection .altinstructions,\"a\"\n"                         \
        ALTINSTR_ENTRY(facility, 1)                                     \
        ".popsection\n"
@@ -127,7 +74,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
        ALTINSTR_REPLACEMENT(altinstr1, 1)                              \
        ALTINSTR_REPLACEMENT(altinstr2, 2)                              \
        ".popsection\n"                                                 \
-       OLDINSTR_2(oldinstr, 1, 2)                                      \
+       OLDINSTR(oldinstr)                                              \
        ".pushsection .altinstructions,\"a\"\n"                         \
        ALTINSTR_ENTRY(facility1, 1)                                    \
        ALTINSTR_ENTRY(facility2, 2)                                    \
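Both the assembler macros and ALTINSTR_ENTRY now encode the length check as a pair of .org directives instead of explicit sanity checks plus nop padding: when the old and alternative sequences differ in size, one of the two expressions moves the location counter backwards, which the assembler rejects, so unequal lengths fail the build. A stand-alone illustration of the trick (the section name and byte patterns are made up for the example):

/* Assembles only if regions A and B are the same size; otherwise one
 * of the paired .org directives moves '.' backwards and gas errors out. */
asm(".pushsection .discard.orgcheck,\"a\"\n"
    "661:	.byte	0x07,0x07\n"	/* region A: 2 bytes */
    "662:	.byte	0x07,0x07\n"	/* region B: 2 bytes */
    "663:\n"
    "	.org	. - (662b - 661b) + (663b - 662b)\n"
    "	.org	. - (663b - 662b) + (662b - 661b)\n"
    ".popsection\n");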
index fb62df5e16a2cb8cd57ab003c898d6d4f4c582b9..f24d9591aaeda810ef4220272a380ad836b9a82f 100644 (file)
        stringify_in_c(.long    (_target) - .;)                         \
        stringify_in_c(.short   (_type);)                               \
        stringify_in_c(.macro extable_reg reg;)                         \
-       stringify_in_c(.set found, 0;)                                  \
-       stringify_in_c(.set regnr, 0;)                                  \
+       stringify_in_c(.set .Lfound, 0;)                                \
+       stringify_in_c(.set .Lregnr, 0;)                                \
        stringify_in_c(.irp rs,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15;) \
        stringify_in_c(.ifc "\reg", "%%\rs";)                           \
-       stringify_in_c(.set found, 1;)                                  \
-       stringify_in_c(.short regnr;)                                   \
+       stringify_in_c(.set .Lfound, 1;)                                \
+       stringify_in_c(.short .Lregnr;)                                 \
        stringify_in_c(.endif;)                                         \
-       stringify_in_c(.set regnr, regnr+1;)                            \
+       stringify_in_c(.set .Lregnr, .Lregnr+1;)                        \
        stringify_in_c(.endr;)                                          \
-       stringify_in_c(.ifne (found != 1);)                             \
+       stringify_in_c(.ifne (.Lfound != 1);)                           \
        stringify_in_c(.error "extable_reg: bad register argument";)    \
        stringify_in_c(.endif;)                                         \
        stringify_in_c(.endm;)                                          \
index 2c057e1f32000a6d15e2c9d6317b0a66cbd36519..82de2a7c41605fca221c4d0c86553b56362c850e 100644 (file)
@@ -26,14 +26,14 @@ static __always_inline void bcr_serialize(void)
        asm volatile(__ASM_BCR_SERIALIZE : : : "memory");
 }
 
-#define mb()           bcr_serialize()
-#define rmb()          barrier()
-#define wmb()          barrier()
-#define dma_rmb()      mb()
-#define dma_wmb()      mb()
-#define __smp_mb()     mb()
-#define __smp_rmb()    rmb()
-#define __smp_wmb()    wmb()
+#define __mb()         bcr_serialize()
+#define __rmb()                barrier()
+#define __wmb()                barrier()
+#define __dma_rmb()    __mb()
+#define __dma_wmb()    __mb()
+#define __smp_mb()     __mb()
+#define __smp_rmb()    __rmb()
+#define __smp_wmb()    __wmb()
 
 #define __smp_store_release(p, v)                                      \
 do {                                                                   \
index 0b25f28351edc496ddfe728b169a4700e724cd7f..aebe1e22c7befa486bbe269383164f9a792b5c89 100644 (file)
@@ -15,7 +15,8 @@
                "1:     .asciz  \""__FILE__"\"\n"               \
                ".previous\n"                                   \
                ".section __bug_table,\"awM\",@progbits,%2\n"   \
-               "2:     .long   0b-2b,1b-2b\n"                  \
+               "2:     .long   0b-.\n"                         \
+               "       .long   1b-.\n"                         \
                "       .short  %0,%1\n"                        \
                "       .org    2b+%2\n"                        \
                ".previous\n"                                   \
@@ -30,7 +31,7 @@
        asm_inline volatile(                                    \
                "0:     mc      0,0\n"                          \
                ".section __bug_table,\"awM\",@progbits,%1\n"   \
-               "1:     .long   0b-1b\n"                        \
+               "1:     .long   0b-.\n"                         \
                "       .short  %0\n"                           \
                "       .org    1b+%1\n"                        \
                ".previous\n"                                   \
index 1effac6a01520de46093dd4a0483eb8f0a97bc44..1c4f585dd39b6c96a02ba1c73c17dadc1fdc7c0b 100644 (file)
@@ -369,7 +369,7 @@ void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
 struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
 
 /* Function from drivers/s390/cio/chsc.c */
-int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl, long *clock_delta);
 int chsc_sstpi(void *page, void *result, size_t size);
 int chsc_stzi(void *page, void *result, size_t size);
 int chsc_sgib(u32 origin);
index cdc7ae72529d8a8cfbd89753a29a628cdfb91487..7d6fe813ac39ee03110e4706bc60b5f9ca9b8839 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 #include <linux/thread_info.h>
+#include <asm/ptrace.h>
 
 #define compat_mode_t  compat_mode_t
 typedef u16            compat_mode_t;
@@ -22,32 +23,8 @@ typedef u16          compat_mode_t;
        (__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
 })
 
-#define PSW32_MASK_PER         0x40000000UL
-#define PSW32_MASK_DAT         0x04000000UL
-#define PSW32_MASK_IO          0x02000000UL
-#define PSW32_MASK_EXT         0x01000000UL
-#define PSW32_MASK_KEY         0x00F00000UL
-#define PSW32_MASK_BASE                0x00080000UL    /* Always one */
-#define PSW32_MASK_MCHECK      0x00040000UL
-#define PSW32_MASK_WAIT                0x00020000UL
-#define PSW32_MASK_PSTATE      0x00010000UL
-#define PSW32_MASK_ASC         0x0000C000UL
-#define PSW32_MASK_CC          0x00003000UL
-#define PSW32_MASK_PM          0x00000f00UL
-#define PSW32_MASK_RI          0x00000080UL
-
 #define PSW32_MASK_USER                0x0000FF00UL
 
-#define PSW32_ADDR_AMODE       0x80000000UL
-#define PSW32_ADDR_INSN                0x7FFFFFFFUL
-
-#define PSW32_DEFAULT_KEY      (((u32) PAGE_DEFAULT_ACC) << 20)
-
-#define PSW32_ASC_PRIMARY      0x00000000UL
-#define PSW32_ASC_ACCREG       0x00004000UL
-#define PSW32_ASC_SECONDARY    0x00008000UL
-#define PSW32_ASC_HOME         0x0000C000UL
-
 #define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
                         PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
                         PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
index 82388da3f95ff0257e929ed51e81314c582229c9..267a8f88e1435da57e41da2be04185aa675f176c 100644 (file)
@@ -93,7 +93,9 @@ union ctlreg0 {
                unsigned long tcx  : 1; /* Transactional-Execution control */
                unsigned long pifo : 1; /* Transactional-Execution Program-
                                           Interruption-Filtering Override */
-               unsigned long      : 22;
+               unsigned long      : 3;
+               unsigned long ccc  : 1; /* Cryptography counter control */
+               unsigned long      : 18;
                unsigned long      : 3;
                unsigned long lap  : 1; /* Low-address-protection control */
                unsigned long      : 4;
index 2f0a1cacdf858d42f94db5b7f652840af6761645..000de2b1e67a2a036e40105f7355025263ea3b81 100644 (file)
@@ -9,19 +9,21 @@
 #include <linux/uaccess.h>
 #include <asm/timex.h>
 #include <asm/fpu/api.h>
+#include <asm/pai.h>
 
 #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
 
 void do_per_trap(struct pt_regs *regs);
 
-#ifdef CONFIG_DEBUG_ENTRY
-static __always_inline void arch_check_user_regs(struct pt_regs *regs)
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 {
-       debug_user_asce(0);
+       if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+               debug_user_asce(0);
+
+       pai_kernel_enter(regs);
 }
 
-#define arch_check_user_regs arch_check_user_regs
-#endif /* CONFIG_DEBUG_ENTRY */
+#define arch_enter_from_user_mode arch_enter_from_user_mode
 
 static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
                                                        unsigned long ti_work)
@@ -44,6 +46,8 @@ static __always_inline void arch_exit_to_user_mode(void)
 
        if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
                debug_user_asce(1);
+
+       pai_kernel_exit(current_pt_regs());
 }
 
 #define arch_exit_to_user_mode arch_exit_to_user_mode
index 3f8ee257f9aa3629e33efa472722ee895c9e6daa..a405b6bb89fbecdb8267fa5febb083c35e3d66e3 100644 (file)
@@ -133,6 +133,8 @@ int ipl_report_add_certificate(struct ipl_report *report, void *key,
  * DIAG 308 support
  */
 enum diag308_subcode  {
+       DIAG308_CLEAR_RESET = 0,
+       DIAG308_LOAD_NORMAL_RESET = 1,
        DIAG308_REL_HSA = 2,
        DIAG308_LOAD_CLEAR = 3,
        DIAG308_LOAD_NORMAL_DUMP = 4,
@@ -141,6 +143,10 @@ enum diag308_subcode  {
        DIAG308_LOAD_NORMAL = 7,
 };
 
+enum diag308_subcode_flags {
+       DIAG308_FLAG_EI = 1UL << 16,
+};
+
 enum diag308_rc {
        DIAG308_RC_OK           = 0x0001,
        DIAG308_RC_NOCONFIG     = 0x0102,
index 56002aeacabf80cb4954b808ecb4e4793063407c..26fe5e535728d3923fd0f87f1269fb63a5c30def 100644 (file)
@@ -200,7 +200,10 @@ struct lowcore {
        __u64   last_break_save_area;           /* 0x1338 */
        __u32   access_regs_save_area[16];      /* 0x1340 */
        __u64   cregs_save_area[16];            /* 0x1380 */
-       __u8    pad_0x1400[0x1800-0x1400];      /* 0x1400 */
+       __u8    pad_0x1400[0x1500-0x1400];      /* 0x1400 */
+       /* Cryptography-counter designation */
+       __u64   ccd;                            /* 0x1500 */
+       __u8    pad_0x1508[0x1800-0x1508];      /* 0x1508 */
 
        /* Transaction abort diagnostic block */
        struct pgm_tdb pgm_tdb;                 /* 0x1800 */
index 29208308383019be4ffc07e878d41169ad23aa4f..af1cd3a6f4060666aee6373891b40ff4e3ece46b 100644 (file)
@@ -101,7 +101,7 @@ void nmi_alloc_mcesa_early(u64 *mcesad);
 int nmi_alloc_mcesa(u64 *mcesad);
 void nmi_free_mcesa(u64 *mcesad);
 
-void s390_handle_mcck(void);
+void s390_handle_mcck(struct pt_regs *regs);
 void __s390_handle_mcck(void);
 int s390_do_machine_check(struct pt_regs *regs);
 
index 2cfcd5ac3a8b912ae5f0ec610cdba9b8f06fc002..d910d71b5bb50e72df5def006d748ee2b23614cb 100644 (file)
        .endm
 
        .macro  __DECODE_R expand,reg
-       .set __decode_fail,1
+       .set .L__decode_fail,1
        .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
        .ifc \reg,%r\r1
        \expand \r1
-       .set __decode_fail,0
+       .set .L__decode_fail,0
        .endif
        .endr
-       .if __decode_fail == 1
+       .if .L__decode_fail == 1
        .error "__DECODE_R failed"
        .endif
        .endm
 
        .macro  __DECODE_RR expand,rsave,rtarget
-       .set __decode_fail,1
+       .set .L__decode_fail,1
        .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
        .ifc \rsave,%r\r1
        .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
        .ifc \rtarget,%r\r2
        \expand \r1,\r2
-       .set __decode_fail,0
+       .set .L__decode_fail,0
        .endif
        .endr
        .endif
        .endr
-       .if __decode_fail == 1
+       .if .L__decode_fail == 1
        .error "__DECODE_RR failed"
        .endif
        .endm
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
new file mode 100644 (file)
index 0000000..5b7e33a
--- /dev/null
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Processor Activity Instrumentation support for cryptography counters
+ *
+ *  Copyright IBM Corp. 2022
+ *  Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef _ASM_S390_PAI_H
+#define _ASM_S390_PAI_H
+
+#include <linux/jump_label.h>
+#include <asm/lowcore.h>
+#include <asm/ptrace.h>
+
+struct qpaci_info_block {
+       u64 header;
+       struct {
+               u64 : 8;
+               u64 num_cc : 8; /* # of supported crypto counters */
+               u64 : 48;
+       };
+};
+
+static inline int qpaci(struct qpaci_info_block *info)
+{
+       /* Size of info (in double words minus one) */
+       size_t size = sizeof(*info) / sizeof(u64) - 1;
+       int cc;
+
+       asm volatile(
+               "       lgr     0,%[size]\n"
+               "       .insn   s,0xb28f0000,%[info]\n"
+               "       lgr     %[size],0\n"
+               "       ipm     %[cc]\n"
+               "       srl     %[cc],28\n"
+               : [cc] "=d" (cc), [info] "=Q" (*info), [size] "+&d" (size)
+               :
+               : "0", "cc", "memory");
+       return cc ? (size + 1) * sizeof(u64) : 0;
+}
+
+#define PAI_CRYPTO_BASE                        0x1000  /* First event number */
+#define PAI_CRYPTO_MAXCTR              256     /* Max # of event counters */
+#define PAI_CRYPTO_KERNEL_OFFSET       2048
+
+DECLARE_STATIC_KEY_FALSE(pai_key);
+
+static __always_inline void pai_kernel_enter(struct pt_regs *regs)
+{
+       if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+               return;
+       if (!static_branch_unlikely(&pai_key))
+               return;
+       if (!S390_lowcore.ccd)
+               return;
+       if (!user_mode(regs))
+               return;
+       WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd | PAI_CRYPTO_KERNEL_OFFSET);
+}
+
+static __always_inline void pai_kernel_exit(struct pt_regs *regs)
+{
+       if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+               return;
+       if (!static_branch_unlikely(&pai_key))
+               return;
+       if (!S390_lowcore.ccd)
+               return;
+       if (!user_mode(regs))
+               return;
+       WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
+}
+
+#endif
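qpaci() wraps the query instruction and returns the size of the information block, or zero when no information is available; num_cc reports how many crypto counters the machine supports. A hedged sketch of a consumer probing the facility (the function name and message are illustrative, not from this patch):

#include <linux/init.h>
#include <linux/printk.h>
#include <asm/pai.h>

static int __init pai_probe_sketch(void)
{
	struct qpaci_info_block ib;

	if (!qpaci(&ib))	/* zero: no information block returned */
		return -ENODEV;
	pr_info("PAI: %u crypto counters supported\n",
		(unsigned int)ib.num_cc);
	return 0;
}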
index 5dfe4758827709f6b0cf8cc57c16b4acf6666137..3bb4e7e33a0ee4b96675fe7c583210e6e34e829c 100644 (file)
@@ -17,9 +17,14 @@ extern debug_info_t *pci_debug_err_id;
                debug_text_event(pci_debug_err_id, 0, debug_buffer);            \
        } while (0)
 
+static inline void zpci_err_hex_level(int level, void *addr, int len)
+{
+       debug_event(pci_debug_err_id, level, addr, len);
+}
+
 static inline void zpci_err_hex(void *addr, int len)
 {
-       debug_event(pci_debug_err_id, 0, addr, len);
+       zpci_err_hex_level(0, addr, len);
 }
 
 #endif
index d9d5350cc3ec367d9ed241b410b5f48c0936509c..bf15da0fedbca5ed6cc0d24a6af9b348b2776fcd 100644 (file)
@@ -46,10 +46,17 @@ static inline bool test_preempt_need_resched(void)
 
 static inline void __preempt_count_add(int val)
 {
-       if (__builtin_constant_p(val) && (val >= -128) && (val <= 127))
-               __atomic_add_const(val, &S390_lowcore.preempt_count);
-       else
-               __atomic_add(val, &S390_lowcore.preempt_count);
+       /*
+        * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
+        * enabled, gcc 12 fails to handle __builtin_constant_p().
+        */
+       if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) {
+               if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) {
+                       __atomic_add_const(val, &S390_lowcore.preempt_count);
+                       return;
+               }
+       }
+       __atomic_add(val, &S390_lowcore.preempt_count);
 }
 
 static inline void __preempt_count_sub(int val)
index ff1e25d515a855f666278ca3e444ccecb9b1da80..add764a2be8cadf2ef8cae76af13ed3dc276aede 100644 (file)
@@ -83,6 +83,7 @@ void cpu_detect_mhz_feature(void);
 extern const struct seq_operations cpuinfo_op;
 extern void execve_tail(void);
 extern void __bpon(void);
+unsigned long vdso_size(void);
 
 /*
  * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
@@ -94,9 +95,10 @@ extern void __bpon(void);
                                        (_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
 #define TASK_SIZE_MAX          (-PAGE_SIZE)
 
-#define STACK_TOP              (test_thread_flag(TIF_31BIT) ? \
-                                       _REGION3_SIZE : _REGION2_SIZE)
-#define STACK_TOP_MAX          _REGION2_SIZE
+#define VDSO_BASE              (STACK_TOP + PAGE_SIZE)
+#define VDSO_LIMIT             (test_thread_flag(TIF_31BIT) ? _REGION3_SIZE : _REGION2_SIZE)
+#define STACK_TOP              (VDSO_LIMIT - vdso_size() - PAGE_SIZE)
+#define STACK_TOP_MAX          (_REGION2_SIZE - vdso_size() - PAGE_SIZE)
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
index ddb70fb13fbc99a0c374c2a455f57dbb43675013..8bae33ab320ac154fc6cb28a537877b90cfd8e0d 100644 (file)
@@ -71,6 +71,35 @@ enum {
        &(*(struct psw_bits *)(&(__psw)));      \
 }))
 
+#define PSW32_MASK_PER         0x40000000UL
+#define PSW32_MASK_DAT         0x04000000UL
+#define PSW32_MASK_IO          0x02000000UL
+#define PSW32_MASK_EXT         0x01000000UL
+#define PSW32_MASK_KEY         0x00F00000UL
+#define PSW32_MASK_BASE                0x00080000UL    /* Always one */
+#define PSW32_MASK_MCHECK      0x00040000UL
+#define PSW32_MASK_WAIT                0x00020000UL
+#define PSW32_MASK_PSTATE      0x00010000UL
+#define PSW32_MASK_ASC         0x0000C000UL
+#define PSW32_MASK_CC          0x00003000UL
+#define PSW32_MASK_PM          0x00000f00UL
+#define PSW32_MASK_RI          0x00000080UL
+
+#define PSW32_ADDR_AMODE       0x80000000UL
+#define PSW32_ADDR_INSN                0x7FFFFFFFUL
+
+#define PSW32_DEFAULT_KEY      (((u32)PAGE_DEFAULT_ACC) << 20)
+
+#define PSW32_ASC_PRIMARY      0x00000000UL
+#define PSW32_ASC_ACCREG       0x00004000UL
+#define PSW32_ASC_SECONDARY    0x00008000UL
+#define PSW32_ASC_HOME         0x0000C000UL
+
+typedef struct {
+       unsigned int mask;
+       unsigned int addr;
+} psw_t32 __aligned(8);
+
 #define PGM_INT_CODE_MASK      0x7f
 #define PGM_INT_CODE_PER       0x80
 
index 04cb1e7582a66acfe8198237b6f114b3f903bcc3..236b34b75ddb8daca11f013dfc2881ebc8b5c21b 100644 (file)
@@ -87,6 +87,7 @@ struct sclp_info {
        unsigned char has_diag318 : 1;
        unsigned char has_sipl : 1;
        unsigned char has_dirq : 1;
+       unsigned char has_iplcc : 1;
        unsigned int ibc;
        unsigned int mtid;
        unsigned int mtid_cp;
index a7c3ccf681da074af56d68177719d1f84508da2e..7ce584aff5bb5def0ea00f323c37d1fcdb55b30a 100644 (file)
@@ -508,9 +508,21 @@ static inline int scsw_cmd_is_valid_zcc(union scsw *scsw)
  */
 static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
 {
-       return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
-              !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
-              (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS);
+       /* Must be status pending. */
+       if (!(scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND))
+               return 0;
+
+       /* Must have alert status. */
+       if (!(scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS))
+               return 0;
+
+       /* Must be alone or together with primary, secondary or both,
+        * => no intermediate status.
+        */
+       if (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS)
+               return 0;
+
+       return 1;
 }
 
 /**
@@ -522,10 +534,25 @@ static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
  */
 static inline int scsw_cmd_is_valid_pno(union scsw *scsw)
 {
-       return (scsw->cmd.fctl != 0) &&
-              (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
-              (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) ||
-                 (scsw->cmd.actl & SCSW_ACTL_SUSPENDED));
+       /* Must indicate at least one I/O function. */
+       if (!scsw->cmd.fctl)
+               return 0;
+
+       /* Must be status pending. */
+       if (!(scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND))
+               return 0;
+
+       /* Can be status pending alone, or with any combination of primary,
+        * secondary and alert => no intermediate status.
+        */
+       if (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS))
+               return 1;
+
+       /* If intermediate, must be suspended. */
+       if (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)
+               return 1;
+
+       return 0;
 }
 
 /**
@@ -675,9 +702,21 @@ static inline int scsw_tm_is_valid_q(union scsw *scsw)
  */
 static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
 {
-       return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
-              !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
-              (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS);
+       /* Must be status pending. */
+       if (!(scsw->tm.stctl & SCSW_STCTL_STATUS_PEND))
+               return 0;
+
+       /* Must have alert status. */
+       if (!(scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS))
+               return 0;
+
+       /* Must be alone or together with primary, secondary or both,
+        * => no intermediate status.
+        */
+       if (scsw->tm.stctl & SCSW_STCTL_INTER_STATUS)
+               return 0;
+
+       return 1;
 }
 
 /**
@@ -689,11 +728,25 @@ static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
  */
 static inline int scsw_tm_is_valid_pno(union scsw *scsw)
 {
-       return (scsw->tm.fctl != 0) &&
-              (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
-              (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) ||
-                ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
-                 (scsw->tm.actl & SCSW_ACTL_SUSPENDED)));
+       /* Must indicate at least one I/O function. */
+       if (!scsw->tm.fctl)
+               return 0;
+
+       /* Must be status pending. */
+       if (!(scsw->tm.stctl & SCSW_STCTL_STATUS_PEND))
+               return 0;
+
+       /* Can be status pending alone, or with any combination of primary,
+        * secondary and alert => no intermediate status.
+        */
+       if (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS))
+               return 1;
+
+       /* If intermediate, must be suspended. */
+       if (scsw->tm.actl & SCSW_ACTL_SUSPENDED)
+               return 1;
+
+       return 0;
 }
 
 /**
index 24a54443c8652a1512f7ea3cf5a75f6324f29fe4..37127cd7749eae23729ffa20878e13b6f500dcb4 100644 (file)
@@ -77,8 +77,9 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
 static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
        typecheck(int, lp->lock);
+       kcsan_release();
        asm_inline volatile(
-               ALTERNATIVE("", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
+               ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
                "       sth     %1,%0\n"
                : "=R" (((unsigned short *) &lp->lock)[1])
                : "d" (0) : "cc", "memory");
index ba07463897c10e4288ed375c4ff1d3de3fefc11f..4d74d7e33340b1e67e569acebd6e911d9bf991d6 100644 (file)
@@ -44,8 +44,8 @@ struct stp_sstpi {
        u32             : 32;
        u32 ctnid[3];
        u32             : 32;
-       u32 todoff[4];
-       u32 rsvd[48];
+       u64 todoff;
+       u32 rsvd[50];
 } __packed;
 
 struct stp_tzib {
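
The layout stays the same size: u32 todoff[4] (16 bytes) becomes a single u64 (8 bytes) while rsvd grows by two words (8 bytes). A compile-time sketch of that arithmetic, kept separate from the real struct:

#include <assert.h>
#include <stdint.h>

/* Field-size arithmetic only, not the real struct. */
static_assert(sizeof(uint32_t[4]) == sizeof(uint64_t) + 2 * sizeof(uint32_t),
	      "todoff shrink is offset exactly by two extra reserved words");

int main(void) { return 0; }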
index 2cfce42aa7fc43f66af4bc191aa6177eea4f1f55..ce878e85b6e4e9a2c7c2f6f32a9dcd6bbc4087bb 100644 (file)
@@ -197,6 +197,7 @@ static inline cycles_t get_cycles(void)
 {
        return (cycles_t) get_tod_clock() >> 2;
 }
+#define get_cycles get_cycles
 
 int get_phys_clock(unsigned long *clock);
 void init_cpu_timer(void);
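
Defining get_cycles to itself is the common kernel idiom for advertising an architecture override: generic code can then test the macro and only supply a fallback when it is absent. A minimal sketch of the pattern (the fallback shown is illustrative, not the exact generic header):

/* arch side (sketch) */
typedef unsigned long long cycles_t;

static inline cycles_t get_cycles(void)
{
	return 0;	/* stand-in for the TOD clock read */
}
#define get_cycles get_cycles	/* advertise the override */

/* generic side (sketch) */
#ifndef get_cycles
static inline cycles_t get_cycles(void)
{
	return 0;	/* fallback when no architecture override exists */
}
#endif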
index 87e6cc2aeba4e1de3d3365590b7a0a112ddd0eae..95480ed9149efb5c4d2d27654903ddb0c6d69741 100644 (file)
 .macro VLM     vfrom, vto, disp, base, hint=3
        VX_NUM  v1, \vfrom
        VX_NUM  v3, \vto
-       GR_NUM  b2, \base           /* Base register */
+       GR_NUM  b2, \base
        .word   0xE700 | ((v1&15) << 4) | (v3&15)
        .word   (b2 << 12) | (\disp)
        MRXBOPC \hint, 0x36, v1, v3
 .macro VST     vr1, disp, index="%r0", base
        VX_NUM  v1, \vr1
        GR_NUM  x2, \index
-       GR_NUM  b2, \base           /* Base register */
+       GR_NUM  b2, \base
        .word   0xE700 | ((v1&15) << 4) | (x2&15)
        .word   (b2 << 12) | (\disp)
        MRXBOPC 0, 0x0E, v1
 .macro VSTM    vfrom, vto, disp, base, hint=3
        VX_NUM  v1, \vfrom
        VX_NUM  v3, \vto
-       GR_NUM  b2, \base           /* Base register */
+       GR_NUM  b2, \base
        .word   0xE700 | ((v1&15) << 4) | (v3&15)
        .word   (b2 << 12) | (\disp)
        MRXBOPC \hint, 0x3E, v1, v3
index 7349e96d28a0d3a1b7d92de47b1eb149a940a814..924b876f992c15e0efc4b8cf7094183987cc8bc3 100644 (file)
@@ -171,7 +171,7 @@ struct pkey_skey2pkey {
 #define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
 
 /*
- * Verify the given CCA AES secure key for being able to be useable with
+ * Verify the given CCA AES secure key for being able to be usable with
  * the pkey module. Check for correct key type and check for having at
  * least one crypto card being able to handle this key (master key
  * or old master key verification pattern matches).
index 2f04a5499d742d03837f19dedb038add46d4f8a3..d83713f67530943945e9627008f4cf8ded26d748 100644 (file)
@@ -4,7 +4,7 @@
  *
  *  zcrypt 2.2.1 (user-visible header)
  *
- *  Copyright IBM Corp. 2001, 2019
+ *  Copyright IBM Corp. 2001, 2022
  *  Author(s): Robert Burroughs
  *            Eric Rossman (edrossma@us.ibm.com)
  *
@@ -85,7 +85,7 @@ struct ica_rsa_modexpo_crt {
 struct CPRBX {
        __u16        cprb_len;          /* CPRB length        220        */
        __u8         cprb_ver_id;       /* CPRB version id.   0x02       */
-       __u8         pad_000[3];        /* Alignment pad bytes           */
+       __u8         _pad_000[3];       /* Alignment pad bytes           */
        __u8         func_id[2];        /* function id        0x5432     */
        __u8         cprb_flags[4];     /* Flags                         */
        __u32        req_parml;         /* request parameter buffer len  */
@@ -95,19 +95,19 @@ struct CPRBX {
        __u32        rpl_datal;         /* reply data block len          */
        __u32        rpld_datal;        /* replied data block len        */
        __u32        req_extbl;         /* request extension block len   */
-       __u8         pad_001[4];        /* reserved                      */
+       __u8         _pad_001[4];       /* reserved                      */
        __u32        rpld_extbl;        /* replied extension block len   */
-       __u8         padx000[16 - sizeof(__u8 *)];
+       __u8         _pad_002[16 - sizeof(__u8 *)];
        __u8 __user *req_parmb;         /* request parm block 'address'  */
-       __u8         padx001[16 - sizeof(__u8 *)];
+       __u8         _pad_003[16 - sizeof(__u8 *)];
        __u8 __user *req_datab;         /* request data block 'address'  */
-       __u8         padx002[16 - sizeof(__u8 *)];
+       __u8         _pad_004[16 - sizeof(__u8 *)];
        __u8 __user *rpl_parmb;         /* reply parm block 'address'    */
-       __u8         padx003[16 - sizeof(__u8 *)];
+       __u8         _pad_005[16 - sizeof(__u8 *)];
        __u8 __user *rpl_datab;         /* reply data block 'address'    */
-       __u8         padx004[16 - sizeof(__u8 *)];
+       __u8         _pad_006[16 - sizeof(__u8 *)];
        __u8 __user *req_extb;          /* request extension block 'addr'*/
-       __u8         padx005[16 - sizeof(__u8 *)];
+       __u8         _pad_007[16 - sizeof(__u8 *)];
        __u8 __user *rpl_extb;          /* reply extension block 'address'*/
        __u16        ccp_rtcode;        /* server return code            */
        __u16        ccp_rscode;        /* server reason code            */
@@ -115,12 +115,10 @@ struct CPRBX {
        __u8         logon_id[8];       /* Logon Identifier              */
        __u8         mac_value[8];      /* Mac Value                     */
        __u8         mac_content_flgs;  /* Mac content flag byte         */
-       __u8         pad_002;           /* Alignment                     */
+       __u8         _pad_008;          /* Alignment                     */
        __u16        domain;            /* Domain                        */
-       __u8         usage_domain[4];   /* Usage domain                  */
-       __u8         cntrl_domain[4];   /* Control domain                */
-       __u8         S390enf_mask[4];   /* S/390 enforcement mask        */
-       __u8         pad_004[36];       /* reserved                      */
+       __u8         _pad_009[12];      /* reserved, checked for zeros   */
+       __u8         _pad_010[36];      /* reserved                      */
 } __attribute__((packed));
 
 /**
@@ -238,8 +236,8 @@ struct zcrypt_device_matrix_ext {
 };
 
 #define AUTOSELECT  0xFFFFFFFF
-#define AUTOSEL_AP  ((__u16) 0xFFFF)
-#define AUTOSEL_DOM ((__u16) 0xFFFF)
+#define AUTOSEL_AP  ((__u16)0xFFFF)
+#define AUTOSEL_DOM ((__u16)0xFFFF)
 
 #define ZCRYPT_IOCTL_MAGIC 'z'
 
@@ -305,12 +303,12 @@ struct zcrypt_device_matrix_ext {
 /**
  * Supported ioctl calls
  */
-#define ICARSAMODEXPO  _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
-#define ICARSACRT      _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
-#define ZSECSENDCPRB   _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
-#define ZSENDEP11CPRB  _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
+#define ICARSAMODEXPO  _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
+#define ICARSACRT      _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+#define ZSECSENDCPRB   _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB  _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
 
-#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
+#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
 #define ZCRYPT_STATUS_MASK   _IOR(ZCRYPT_IOCTL_MAGIC, 0x58, char[MAX_ZDEV_CARDIDS_EXT])
 #define ZCRYPT_QDEPTH_MASK   _IOR(ZCRYPT_IOCTL_MAGIC, 0x59, char[MAX_ZDEV_CARDIDS_EXT])
 #define ZCRYPT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x5a, int[MAX_ZDEV_CARDIDS_EXT])
@@ -352,7 +350,7 @@ struct zcrypt_device_matrix {
 };
 
 /* Deprecated: use ZCRYPT_DEVICE_STATUS */
-#define ZDEVICESTATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
+#define ZDEVICESTATUS _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
 /* Deprecated: use ZCRYPT_STATUS_MASK */
 #define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
 /* Deprecated: use ZCRYPT_QDEPTH_MASK */
index c8d1b6aa823e386ea91d1d13dbf8e78f88655b08..5851041bb214c9a62f111fa545c1f880785663e4 100644 (file)
@@ -72,6 +72,7 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)  += ima_arch.o
 obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_cpum_cf_common.o
 obj-$(CONFIG_PERF_EVENTS)      += perf_cpum_cf.o perf_cpum_sf.o
 obj-$(CONFIG_PERF_EVENTS)      += perf_cpum_cf_events.o perf_regs.o
+obj-$(CONFIG_PERF_EVENTS)      += perf_pai_crypto.o
 
 obj-$(CONFIG_TRACEPOINTS)      += trace.o
 obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE))  += uv.o
index cce0ddee2d02d2731ef36635a18082f9b13f76c3..e7bca29f9c3485bead01a70b6935f3f9041b7f8e 100644 (file)
@@ -7,8 +7,6 @@
 #include <asm/facility.h>
 #include <asm/nospec-branch.h>
 
-#define MAX_PATCH_LEN (255 - 1)
-
 static int __initdata_or_module alt_instr_disabled;
 
 static int __init disable_alternative_instructions(char *str)
@@ -19,85 +17,30 @@ static int __init disable_alternative_instructions(char *str)
 
 early_param("noaltinstr", disable_alternative_instructions);
 
-struct brcl_insn {
-       u16 opc;
-       s32 disp;
-} __packed;
-
-static u16 __initdata_or_module nop16 = 0x0700;
-static u32 __initdata_or_module nop32 = 0x47000000;
-static struct brcl_insn __initdata_or_module nop48 = {
-       0xc004, 0
-};
-
-static const void *nops[] __initdata_or_module = {
-       &nop16,
-       &nop32,
-       &nop48
-};
-
-static void __init_or_module add_jump_padding(void *insns, unsigned int len)
-{
-       struct brcl_insn brcl = {
-               0xc0f4,
-               len / 2
-       };
-
-       memcpy(insns, &brcl, sizeof(brcl));
-       insns += sizeof(brcl);
-       len -= sizeof(brcl);
-
-       while (len > 0) {
-               memcpy(insns, &nop16, 2);
-               insns += 2;
-               len -= 2;
-       }
-}
-
-static void __init_or_module add_padding(void *insns, unsigned int len)
-{
-       if (len > 6)
-               add_jump_padding(insns, len);
-       else if (len >= 2)
-               memcpy(insns, nops[len / 2 - 1], len);
-}
-
 static void __init_or_module __apply_alternatives(struct alt_instr *start,
                                                  struct alt_instr *end)
 {
        struct alt_instr *a;
        u8 *instr, *replacement;
-       u8 insnbuf[MAX_PATCH_LEN];
 
        /*
         * The scan order should be from start to end. A later scanned
         * alternative code can overwrite previously scanned alternative code.
         */
        for (a = start; a < end; a++) {
-               int insnbuf_sz = 0;
-
                instr = (u8 *)&a->instr_offset + a->instr_offset;
                replacement = (u8 *)&a->repl_offset + a->repl_offset;
 
                if (!__test_facility(a->facility, alt_stfle_fac_list))
                        continue;
 
-               if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) {
+               if (unlikely(a->instrlen % 2)) {
                        WARN_ONCE(1, "cpu alternatives instructions length is "
                                     "odd, skipping patching\n");
                        continue;
                }
 
-               memcpy(insnbuf, replacement, a->replacementlen);
-               insnbuf_sz = a->replacementlen;
-
-               if (a->instrlen > a->replacementlen) {
-                       add_padding(insnbuf + a->replacementlen,
-                                   a->instrlen - a->replacementlen);
-                       insnbuf_sz += a->instrlen - a->replacementlen;
-               }
-
-               s390_kernel_write(instr, insnbuf, insnbuf_sz);
+               s390_kernel_write(instr, replacement, a->instrlen);
        }
 }
 
index 64509e7dbd3bf2ede50f60ebcd11aeab60b31ca0..ef23739b277c78572897911a71e407c920985a40 100644 (file)
@@ -5,69 +5,59 @@
 #include <linux/compat.h>
 #include <linux/socket.h>
 #include <linux/syscalls.h>
+#include <asm/ptrace.h>
 
-/* Macro that masks the high order bit of an 32 bit pointer and converts it*/
-/*       to a 64 bit pointer */
-#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
-#define AA(__x)                                \
-       ((unsigned long)(__x))
+/*
+ * Macro that masks the high order bit of a 32 bit pointer and
+ * converts it to a 64 bit pointer.
+ */
+#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
+#define AA(__x)        ((unsigned long)(__x))
 
 /* Now 32bit compatibility types */
 struct ipc_kludge_32 {
-        __u32   msgp;                           /* pointer              */
-        __s32   msgtyp;
+       __u32   msgp;   /* pointer */
+       __s32   msgtyp;
 };
 
 /* asm/sigcontext.h */
-typedef union
-{
-       __u64   d;
-       __u32   f; 
+typedef union {
+       __u64   d;
+       __u32   f;
 } freg_t32;
 
-typedef struct
-{
+typedef struct {
        unsigned int    fpc;
        unsigned int    pad;
-       freg_t32        fprs[__NUM_FPRS];              
+       freg_t32        fprs[__NUM_FPRS];
 } _s390_fp_regs32;
 
-typedef struct 
-{
-        __u32   mask;
-        __u32  addr;
-} _psw_t32 __attribute__ ((aligned(8)));
-
-typedef struct
-{
-       _psw_t32        psw;
+typedef struct {
+       psw_t32         psw;
        __u32           gprs[__NUM_GPRS];
        __u32           acrs[__NUM_ACRS];
 } _s390_regs_common32;
 
-typedef struct
-{
+typedef struct {
        _s390_regs_common32 regs;
-       _s390_fp_regs32     fpregs;
+       _s390_fp_regs32     fpregs;
 } _sigregs32;
 
-typedef struct
-{
-       __u32 gprs_high[__NUM_GPRS];
-       __u64 vxrs_low[__NUM_VXRS_LOW];
-       __vector128 vxrs_high[__NUM_VXRS_HIGH];
-       __u8 __reserved[128];
+typedef struct {
+       __u32           gprs_high[__NUM_GPRS];
+       __u64           vxrs_low[__NUM_VXRS_LOW];
+       __vector128     vxrs_high[__NUM_VXRS_HIGH];
+       __u8            __reserved[128];
 } _sigregs_ext32;
 
 #define _SIGCONTEXT_NSIG32     64
 #define _SIGCONTEXT_NSIG_BPW32 32
 #define __SIGNAL_FRAMESIZE32   96
-#define _SIGMASK_COPY_SIZE32   (sizeof(u32)*2)
+#define _SIGMASK_COPY_SIZE32   (sizeof(u32) * 2)
 
-struct sigcontext32
-{
+struct sigcontext32 {
        __u32   oldmask[_COMPAT_NSIG_WORDS];
-       __u32   sregs;                          /* pointer */
+       __u32   sregs;  /* pointer */
 };
 
 /* asm/signal.h */
@@ -75,11 +65,11 @@ struct sigcontext32
 /* asm/ucontext.h */
 struct ucontext32 {
        __u32                   uc_flags;
-       __u32                   uc_link;        /* pointer */   
+       __u32                   uc_link;        /* pointer */
        compat_stack_t          uc_stack;
        _sigregs32              uc_mcontext;
        compat_sigset_t         uc_sigmask;
-       /* Allow for uc_sigmask growth.  Glibc uses a 1024-bit sigset_t.  */
+       /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
        unsigned char           __unused[128 - sizeof(compat_sigset_t)];
        _sigregs_ext32          uc_mcontext_ext;
 };
@@ -88,25 +78,6 @@ struct stat64_emu31;
 struct mmap_arg_struct_emu31;
 struct fadvise64_64_args;
 
-long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group);
-long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group);
-long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group);
-long compat_sys_s390_setregid16(u16 rgid, u16 egid);
-long compat_sys_s390_setgid16(u16 gid);
-long compat_sys_s390_setreuid16(u16 ruid, u16 euid);
-long compat_sys_s390_setuid16(u16 uid);
-long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid);
-long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
-long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid);
-long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
-long compat_sys_s390_setfsuid16(u16 uid);
-long compat_sys_s390_setfsgid16(u16 gid);
-long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist);
-long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist);
-long compat_sys_s390_getuid16(void);
-long compat_sys_s390_geteuid16(void);
-long compat_sys_s390_getgid16(void);
-long compat_sys_s390_getegid16(void);
 long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
 long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
 long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
@@ -118,8 +89,8 @@ long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbu
 long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
 long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
 long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
-long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count);
-long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count);
+long compat_sys_s390_read(unsigned int fd, char __user *buf, compat_size_t count);
+long compat_sys_s390_write(unsigned int fd, const char __user *buf, compat_size_t count);
 long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
 long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
 long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
index 59b69c8ab5e1f5ef37d9953ca189110662bf2754..df41132ccd06898c03fc0d40d92535d190f7ce80 100644 (file)
@@ -53,19 +53,19 @@ STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
 _LPP_OFFSET    = __LC_LPP
 
        .macro STBEAR address
-       ALTERNATIVE "", ".insn  s,0xb2010000,\address", 193
+       ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
        .endm
 
        .macro LBEAR address
-       ALTERNATIVE "", ".insn  s,0xb2000000,\address", 193
+       ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
        .endm
 
        .macro LPSWEY address,lpswe
-       ALTERNATIVE "b \lpswe", ".insn siy,0xeb0000000071,\address,0", 193
+       ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
        .endm
 
        .macro MBEAR reg
-       ALTERNATIVE "", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+       ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
        .endm
 
        .macro  CHECK_STACK savearea
@@ -121,16 +121,16 @@ _LPP_OFFSET       = __LC_LPP
        .endm
 
        .macro BPOFF
-       ALTERNATIVE "", ".insn rrf,0xb2e80000,0,0,12,0", 82
+       ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
        .endm
 
        .macro BPON
-       ALTERNATIVE "", ".insn rrf,0xb2e80000,0,0,13,0", 82
+       ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
        .endm
 
        .macro BPENTER tif_ptr,tif_mask
        ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
-                   "", 82
+                   "j .+12; nop; nop", 82
        .endm
 
        .macro BPEXIT tif_ptr,tif_mask
@@ -172,9 +172,19 @@ _LPP_OFFSET        = __LC_LPP
        lgr     %r14,\reg
        larl    %r13,\start
        slgr    %r14,%r13
-       lghi    %r13,\end - \start
-       clgr    %r14,%r13
+#ifdef CONFIG_AS_IS_LLVM
+       clgfrl  %r14,.Lrange_size\@
+#else
+       clgfi   %r14,\end - \start
+#endif
        jhe     \outside_label
+#ifdef CONFIG_AS_IS_LLVM
+       .section .rodata, "a"
+       .align 4
+.Lrange_size\@:
+       .long   \end - \start
+       .previous
+#endif
        .endm
 
        .macro SIEEXIT
@@ -226,7 +236,7 @@ ENTRY(__switch_to)
        aghi    %r3,__TASK_pid
        mvc     __LC_CURRENT_PID(4,%r0),0(%r3)  # store pid of next
        lmg     %r6,%r15,__SF_GPRS(%r15)        # load gprs of next task
-       ALTERNATIVE "", "lpp _LPP_OFFSET", 40
+       ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
        BR_EX   %r14
 ENDPROC(__switch_to)
 
@@ -473,10 +483,7 @@ ENTRY(\name)
        mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
        MBEAR   %r11
        stmg    %r8,%r9,__PT_PSW(%r11)
-       tm      %r8,0x0001              # coming from user space?
-       jno     1f
-       lctlg   %c1,%c1,__LC_KERNEL_ASCE
-1:     lgr     %r2,%r11                # pass pointer to pt_regs
+       lgr     %r2,%r11                # pass pointer to pt_regs
        brasl   %r14,\handler
        mvc     __LC_RETURN_PSW(16),__PT_PSW(%r11)
        tmhh    %r8,0x0001              # returning to user ?
@@ -602,6 +609,7 @@ ENTRY(mcck_int_handler)
        mvc     STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
        xc      __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
        la      %r11,STACK_FRAME_OVERHEAD(%r1)
+       lgr     %r2,%r11
        lgr     %r15,%r1
        brasl   %r14,s390_handle_mcck
 .Lmcck_return:
@@ -612,7 +620,7 @@ ENTRY(mcck_int_handler)
        jno     0f
        BPEXIT  __TI_flags(%r12),_TIF_ISOLATE_BP
        stpt    __LC_EXIT_TIMER
-0:     ALTERNATIVE "", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+0:     ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
        LBEAR   0(%r12)
        lmg     %r11,%r15,__PT_R11(%r11)
        LPSWEY  __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@@ -648,7 +656,7 @@ ENTRY(mcck_int_handler)
 ENDPROC(mcck_int_handler)
 
 ENTRY(restart_int_handler)
-       ALTERNATIVE "", "lpp _LPP_OFFSET", 40
+       ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
        stg     %r15,__LC_SAVE_AREA_RESTART
        TSTMSK  __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
        jz      0f
index 3033f616e256939a8d62db8ab90a7a623bddabe2..45393919fe615cb9da6799d60d7e4bbcecc2f048 100644 (file)
@@ -205,7 +205,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq)
        unsigned long flags;
        int cpu;
 
-       irq_lock_sparse();
+       rcu_read_lock();
        desc = irq_to_desc(irq);
        if (!desc)
                goto out;
@@ -224,7 +224,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq)
        seq_putc(p, '\n');
        raw_spin_unlock_irqrestore(&desc->lock, flags);
 out:
-       irq_unlock_sparse();
+       rcu_read_unlock();
 }
 
 /*
index 6ebf02e15c85812bf4a42ff4afa5d547f5d9171a..ab761c008f9813e8a4526b05e1947c244c1e3c6b 100644 (file)
 #include <asm/stacktrace.h>
 #include <asm/switch_to.h>
 #include <asm/nmi.h>
+#include <asm/sclp.h>
 
-typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
+typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long,
+                                 unsigned long);
 
 extern const unsigned char relocate_kernel[];
 extern const unsigned long long relocate_kernel_len;
@@ -243,6 +245,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
  */
 static void __do_machine_kexec(void *data)
 {
+       unsigned long diag308_subcode;
        relocate_kernel_t data_mover;
        struct kimage *image = data;
 
@@ -251,7 +254,10 @@ static void __do_machine_kexec(void *data)
 
        __arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
        /* Call the moving routine */
-       (*data_mover)(&image->head, image->start);
+       diag308_subcode = DIAG308_CLEAR_RESET;
+       if (sclp.has_iplcc)
+               diag308_subcode |= DIAG308_FLAG_EI;
+       (*data_mover)(&image->head, image->start, diag308_subcode);
 
        /* Die if kexec returns */
        disabled_wait();
index fc60e29b8690def8ca377669a7f014fcbdab087b..53ed3884fe644d9df5d8094e0d50dc42177a0dd3 100644 (file)
@@ -29,6 +29,8 @@
 #include <asm/switch_to.h>
 #include <asm/ctl_reg.h>
 #include <asm/asm-offsets.h>
+#include <asm/pai.h>
+
 #include <linux/kvm_host.h>
 
 struct mcck_struct {
@@ -169,10 +171,12 @@ void __s390_handle_mcck(void)
        }
 }
 
-void noinstr s390_handle_mcck(void)
+void noinstr s390_handle_mcck(struct pt_regs *regs)
 {
        trace_hardirqs_off();
+       pai_kernel_enter(regs);
        __s390_handle_mcck();
+       pai_kernel_exit(regs);
        trace_hardirqs_on();
 }
 /*
index 52c1fe23b8232af364ebf60d2f8f55f17c69e9ea..0d64aafd158f2256bc60c570e557ef1735baf053 100644 (file)
@@ -295,6 +295,76 @@ CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
 CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
 CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
 CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z16, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z16, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_MEMORY, 0x00b4);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE_MEMORY, 0x00b5);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER_MEMORY, 0x00b6);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER_MEMORY, 0x00b7);
+CPUMF_EVENT_ATTR(cf_z16, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z16, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z16, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z16, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z16, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
 
 static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
        CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
@@ -635,6 +705,80 @@ static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
        NULL,
 };
 
+static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = {
+       CPUMF_EVENT_PTR(cf_z16, L1D_RO_EXCL_WRITES),
+       CPUMF_EVENT_PTR(cf_z16, DTLB2_WRITES),
+       CPUMF_EVENT_PTR(cf_z16, DTLB2_MISSES),
+       CPUMF_EVENT_PTR(cf_z16, CRSTE_1MB_WRITES),
+       CPUMF_EVENT_PTR(cf_z16, DTLB2_GPAGE_WRITES),
+       CPUMF_EVENT_PTR(cf_z16, ITLB2_WRITES),
+       CPUMF_EVENT_PTR(cf_z16, ITLB2_MISSES),
+       CPUMF_EVENT_PTR(cf_z16, TLB2_PTE_WRITES),
+       CPUMF_EVENT_PTR(cf_z16, TLB2_CRSTE_WRITES),
+       CPUMF_EVENT_PTR(cf_z16, TLB2_ENGINES_BUSY),
+       CPUMF_EVENT_PTR(cf_z16, TX_C_TEND),
+       CPUMF_EVENT_PTR(cf_z16, TX_NC_TEND),
+       CPUMF_EVENT_PTR(cf_z16, L1C_TLB2_MISSES),
+       CPUMF_EVENT_PTR(cf_z16, DCW_REQ),
+       CPUMF_EVENT_PTR(cf_z16, DCW_REQ_IV),
+       CPUMF_EVENT_PTR(cf_z16, DCW_REQ_CHIP_HIT),
+       CPUMF_EVENT_PTR(cf_z16, DCW_REQ_DRAWER_HIT),
+       CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP),
+       CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_IV),
+       CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_CHIP_HIT),
+       CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT),
+       CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE),
+       CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER),
+       CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER),
+       CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_MEMORY),
+       CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE_MEMORY),
+       CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER_MEMORY),
+       CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER_MEMORY),
+       CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_IV),
+       CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT),
+       CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT),
+       CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_IV),
+       CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT),
+       CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT),
+       CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_IV),
+       CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT),
+       CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT),
+       CPUMF_EVENT_PTR(cf_z16, ICW_REQ),
+       CPUMF_EVENT_PTR(cf_z16, ICW_REQ_IV),
+       CPUMF_EVENT_PTR(cf_z16, ICW_REQ_CHIP_HIT),
+       CPUMF_EVENT_PTR(cf_z16, ICW_REQ_DRAWER_HIT),
+       CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP),
+       CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_IV),
+       CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_CHIP_HIT),
+       CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT),
+       CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE),
+       CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER),
+       CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER),
+       CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_MEMORY),
+       CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE_MEMORY),
+       CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER_MEMORY),
+       CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER_MEMORY),
+       CPUMF_EVENT_PTR(cf_z16, BCD_DFP_EXECUTION_SLOTS),
+       CPUMF_EVENT_PTR(cf_z16, VX_BCD_EXECUTION_SLOTS),
+       CPUMF_EVENT_PTR(cf_z16, DECIMAL_INSTRUCTIONS),
+       CPUMF_EVENT_PTR(cf_z16, LAST_HOST_TRANSLATIONS),
+       CPUMF_EVENT_PTR(cf_z16, TX_NC_TABORT),
+       CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_NO_SPECIAL),
+       CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_SPECIAL),
+       CPUMF_EVENT_PTR(cf_z16, DFLT_ACCESS),
+       CPUMF_EVENT_PTR(cf_z16, DFLT_CYCLES),
+       CPUMF_EVENT_PTR(cf_z16, SORTL),
+       CPUMF_EVENT_PTR(cf_z16, DFLT_CC),
+       CPUMF_EVENT_PTR(cf_z16, DFLT_CCFINISH),
+       CPUMF_EVENT_PTR(cf_z16, NNPA_INVOCATIONS),
+       CPUMF_EVENT_PTR(cf_z16, NNPA_COMPLETIONS),
+       CPUMF_EVENT_PTR(cf_z16, NNPA_WAIT_LOCK),
+       CPUMF_EVENT_PTR(cf_z16, NNPA_HOLD_LOCK),
+       CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+       CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+       NULL,
+};
+
 /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
 
 static struct attribute_group cpumcf_pmu_events_group = {
@@ -749,6 +893,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
        case 0x8562:
                model = cpumcf_z15_pmu_event_attr;
                break;
+       case 0x3931:
+       case 0x3932:
+               model = cpumcf_z16_pmu_event_attr;
+               break;
        default:
                model = none;
                break;
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
new file mode 100644 (file)
index 0000000..8c15459
--- /dev/null
@@ -0,0 +1,688 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Facility
+ *
+ *  Copyright IBM Corp. 2022
+ *  Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT "pai_crypto"
+#define pr_fmt(fmt)    KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+
+#include <asm/ctl_reg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+static debug_info_t *cfm_dbg;
+static unsigned int paicrypt_cnt;      /* Size of the mapped counter sets */
+                                       /* extracted with QPACI instruction */
+
+DEFINE_STATIC_KEY_FALSE(pai_key);
+
+struct pai_userdata {
+       u16 num;
+       u64 value;
+} __packed;
+
+struct paicrypt_map {
+       unsigned long *page;            /* Page for CPU to store counters */
+       struct pai_userdata *save;      /* Page to store non-zero counters */
+       unsigned int users;             /* # of PAI crypto users */
+       unsigned int sampler;           /* # of PAI crypto samplers */
+       unsigned int counter;           /* # of PAI crypto counters */
+       struct perf_event *event;       /* Perf event for sampling */
+};
+
+static DEFINE_PER_CPU(struct paicrypt_map, paicrypt_map);
+
+/* Release the PMU if event is the last perf event */
+static DEFINE_MUTEX(pai_reserve_mutex);
+
+/* Adjust usage counters and remove allocated memory when all users are
+ * gone.
+ */
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+       struct paicrypt_map *cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+
+       cpump->event = NULL;
+       static_branch_dec(&pai_key);
+       mutex_lock(&pai_reserve_mutex);
+       if (event->attr.sample_period)
+               cpump->sampler -= 1;
+       else
+               cpump->counter -= 1;
+       debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d"
+                           " sampler %d counter %d\n", __func__,
+                           event->attr.config, event->cpu, cpump->sampler,
+                           cpump->counter);
+       if (!cpump->counter && !cpump->sampler) {
+               debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
+                                   __func__, (unsigned long)cpump->page,
+                                   cpump->save);
+               free_page((unsigned long)cpump->page);
+               cpump->page = NULL;
+               kvfree(cpump->save);
+               cpump->save = NULL;
+       }
+       mutex_unlock(&pai_reserve_mutex);
+}
+
+static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
+{
+       if (kernel)
+               nr += PAI_CRYPTO_MAXCTR;
+       return cpump->page[nr];
+}
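
paicrypt_getctr() implies the per-CPU page layout: user-space counters occupy the first PAI_CRYPTO_MAXCTR slots and the kernel counters follow at that offset. A hedged sketch of the indexing (the array size is illustrative; the real bound lives in asm/pai.h):

#define MAXCTR 256	/* illustrative stand-in for PAI_CRYPTO_MAXCTR */

static unsigned long page[2 * MAXCTR];	/* user half, then kernel half */

static unsigned long get_ctr(int nr, int kernel)
{
	return page[kernel ? nr + MAXCTR : nr];
}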
+
+/* Read the counter values. Return value from location in CMP. For event
+ * CRYPTO_ALL sum up all events.
+ */
+static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
+{
+       struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+       u64 sum = 0;
+       int i;
+
+       if (event->attr.config != PAI_CRYPTO_BASE) {
+               return paicrypt_getctr(cpump,
+                                      event->attr.config - PAI_CRYPTO_BASE,
+                                      kernel);
+       }
+
+       for (i = 1; i <= paicrypt_cnt; i++) {
+               u64 val = paicrypt_getctr(cpump, i, kernel);
+
+               if (!val)
+                       continue;
+               sum += val;
+       }
+       return sum;
+}
+
+static u64 paicrypt_getall(struct perf_event *event)
+{
+       u64 sum = 0;
+
+       if (!event->attr.exclude_kernel)
+               sum += paicrypt_getdata(event, true);
+       if (!event->attr.exclude_user)
+               sum += paicrypt_getdata(event, false);
+
+       return sum;
+}
+
+/* Used to avoid races between concurrent counting and sampling access
+ * for crypto events.
+ *
+ * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is safe to block.
+ * When the event initialization function fails, no other callback will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
+{
+       unsigned int *use_ptr;
+       int rc = 0;
+
+       mutex_lock(&pai_reserve_mutex);
+       if (a->sample_period) {         /* Sampling requested */
+               use_ptr = &cpump->sampler;
+               if (cpump->counter || cpump->sampler)
+                       rc = -EBUSY;    /* ... sampling/counting active */
+       } else {                        /* Counting requested */
+               use_ptr = &cpump->counter;
+               if (cpump->sampler)
+                       rc = -EBUSY;    /* ... and sampling active */
+       }
+       if (rc)
+               goto unlock;
+
+       /* Allocate memory for counter page and counter extraction.
+        * Only the first counting event has to allocate a page.
+        */
+       if (cpump->page)
+               goto unlock;
+
+       rc = -ENOMEM;
+       cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+       if (!cpump->page)
+               goto unlock;
+       cpump->save = kvmalloc_array(paicrypt_cnt + 1,
+                                    sizeof(struct pai_userdata), GFP_KERNEL);
+       if (!cpump->save) {
+               free_page((unsigned long)cpump->page);
+               cpump->page = NULL;
+               goto unlock;
+       }
+       rc = 0;
+
+unlock:
+       /* If rc is non-zero, do not increment counter/sampler. */
+       if (!rc)
+               *use_ptr += 1;
+       debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx sampler %d"
+                           " counter %d page %#lx save %p rc %d\n", __func__,
+                           a->sample_period, cpump->sampler, cpump->counter,
+                           (unsigned long)cpump->page, cpump->save, rc);
+       mutex_unlock(&pai_reserve_mutex);
+       return rc;
+}
+
+/* Might be called on a different CPU than the one the event is intended for. */
+static int paicrypt_event_init(struct perf_event *event)
+{
+       struct perf_event_attr *a = &event->attr;
+       struct paicrypt_map *cpump;
+       int rc;
+
+       /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
+       if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+               return -ENOENT;
+       /* PAI crypto event must be valid */
+       if (a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
+               return -EINVAL;
+       /* Allow only CPU wide operation, no process context for now. */
+       if (event->hw.target || event->cpu == -1)
+               return -ENOENT;
+       /* Allow only CRYPTO_ALL for sampling. */
+       if (a->sample_period && a->config != PAI_CRYPTO_BASE)
+               return -EINVAL;
+
+       cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+       rc = paicrypt_busy(a, cpump);
+       if (rc)
+               return rc;
+
+       cpump->event = event;
+       event->destroy = paicrypt_event_destroy;
+
+       if (a->sample_period) {
+               a->sample_period = 1;
+               a->freq = 0;
+               /* Register for paicrypt_sched_task() to be called */
+               event->attach_state |= PERF_ATTACH_SCHED_CB;
+               /* Add raw data which contain the memory mapped counters */
+               a->sample_type |= PERF_SAMPLE_RAW;
+               /* Turn off inheritance */
+               a->inherit = 0;
+       }
+
+       static_branch_inc(&pai_key);
+       return 0;
+}
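
Given the constraints enforced above (matching PMU type, config within the supported range, CPU-wide only, sampling restricted to CRYPTO_ALL), a hypothetical userspace counting setup could look as follows; the sysfs path follows the usual event_source layout, and config 0x1000 for CRYPTO_ALL is inferred from the identifier comment further down:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical sketch: open a CRYPTO_ALL counting event bound to CPU 0. */
int main(void)
{
	struct perf_event_attr attr;
	int type, fd;
	FILE *f = fopen("/sys/bus/event_source/devices/pai_crypto/type", "r");

	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x1000;	/* CRYPTO_ALL; no sample_period -> counting */
	fd = syscall(SYS_perf_event_open, &attr, -1 /* all tasks */,
		     0 /* cpu 0 */, -1, 0);
	if (fd < 0)
		return 1;
	/* read(fd, &count, sizeof(count)) would fetch the summed counter */
	close(fd);
	return 0;
}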
+
+static void paicrypt_read(struct perf_event *event)
+{
+       u64 prev, new, delta;
+
+       prev = local64_read(&event->hw.prev_count);
+       new = paicrypt_getall(event);
+       local64_set(&event->hw.prev_count, new);
+       delta = (prev <= new) ? new - prev
+                             : (-1ULL - prev) + new + 1;        /* overflow */
+       local64_add(delta, &event->count);
+}
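
The delta computation tolerates one wraparound of the 64-bit counter: with prev = 2^64 - 5 and new = 10 the result is 15, not a huge bogus value. A standalone check of the same arithmetic:

#include <assert.h>
#include <stdint.h>

static uint64_t delta(uint64_t prev, uint64_t new)
{
	return (prev <= new) ? new - prev
			     : (UINT64_MAX - prev) + new + 1;	/* one wrap */
}

int main(void)
{
	assert(delta(100, 115) == 15);			/* monotonic case */
	assert(delta(UINT64_MAX - 4, 10) == 15);	/* wrapped once */
	return 0;
}

In unsigned 64-bit arithmetic the plain subtraction new - prev would yield the same result modulo 2^64; the explicit conditional mainly documents the wraparound intent.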
+
+static void paicrypt_start(struct perf_event *event, int flags)
+{
+       u64 sum;
+
+       sum = paicrypt_getall(event);           /* Get current value */
+       local64_set(&event->hw.prev_count, sum);
+       local64_set(&event->count, 0);
+}
+
+static int paicrypt_add(struct perf_event *event, int flags)
+{
+       struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+       unsigned long ccd;
+
+       if (cpump->users++ == 0) {
+               ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
+               WRITE_ONCE(S390_lowcore.ccd, ccd);
+               __ctl_set_bit(0, 50);
+       }
+       cpump->event = event;
+       if (flags & PERF_EF_START && !event->attr.sample_period) {
+               /* Only counting needs initial counter value */
+               paicrypt_start(event, PERF_EF_RELOAD);
+       }
+       event->hw.state = 0;
+       if (event->attr.sample_period)
+               perf_sched_cb_inc(event->pmu);
+       return 0;
+}
+
+static void paicrypt_stop(struct perf_event *event, int flags)
+{
+       paicrypt_read(event);
+       event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paicrypt_del(struct perf_event *event, int flags)
+{
+       struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+
+       if (event->attr.sample_period)
+               perf_sched_cb_dec(event->pmu);
+       if (!event->attr.sample_period)
+               /* Only counting needs to read counter */
+               paicrypt_stop(event, PERF_EF_UPDATE);
+       if (cpump->users-- == 1) {
+               __ctl_clear_bit(0, 50);
+               WRITE_ONCE(S390_lowcore.ccd, 0);
+       }
+}
+
+/* Create raw data and save it in buffer. Returns number of bytes copied.
+ * Saves only non-zero counter entries of the form
+ * 2 bytes: Counter number
+ * 8 bytes: Value of counter
+ */
+static size_t paicrypt_copy(struct pai_userdata *userdata,
+                           struct paicrypt_map *cpump,
+                           bool exclude_user, bool exclude_kernel)
+{
+       int i, outidx = 0;
+
+       for (i = 1; i <= paicrypt_cnt; i++) {
+               u64 val = 0;
+
+               if (!exclude_kernel)
+                       val += paicrypt_getctr(cpump, i, true);
+               if (!exclude_user)
+                       val += paicrypt_getctr(cpump, i, false);
+               if (val) {
+                       userdata[outidx].num = i;
+                       userdata[outidx].value = val;
+                       outidx++;
+               }
+       }
+       return outidx * sizeof(struct pai_userdata);
+}
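
Each raw sample is therefore a packed sequence of (2-byte counter number, 8-byte value) records. A hypothetical consumer-side decoder for such a buffer (names and assumptions mine, not a kernel API):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirrors struct pai_userdata: 2-byte counter number, 8-byte value,
 * packed back to back. */
static void decode_pai_raw(const unsigned char *buf, size_t len)
{
	while (len >= 10) {			/* 2 + 8 bytes per record */
		uint16_t num;
		uint64_t value;

		memcpy(&num, buf, sizeof(num));
		memcpy(&value, buf + sizeof(num), sizeof(value));
		printf("counter %u = %llu\n", num, (unsigned long long)value);
		buf += 10;
		len -= 10;
	}
}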
+
+static int paicrypt_push_sample(void)
+{
+       struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+       struct perf_event *event = cpump->event;
+       struct perf_sample_data data;
+       struct perf_raw_record raw;
+       struct pt_regs regs;
+       size_t rawsize;
+       int overflow;
+
+       if (!cpump->event)              /* No event active */
+               return 0;
+       rawsize = paicrypt_copy(cpump->save, cpump,
+                               cpump->event->attr.exclude_user,
+                               cpump->event->attr.exclude_kernel);
+       if (!rawsize)                   /* No incremented counters */
+               return 0;
+
+       /* Setup perf sample */
+       memset(&regs, 0, sizeof(regs));
+       memset(&raw, 0, sizeof(raw));
+       memset(&data, 0, sizeof(data));
+       perf_sample_data_init(&data, 0, event->hw.last_period);
+       if (event->attr.sample_type & PERF_SAMPLE_TID) {
+               data.tid_entry.pid = task_tgid_nr(current);
+               data.tid_entry.tid = task_pid_nr(current);
+       }
+       if (event->attr.sample_type & PERF_SAMPLE_TIME)
+               data.time = event->clock();
+       if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+               data.id = event->id;
+       if (event->attr.sample_type & PERF_SAMPLE_CPU) {
+               data.cpu_entry.cpu = smp_processor_id();
+               data.cpu_entry.reserved = 0;
+       }
+       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+               raw.frag.size = rawsize;
+               raw.frag.data = cpump->save;
+               raw.size = raw.frag.size;
+               data.raw = &raw;
+       }
+
+       overflow = perf_event_overflow(event, &data, &regs);
+       perf_event_update_userpage(event);
+       /* Clear lowcore page after read */
+       memset(cpump->page, 0, PAGE_SIZE);
+       return overflow;
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event CRYPTO_ALL is allowed.
+ */
+static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       /* We started with a clean page on event installation. So read out
+        * results on schedule_out and if page was dirty, clear values.
+        */
+       if (!sched_in)
+               paicrypt_push_sample();
+}
+
+/* Attribute definitions for paicrypt interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instruction returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported; there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1000 plus the counter's offset in the mapped kernel page.
+ * All CPU Measurement Facility counter identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
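
So a symbolic name maps directly to a config value: KM_AES_128 at index 7, for example, becomes 0x1007. A trivial sketch of that mapping (the base value is taken from the comment above):

/* Assumed from the comment above: identifiers start at 0x1000. */
#define PAI_CRYPTO_BASE 0x1000

static inline unsigned long pai_crypto_config(unsigned int index)
{
	return PAI_CRYPTO_BASE + index;	/* KM_AES_128 (7) -> 0x1007 */
}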
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paicrypt_format_attr[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+
+static struct attribute_group paicrypt_events_group = {
+       .name = "events",
+       .attrs = NULL                   /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paicrypt_format_group = {
+       .name = "format",
+       .attrs = paicrypt_format_attr,
+};
+
+static const struct attribute_group *paicrypt_attr_groups[] = {
+       &paicrypt_events_group,
+       &paicrypt_format_group,
+       NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paicrypt = {
+       .task_ctx_nr  = perf_invalid_context,
+       .event_init   = paicrypt_event_init,
+       .add          = paicrypt_add,
+       .del          = paicrypt_del,
+       .start        = paicrypt_start,
+       .stop         = paicrypt_stop,
+       .read         = paicrypt_read,
+       .sched_task   = paicrypt_sched_task,
+       .attr_groups  = paicrypt_attr_groups
+};
+
+/* List of symbolic PAI counter names. */
+static const char * const paicrypt_ctrnames[] = {
+       [0] = "CRYPTO_ALL",
+       [1] = "KM_DEA",
+       [2] = "KM_TDEA_128",
+       [3] = "KM_TDEA_192",
+       [4] = "KM_ENCRYPTED_DEA",
+       [5] = "KM_ENCRYPTED_TDEA_128",
+       [6] = "KM_ENCRYPTED_TDEA_192",
+       [7] = "KM_AES_128",
+       [8] = "KM_AES_192",
+       [9] = "KM_AES_256",
+       [10] = "KM_ENCRYPTED_AES_128",
+       [11] = "KM_ENCRYPTED_AES_192",
+       [12] = "KM_ENCRYPTED_AES_256",
+       [13] = "KM_XTS_AES_128",
+       [14] = "KM_XTS_AES_256",
+       [15] = "KM_XTS_ENCRYPTED_AES_128",
+       [16] = "KM_XTS_ENCRYPTED_AES_256",
+       [17] = "KMC_DEA",
+       [18] = "KMC_TDEA_128",
+       [19] = "KMC_TDEA_192",
+       [20] = "KMC_ENCRYPTED_DEA",
+       [21] = "KMC_ENCRYPTED_TDEA_128",
+       [22] = "KMC_ENCRYPTED_TDEA_192",
+       [23] = "KMC_AES_128",
+       [24] = "KMC_AES_192",
+       [25] = "KMC_AES_256",
+       [26] = "KMC_ENCRYPTED_AES_128",
+       [27] = "KMC_ENCRYPTED_AES_192",
+       [28] = "KMC_ENCRYPTED_AES_256",
+       [29] = "KMC_PRNG",
+       [30] = "KMA_GCM_AES_128",
+       [31] = "KMA_GCM_AES_192",
+       [32] = "KMA_GCM_AES_256",
+       [33] = "KMA_GCM_ENCRYPTED_AES_128",
+       [34] = "KMA_GCM_ENCRYPTED_AES_192",
+       [35] = "KMA_GCM_ENCRYPTED_AES_256",
+       [36] = "KMF_DEA",
+       [37] = "KMF_TDEA_128",
+       [38] = "KMF_TDEA_192",
+       [39] = "KMF_ENCRYPTED_DEA",
+       [40] = "KMF_ENCRYPTED_TDEA_128",
+       [41] = "KMF_ENCRYPTED_TDEA_192",
+       [42] = "KMF_AES_128",
+       [43] = "KMF_AES_192",
+       [44] = "KMF_AES_256",
+       [45] = "KMF_ENCRYPTED_AES_128",
+       [46] = "KMF_ENCRYPTED_AES_192",
+       [47] = "KMF_ENCRYPTED_AES_256",
+       [48] = "KMCTR_DEA",
+       [49] = "KMCTR_TDEA_128",
+       [50] = "KMCTR_TDEA_192",
+       [51] = "KMCTR_ENCRYPTED_DEA",
+       [52] = "KMCTR_ENCRYPTED_TDEA_128",
+       [53] = "KMCTR_ENCRYPTED_TDEA_192",
+       [54] = "KMCTR_AES_128",
+       [55] = "KMCTR_AES_192",
+       [56] = "KMCTR_AES_256",
+       [57] = "KMCTR_ENCRYPTED_AES_128",
+       [58] = "KMCTR_ENCRYPTED_AES_192",
+       [59] = "KMCTR_ENCRYPTED_AES_256",
+       [60] = "KMO_DEA",
+       [61] = "KMO_TDEA_128",
+       [62] = "KMO_TDEA_192",
+       [63] = "KMO_ENCRYPTED_DEA",
+       [64] = "KMO_ENCRYPTED_TDEA_128",
+       [65] = "KMO_ENCRYPTED_TDEA_192",
+       [66] = "KMO_AES_128",
+       [67] = "KMO_AES_192",
+       [68] = "KMO_AES_256",
+       [69] = "KMO_ENCRYPTED_AES_128",
+       [70] = "KMO_ENCRYPTED_AES_192",
+       [71] = "KMO_ENCRYPTED_AES_256",
+       [72] = "KIMD_SHA_1",
+       [73] = "KIMD_SHA_256",
+       [74] = "KIMD_SHA_512",
+       [75] = "KIMD_SHA3_224",
+       [76] = "KIMD_SHA3_256",
+       [77] = "KIMD_SHA3_384",
+       [78] = "KIMD_SHA3_512",
+       [79] = "KIMD_SHAKE_128",
+       [80] = "KIMD_SHAKE_256",
+       [81] = "KIMD_GHASH",
+       [82] = "KLMD_SHA_1",
+       [83] = "KLMD_SHA_256",
+       [84] = "KLMD_SHA_512",
+       [85] = "KLMD_SHA3_224",
+       [86] = "KLMD_SHA3_256",
+       [87] = "KLMD_SHA3_384",
+       [88] = "KLMD_SHA3_512",
+       [89] = "KLMD_SHAKE_128",
+       [90] = "KLMD_SHAKE_256",
+       [91] = "KMAC_DEA",
+       [92] = "KMAC_TDEA_128",
+       [93] = "KMAC_TDEA_192",
+       [94] = "KMAC_ENCRYPTED_DEA",
+       [95] = "KMAC_ENCRYPTED_TDEA_128",
+       [96] = "KMAC_ENCRYPTED_TDEA_192",
+       [97] = "KMAC_AES_128",
+       [98] = "KMAC_AES_192",
+       [99] = "KMAC_AES_256",
+       [100] = "KMAC_ENCRYPTED_AES_128",
+       [101] = "KMAC_ENCRYPTED_AES_192",
+       [102] = "KMAC_ENCRYPTED_AES_256",
+       [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA",
+       [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128",
+       [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192",
+       [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA",
+       [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128",
+       [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192",
+       [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128",
+       [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192",
+       [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
+       [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
+       [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
+       [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A",
+       [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
+       [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
+       [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
+       [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256",
+       [119] = "PCC_SCALAR_MULTIPLY_P256",
+       [120] = "PCC_SCALAR_MULTIPLY_P384",
+       [121] = "PCC_SCALAR_MULTIPLY_P521",
+       [122] = "PCC_SCALAR_MULTIPLY_ED25519",
+       [123] = "PCC_SCALAR_MULTIPLY_ED448",
+       [124] = "PCC_SCALAR_MULTIPLY_X25519",
+       [125] = "PCC_SCALAR_MULTIPLY_X448",
+       [126] = "PRNO_SHA_512_DRNG",
+       [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO",
+       [128] = "PRNO_TRNG",
+       [129] = "KDSA_ECDSA_VERIFY_P256",
+       [130] = "KDSA_ECDSA_VERIFY_P384",
+       [131] = "KDSA_ECDSA_VERIFY_P521",
+       [132] = "KDSA_ECDSA_SIGN_P256",
+       [133] = "KDSA_ECDSA_SIGN_P384",
+       [134] = "KDSA_ECDSA_SIGN_P521",
+       [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256",
+       [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384",
+       [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521",
+       [138] = "KDSA_EDDSA_VERIFY_ED25519",
+       [139] = "KDSA_EDDSA_VERIFY_ED448",
+       [140] = "KDSA_EDDSA_SIGN_ED25519",
+       [141] = "KDSA_EDDSA_SIGN_ED448",
+       [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519",
+       [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448",
+       [144] = "PCKMO_ENCRYPT_DEA_KEY",
+       [145] = "PCKMO_ENCRYPT_TDEA_128_KEY",
+       [146] = "PCKMO_ENCRYPT_TDEA_192_KEY",
+       [147] = "PCKMO_ENCRYPT_AES_128_KEY",
+       [148] = "PCKMO_ENCRYPT_AES_192_KEY",
+       [149] = "PCKMO_ENCRYPT_AES_256_KEY",
+       [150] = "PCKMO_ENCRYPT_ECC_P256_KEY",
+       [151] = "PCKMO_ENCRYPT_ECC_P384_KEY",
+       [152] = "PCKMO_ENCRYPT_ECC_P521_KEY",
+       [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY",
+       [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
+       [155] = "IBM_RESERVED_155",
+       [156] = "IBM_RESERVED_156",
+};
+
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+       struct perf_pmu_events_attr *pa;
+       int i;
+
+       for (i = 0; i < num; i++) {
+               struct device_attribute *dap;
+
+               dap = container_of(attrs[i], struct device_attribute, attr);
+               pa = container_of(dap, struct perf_pmu_events_attr, attr);
+               kfree(pa);
+       }
+       kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+       struct perf_pmu_events_attr *pa;
+
+       pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+       if (!pa)
+               return -ENOMEM;
+
+       sysfs_attr_init(&pa->attr.attr);
+       pa->id = PAI_CRYPTO_BASE + num;
+       pa->attr.attr.name = paicrypt_ctrnames[num];
+       pa->attr.attr.mode = 0444;
+       pa->attr.show = cpumf_events_sysfs_show;
+       pa->attr.store = NULL;
+       attrs[num] = &pa->attr.attr;
+       return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly. */
+static int __init attr_event_init(void)
+{
+       struct attribute **attrs;
+       int ret, i;
+
+       attrs = kmalloc_array(ARRAY_SIZE(paicrypt_ctrnames) + 1, sizeof(*attrs),
+                             GFP_KERNEL);
+       if (!attrs)
+               return -ENOMEM;
+       for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) {
+               ret = attr_event_init_one(attrs, i);
+               if (ret) {
+                       attr_event_free(attrs, i);      /* free entries 0..i-1 */
+                       return ret;
+               }
+       }
+       attrs[i] = NULL;
+       paicrypt_events_group.attrs = attrs;
+       return 0;
+}
+
+static int __init paicrypt_init(void)
+{
+       struct qpaci_info_block ib;
+       int rc;
+
+       if (!test_facility(196))
+               return 0;
+
+       qpaci(&ib);
+       paicrypt_cnt = ib.num_cc;
+       if (paicrypt_cnt == 0)
+               return 0;
+       if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR)
+               paicrypt_cnt = PAI_CRYPTO_MAXCTR - 1;
+
+       rc = attr_event_init();         /* Export known PAI crypto events */
+       if (rc) {
+               pr_err("Creation of PMU pai_crypto /sysfs failed\n");
+               return rc;
+       }
+
+       /* Setup s390dbf facility */
+       cfm_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+       if (!cfm_dbg) {
+               pr_err("Registration of s390dbf pai_crypto failed\n");
+               return -ENOMEM;
+       }
+       debug_register_view(cfm_dbg, &debug_sprintf_view);
+
+       rc = perf_pmu_register(&paicrypt, "pai_crypto", -1);
+       if (rc) {
+               pr_err("Registering the pai_crypto PMU failed with rc=%i\n",
+                      rc);
+               debug_unregister_view(cfm_dbg, &debug_sprintf_view);
+               debug_unregister(cfm_dbg);
+               return rc;
+       }
+       return 0;
+}
+
+device_initcall(paicrypt_init);
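
The sysfs cleanup above leans on a container_of() round trip: attrs[] stores a
pointer to the innermost struct attribute, and attr_event_free() walks back out
to the enclosing perf_pmu_events_attr allocation before freeing it. A minimal
userspace sketch of that pattern (the struct layouts are simplified stand-ins,
not the kernel definitions):

```c
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Simplified stand-ins for the kernel structures used above. */
struct attribute { const char *name; };
struct device_attribute { struct attribute attr; };
struct perf_pmu_events_attr { struct device_attribute attr; unsigned long id; };

int main(void)
{
	struct perf_pmu_events_attr *pa = calloc(1, sizeof(*pa));
	struct attribute *stored;

	if (!pa)
		return 1;
	pa->id = 0x1000;			/* like PAI_CRYPTO_BASE + num */
	pa->attr.attr.name = "KM_AES_128";
	stored = &pa->attr.attr;		/* what attrs[num] holds */

	/* Walk back from the innermost member to the allocation, exactly
	 * as attr_event_free() does, so the whole object can be freed. */
	struct device_attribute *dap =
		container_of(stored, struct device_attribute, attr);
	struct perf_pmu_events_attr *back =
		container_of(dap, struct perf_pmu_events_attr, attr);

	printf("recovered id=%#lx name=%s\n", back->id, back->attr.attr.name);
	free(back);
	return 0;
}
```
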
index 9438368c36329ae3ca065a8e6c12fc3412b89e79..a9a1a6f45375c0c3b13df07e30a6c73a7aefe553 100644 (file)
@@ -14,6 +14,7 @@
  * moves the new kernel to its destination...
  * %r2 = pointer to first kimage_entry_t
  * %r3 = start address - where to jump to after the job is done...
+ * %r4 = subcode
  *
  * %r5 will be used as temp. storage
  * %r6 holds the destination address
@@ -56,7 +57,7 @@ ENTRY(relocate_kernel)
                jo      0b
                j       .base
        .done:
-               sgr     %r0,%r0         # clear register r0
+               lgr     %r0,%r4         # subcode
                cghi    %r3,0
                je      .diag
                la      %r4,load_psw-.base(%r13)        # load psw-address into the register
index d860ac3009197eb3e975bfcd32efc6b5346c0614..8d91eccc096325b47cf21f593da2826ccc420863 100644 (file)
@@ -494,7 +494,7 @@ static void __init setup_lowcore_dat_off(void)
        lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
        lc->preempt_count = PREEMPT_DISABLED;
 
-       set_prefix((u32)(unsigned long) lc);
+       set_prefix(__pa(lc));
        lowcore_ptr[0] = lc;
 }
 
index 326cb8f75f58ef21cc73ba7c244103c8e13e509a..6b7b6d5e3632a27b53ebd768b273ada03f24be44 100644 (file)
@@ -364,7 +364,7 @@ static inline int check_sync_clock(void)
  * Apply clock delta to the global data structures.
  * This is called once on the CPU that performed the clock sync.
  */
-static void clock_sync_global(unsigned long delta)
+static void clock_sync_global(long delta)
 {
        unsigned long now, adj;
        struct ptff_qto qto;
@@ -400,7 +400,7 @@ static void clock_sync_global(unsigned long delta)
  * Apply clock delta to the per-CPU data structures of this CPU.
  * This is called for each online CPU after the call to clock_sync_global.
  */
-static void clock_sync_local(unsigned long delta)
+static void clock_sync_local(long delta)
 {
        /* Add the delta to the clock comparator. */
        if (S390_lowcore.clock_comparator != clock_comparator_max) {
@@ -424,7 +424,7 @@ static void __init time_init_wq(void)
 struct clock_sync_data {
        atomic_t cpus;
        int in_sync;
-       unsigned long clock_delta;
+       long clock_delta;
 };
 
 /*
@@ -544,7 +544,7 @@ static int stpinfo_valid(void)
 static int stp_sync_clock(void *data)
 {
        struct clock_sync_data *sync = data;
-       u64 clock_delta, flags;
+       long clock_delta, flags;
        static int first;
        int rc;
 
@@ -554,9 +554,7 @@ static int stp_sync_clock(void *data)
                while (atomic_read(&sync->cpus) != 0)
                        cpu_relax();
                rc = 0;
-               if (stp_info.todoff[0] || stp_info.todoff[1] ||
-                   stp_info.todoff[2] || stp_info.todoff[3] ||
-                   stp_info.tmd != 2) {
+               if (stp_info.todoff || stp_info.tmd != 2) {
                        flags = vdso_update_begin();
                        rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
                                        &clock_delta);
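
The unsigned long to long conversions above matter because an STP sync can step
the TOD clock backwards, so the delta may be negative. Wraparound hides this in
plain additions, but any sign-dependent use of the delta goes wrong; a tiny
standalone illustration:

```c
#include <stdio.h>

int main(void)
{
	unsigned long u_delta = (unsigned long)-5;	/* clock stepped back by 5 */
	long s_delta = -5;
	unsigned long tod = 1000;

	/* Pure addition still wraps to the right value... */
	printf("tod + u_delta = %lu\n", tod + u_delta);		/* 995 */
	/* ...but sign-dependent logic on the delta itself does not: */
	printf("u_delta > 0? %d\n", u_delta > 0);	/* 1: a "huge positive" */
	printf("s_delta > 0? %d\n", s_delta > 0);	/* 0: correctly negative */
	return 0;
}
```
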
index 99694260cac97bc618bd7cce2c2672fdf9053604..5075cde77b29202977f7aad304267f49b6aea33e 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/time_namespace.h>
+#include <linux/random.h>
 #include <vdso/datapage.h>
 #include <asm/vdso.h>
 
@@ -160,10 +161,9 @@ int vdso_getcpu_init(void)
 }
 early_initcall(vdso_getcpu_init); /* Must be called before SMP init */
 
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
 {
-       unsigned long vdso_text_len, vdso_mapping_len;
-       unsigned long vvar_start, vdso_text_start;
+       unsigned long vvar_start, vdso_text_start, vdso_text_len;
        struct vm_special_mapping *vdso_mapping;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
@@ -180,8 +180,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
                vdso_text_len = vdso64_end - vdso64_start;
                vdso_mapping = &vdso64_mapping;
        }
-       vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
-       vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+       vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0);
        rc = vvar_start;
        if (IS_ERR_VALUE(vvar_start))
                goto out;
@@ -210,6 +209,52 @@ out:
        return rc;
 }
 
+static unsigned long vdso_addr(unsigned long start, unsigned long len)
+{
+       unsigned long addr, end, offset;
+
+       /*
+        * Round up the start address. It can start out unaligned as a result
+        * of stack start randomization.
+        */
+       start = PAGE_ALIGN(start);
+
+       /* Round the lowest possible end address up to a PMD boundary. */
+       end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+       if (end >= VDSO_BASE)
+               end = VDSO_BASE;
+       end -= len;
+
+       if (end > start) {
+               offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+               addr = start + (offset << PAGE_SHIFT);
+       } else {
+               addr = start;
+       }
+       return addr;
+}
+
+unsigned long vdso_size(void)
+{
+       unsigned long size = VVAR_NR_PAGES * PAGE_SIZE;
+
+       if (is_compat_task())
+               size += vdso32_end - vdso32_start;
+       else
+               size += vdso64_end - vdso64_start;
+       return PAGE_ALIGN(size);
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+       unsigned long addr = VDSO_BASE;
+       unsigned long size = vdso_size();
+
+       if (current->flags & PF_RANDOMIZE)
+               addr = vdso_addr(current->mm->start_stack + PAGE_SIZE, size);
+       return map_vdso(addr, size);
+}
+
 static struct page ** __init vdso_setup_pages(void *start, void *end)
 {
        int pages = (end - start) >> PAGE_SHIFT;
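
vdso_addr() above selects a page-aligned slot for the vDSO somewhere between the
randomized stack and VDSO_BASE, after rounding the window up to a PMD boundary.
A self-contained model of the same window arithmetic; the PMD_SIZE and VDSO_BASE
values are demo assumptions, a fixed offset stands in for get_random_int(), and
a 64-bit unsigned long is assumed:

```c
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define PMD_SIZE   (1UL << 20)		/* demo assumption: 1 MiB segments */
#define PMD_MASK   (~(PMD_SIZE - 1))
#define VDSO_BASE  (1UL << 32)		/* demo assumption */

/* Same window computation as vdso_addr() above, with a fixed "random"
 * page offset in place of get_random_int(). */
static unsigned long demo_vdso_addr(unsigned long start, unsigned long len,
				    unsigned long rnd)
{
	unsigned long end;

	start = PAGE_ALIGN(start);
	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
	if (end >= VDSO_BASE)
		end = VDSO_BASE;
	end -= len;

	if (end > start)
		return start + (rnd % (((end - start) >> PAGE_SHIFT) + 1)) * PAGE_SIZE;
	return start;
}

int main(void)
{
	unsigned long stack = 0x7fff1234UL;	/* unaligned after stack randomization */

	printf("vdso at %#lx\n", demo_vdso_addr(stack + PAGE_SIZE, 3 * PAGE_SIZE, 7));
	return 0;
}
```
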
index 5beb7a4a11b3c6e8a6692abfd5ecf26d846ba47e..83bb5cf97282cf772b05e07c365df4bf988a3ef5 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/kvm.h>
 #include <linux/gfp.h>
 #include <linux/errno.h>
-#include <linux/compat.h>
 #include <linux/mm_types.h>
 #include <linux/pgtable.h>
 
index 5e7ea8b111e892ec24d82776a4abdae9e091bbee..04d4c6cf898ef3b61ef889a06196dc91a5c6f1b8 100644 (file)
@@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock)
        int owner;
 
        asm_inline volatile(
-               ALTERNATIVE("", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
+               ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
                "       l       %0,%1\n"
                : "=d" (owner) : "Q" (*lock) : "memory");
        return owner;
@@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
        int expected = old;
 
        asm_inline volatile(
-               ALTERNATIVE("", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
+               ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
                "       cs      %0,%3,%1\n"
                : "=d" (old), "=Q" (*lock)
                : "0" (old), "d" (new), "Q" (*lock)
index e54f928503c5cbc4b35db4b5863389cad5264cf3..d545f5c39f7e41faaec351596e529e426f6370e3 100644 (file)
@@ -58,9 +58,9 @@ static inline unsigned long mmap_base(unsigned long rnd,
 
        /*
         * Top of mmap area (just below the process stack).
-        * Leave at least a ~32 MB hole.
+        * Leave at least a ~128 MB hole.
         */
-       gap_min = 32 * 1024 * 1024UL;
+       gap_min = SZ_128M;
        gap_max = (STACK_TOP / 6) * 5;
 
        if (gap < gap_min)
index e563cb65c0c4c8d3cf76c36a4952291e2eab5234..bc980fd313d5137a44f083b1253cd14bc189edc6 100644 (file)
@@ -799,7 +799,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
        struct zpci_dev *zdev;
        int rc;
 
-       zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
+       zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
        zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
        if (!zdev)
                return ERR_PTR(-ENOMEM);
index 1057d7af4a551b7498a0a37f3856faa1ac52cd99..375e0a5120bc1645cafa589b6bb7d178f76223a2 100644 (file)
@@ -30,7 +30,7 @@ bool zpci_unique_uid;
 void update_uid_checking(bool new)
 {
        if (zpci_unique_uid != new)
-               zpci_dbg(1, "uid checking:%d\n", new);
+               zpci_dbg(3, "uid checking:%d\n", new);
 
        zpci_unique_uid = new;
 }
index 3408c0df3ebf11fa54c289cab70703ed272fb99d..ca6bd98eec136e9a920a250729108730ec652ef5 100644 (file)
@@ -196,7 +196,7 @@ int __init zpci_debug_init(void)
        if (!pci_debug_err_id)
                return -EINVAL;
        debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
-       debug_set_level(pci_debug_err_id, 6);
+       debug_set_level(pci_debug_err_id, 3);
 
        debugfs_root = debugfs_create_dir("pci", NULL);
        return 0;
index ea9db5cea64e30a8b7133b57b059f9375608a82e..b9324ca2eb94034200efa9e9f18371873bef7c28 100644 (file)
@@ -321,9 +321,6 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 
        zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
                 ccdf->fid, ccdf->fh, ccdf->pec);
-       zpci_err("avail CCDF:\n");
-       zpci_err_hex(ccdf, sizeof(*ccdf));
-
        switch (ccdf->pec) {
        case 0x0301: /* Reserved|Standby -> Configured */
                if (!zdev) {
index 1710d006ee93e9506b0ae37a97b30658f0c12791..1a822b7799f8c5ad167810e145794947b1c2b766 100644 (file)
 
 #define ZPCI_INSN_BUSY_DELAY   1       /* 1 microsecond */
 
-static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+struct zpci_err_insn_data {
+       u8 insn;
+       u8 cc;
+       u8 status;
+       union {
+               struct {
+                       u64 req;
+                       u64 offset;
+               };
+               struct {
+                       u64 addr;
+                       u64 len;
+               };
+       };
+} __packed;
+
+static inline void zpci_err_insn_req(int lvl, u8 insn, u8 cc, u8 status,
+                                    u64 req, u64 offset)
 {
-       struct {
-               u64 req;
-               u64 offset;
-               u8 cc;
-               u8 status;
-       } __packed data = {req, offset, cc, status};
-
-       zpci_err_hex(&data, sizeof(data));
+       struct zpci_err_insn_data data = {
+               .insn = insn, .cc = cc, .status = status,
+               .req = req, .offset = offset};
+
+       zpci_err_hex_level(lvl, &data, sizeof(data));
+}
+
+static inline void zpci_err_insn_addr(int lvl, u8 insn, u8 cc, u8 status,
+                                     u64 addr, u64 len)
+{
+       struct zpci_err_insn_data data = {
+               .insn = insn, .cc = cc, .status = status,
+               .addr = addr, .len = len};
+
+       zpci_err_hex_level(lvl, &data, sizeof(data));
 }
 
 /* Modify PCI Function Controls */
@@ -47,16 +71,24 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
 
 u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
 {
+       bool retried = false;
        u8 cc;
 
        do {
                cc = __mpcifc(req, fib, status);
-               if (cc == 2)
+               if (cc == 2) {
                        msleep(ZPCI_INSN_BUSY_DELAY);
+                       if (!retried) {
+                               zpci_err_insn_req(1, 'M', cc, *status, req, 0);
+                               retried = true;
+                       }
+               }
        } while (cc == 2);
 
        if (cc)
-               zpci_err_insn(cc, *status, req, 0);
+               zpci_err_insn_req(0, 'M', cc, *status, req, 0);
+       else if (retried)
+               zpci_err_insn_req(1, 'M', cc, *status, req, 0);
 
        return cc;
 }
@@ -80,16 +112,24 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
 
 int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 {
+       bool retried = false;
        u8 cc, status;
 
        do {
                cc = __rpcit(fn, addr, range, &status);
-               if (cc == 2)
+               if (cc == 2) {
                        udelay(ZPCI_INSN_BUSY_DELAY);
+                       if (!retried) {
+                               zpci_err_insn_addr(1, 'R', cc, status, addr, range);
+                               retried = true;
+                       }
+               }
        } while (cc == 2);
 
        if (cc)
-               zpci_err_insn(cc, status, addr, range);
+               zpci_err_insn_addr(0, 'R', cc, status, addr, range);
+       else if (retried)
+               zpci_err_insn_addr(1, 'R', cc, status, addr, range);
 
        if (cc == 1 && (status == 4 || status == 16))
                return -ENOMEM;
@@ -144,17 +184,25 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
 
 int __zpci_load(u64 *data, u64 req, u64 offset)
 {
+       bool retried = false;
        u8 status;
        int cc;
 
        do {
                cc = __pcilg(data, req, offset, &status);
-               if (cc == 2)
+               if (cc == 2) {
                        udelay(ZPCI_INSN_BUSY_DELAY);
+                       if (!retried) {
+                               zpci_err_insn_req(1, 'l', cc, status, req, offset);
+                               retried = true;
+                       }
+               }
        } while (cc == 2);
 
        if (cc)
-               zpci_err_insn(cc, status, req, offset);
+               zpci_err_insn_req(0, 'l', cc, status, req, offset);
+       else if (retried)
+               zpci_err_insn_req(1, 'l', cc, status, req, offset);
 
        return (cc > 0) ? -EIO : cc;
 }
@@ -198,7 +246,7 @@ int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
 
        cc = __pcilg_mio(data, (__force u64) addr, len, &status);
        if (cc)
-               zpci_err_insn(cc, status, 0, (__force u64) addr);
+               zpci_err_insn_addr(0, 'L', cc, status, (__force u64) addr, len);
 
        return (cc > 0) ? -EIO : cc;
 }
@@ -225,17 +273,25 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
 
 int __zpci_store(u64 data, u64 req, u64 offset)
 {
+       bool retried = false;
        u8 status;
        int cc;
 
        do {
                cc = __pcistg(data, req, offset, &status);
-               if (cc == 2)
+               if (cc == 2) {
                        udelay(ZPCI_INSN_BUSY_DELAY);
+                       if (!retried) {
+                               zpci_err_insn_req(1, 's', cc, status, req, offset);
+                               retried = true;
+                       }
+               }
        } while (cc == 2);
 
        if (cc)
-               zpci_err_insn(cc, status, req, offset);
+               zpci_err_insn_req(0, 's', cc, status, req, offset);
+       else if (retried)
+               zpci_err_insn_req(1, 's', cc, status, req, offset);
 
        return (cc > 0) ? -EIO : cc;
 }
@@ -278,7 +334,7 @@ int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
 
        cc = __pcistg_mio(data, (__force u64) addr, len, &status);
        if (cc)
-               zpci_err_insn(cc, status, 0, (__force u64) addr);
+               zpci_err_insn_addr(0, 'S', cc, status, (__force u64) addr, len);
 
        return (cc > 0) ? -EIO : cc;
 }
@@ -304,17 +360,25 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
 
 int __zpci_store_block(const u64 *data, u64 req, u64 offset)
 {
+       bool retried = false;
        u8 status;
        int cc;
 
        do {
                cc = __pcistb(data, req, offset, &status);
-               if (cc == 2)
+               if (cc == 2) {
                        udelay(ZPCI_INSN_BUSY_DELAY);
+                       if (!retried) {
+                               zpci_err_insn_req(1, 'b', cc, status, req, offset);
+                               retried = true;
+                       }
+               }
        } while (cc == 2);
 
        if (cc)
-               zpci_err_insn(cc, status, req, offset);
+               zpci_err_insn_req(0, 'b', cc, status, req, offset);
+       else if (retried)
+               zpci_err_insn_req(1, 'b', cc, status, req, offset);
 
        return (cc > 0) ? -EIO : cc;
 }
@@ -358,7 +422,7 @@ int zpci_write_block(volatile void __iomem *dst,
 
        cc = __pcistb_mio(src, (__force u64) dst, len, &status);
        if (cc)
-               zpci_err_insn(cc, status, 0, (__force u64) dst);
+               zpci_err_insn_addr(0, 'B', cc, status, (__force u64) dst, len);
 
        return (cc > 0) ? -EIO : cc;
 }
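
With this change every zPCI instruction wrapper follows one shape: the first
busy condition (cc == 2) is traced once at debug level 1, a final hard failure
at level 0, and a success that needed retries at level 1 again, so transient
contention stays visible without flooding the error log. A compact standalone
model of the pattern (fake_insn() and trace() are stand-ins for the PCI
instruction and zpci_err_insn_*()):

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins: cc == 2 means "busy, retry"; any other non-zero cc is an error. */
static int attempts;

static int fake_insn(void)
{
	return attempts++ < 3 ? 2 : 0;	/* busy three times, then success */
}

static void trace(int level, const char *what, int cc)
{
	printf("lvl=%d: %s (cc=%d)\n", level, what, cc);
}

int main(void)
{
	bool retried = false;
	int cc;

	do {
		cc = fake_insn();
		if (cc == 2 && !retried) {
			trace(1, "first busy retry", cc);	/* informational */
			retried = true;
		}
	} while (cc == 2);

	if (cc)
		trace(0, "hard failure", cc);			/* error level */
	else if (retried)
		trace(1, "succeeded after retries", cc);	/* note recovery */
	return 0;
}
```
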
index 3d1c31e0cf3dd7e37dc382350a843a8c78f5657b..6f835124ee82ab28d07a1971bf7e4fa1b1e4c1c5 100644 (file)
 .endm
 
 .macro MEMSWAP dst,src,buf,len
-10:    cghi    \len,bufsz
+10:    larl    %r0,purgatory_end
+       larl    %r1,stack
+       slgr    %r0,%r1
+       cgr     \len,%r0
        jh      11f
        lgr     %r4,\len
        j       12f
-11:    lghi    %r4,bufsz
+11:    lgr     %r4,%r0
 
 12:    MEMCPY  \buf,\dst,%r4
        MEMCPY  \dst,\src,%r4
@@ -135,12 +138,18 @@ ENTRY(purgatory_start)
 
 .start_crash_kernel:
        /* Location of purgatory_start in crash memory */
+       larl    %r0,.base_crash
+       larl    %r1,purgatory_start
+       slgr    %r0,%r1
        lgr     %r8,%r13
-       aghi    %r8,-(.base_crash-purgatory_start)
+       sgr     %r8,%r0
 
        /* Destination for this code i.e. end of memory to be swapped. */
+       larl    %r0,purgatory_end
+       larl    %r1,purgatory_start
+       slgr    %r0,%r1
        lg      %r9,crash_size-.base_crash(%r13)
-       aghi    %r9,-(purgatory_end-purgatory_start)
+       sgr     %r9,%r0
 
        /* Destination in crash memory, i.e. same as r9 but in crash memory. */
        lg      %r10,crash_start-.base_crash(%r13)
@@ -149,15 +158,19 @@ ENTRY(purgatory_start)
        /* Buffer location (in crash memory) and size. As the purgatory is
         * behind the point of no return it can re-use the stack as buffer.
         */
-       lghi    %r11,bufsz
+       larl    %r11,purgatory_end
        larl    %r12,stack
+       slgr    %r11,%r12
 
        MEMCPY  %r12,%r9,%r11   /* dst  -> (crash) buf */
        MEMCPY  %r9,%r8,%r11    /* self -> dst */
 
        /* Jump to new location. */
        lgr     %r7,%r9
-       aghi    %r7,.jump_to_dst-purgatory_start
+       larl    %r0,.jump_to_dst
+       larl    %r1,purgatory_start
+       slgr    %r0,%r1
+       agr     %r7,%r0
        br      %r7
 
 .jump_to_dst:
@@ -169,7 +182,10 @@ ENTRY(purgatory_start)
 
        /* Load new buffer location after jump */
        larl    %r7,stack
-       aghi    %r10,stack-purgatory_start
+       lgr     %r0,%r7
+       larl    %r1,purgatory_start
+       slgr    %r0,%r1
+       agr     %r10,%r0
        MEMCPY  %r10,%r7,%r11   /* (new) buf -> (crash) buf */
 
        /* Now the code is set up to run from its designated location. Start
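
The recurring rework in this purgatory code swaps assemble-time constants such
as bufsz and .base_crash-purgatory_start for larl/slgr pairs that compute the
same symbol distances at run time. A rough C analogue of the patched MEMSWAP
size computation (markers inside one array stand in for the linker-provided
stack and purgatory_end symbols):

```c
#include <stddef.h>
#include <stdio.h>

/* One image with internal markers, standing in for the linker-provided
 * 'stack' and 'purgatory_end' symbols. */
static char image[8192];

int main(void)
{
	char *stack_sym = image + 4096;	/* hypothetical 'stack' */
	char *end_sym   = image + 8192;	/* hypothetical 'purgatory_end' */

	/* What the patched MEMSWAP prologue computes with larl + slgr: the
	 * usable buffer is everything between 'stack' and 'purgatory_end',
	 * measured at run time instead of baked in as the bufsz constant. */
	size_t bufsz = (size_t)(end_sym - stack_sym);

	printf("run-time buffer size: %zu bytes\n", bufsz);
	return 0;
}
```
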
index c32b4c6229d3f849e6f6fa971e1a41ad7e6916e1..f39c8196efdfd166371464bbd0865d1737155e51 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/io.h>
 #include <linux/mfd/tmio.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sh_mmcif.h>
+#include <linux/platform_data/sh_mmcif.h>
 #include <linux/sh_eth.h>
 #include <linux/sh_intc.h>
 #include <linux/usb/renesas_usbhs.h>
index 4c9522dd351f4b292815b16e0db3c97d46e531d1..674da7ebd8b7f5005325c1fe48bb5c0daba2b5a1 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/memblock.h>
 #include <linux/mfd/tmio.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sh_mmcif.h>
+#include <linux/platform_data/sh_mmcif.h>
 #include <linux/mtd/physmap.h>
 #include <linux/gpio.h>
 #include <linux/gpio/machine.h>
index 6595b6b45bf152ae1dc3b57ee22f53b3b8cd2013..d30123d859e054be903802d9b105c66df70d66c0 100644 (file)
@@ -8,7 +8,7 @@
  * for more details.
  */
 
-#include <linux/mmc/sh_mmcif.h>
+#include <linux/platform_data/sh_mmcif.h>
 #include <mach/romimage.h>
 
 #define MMCIF_BASE      (void __iomem *)0xa4ca0000
index e41526120be14baed6b122d1cccadc2c8f85b5b2..619c18699459769b8eef382c5775089a0480a557 100644 (file)
@@ -25,7 +25,6 @@ CONFIG_CMDLINE_OVERWRITE=y
 CONFIG_CMDLINE="console=ttySC0,115200 earlyprintk=serial ignore_loglevel"
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 CONFIG_PM=y
 CONFIG_CPU_IDLE=y
 # CONFIG_STANDALONE is not set
index 6af08fa1ddf8a156535cbf7ad7f0beef7227919f..5a54e2b883f0a33577b1a1b5058d6feb784fd141 100644 (file)
@@ -30,7 +30,6 @@ CONFIG_CMDLINE_OVERWRITE=y
 CONFIG_CMDLINE="console=ttySC0,115200 earlyprintk=serial ignore_loglevel"
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 CONFIG_PM=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
index 601d062250d11cbebdc10e2f93efc241e890306a..122216123e6399c976ac4c5600e6aa34df04e298 100644 (file)
@@ -40,7 +40,6 @@ CONFIG_CMDLINE_OVERWRITE=y
 CONFIG_CMDLINE="console=ttySC3,115200 ignore_loglevel earlyprintk=serial"
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
-CONFIG_BINFMT_SHARED_FLAT=y
 CONFIG_BINFMT_MISC=y
 CONFIG_NET=y
 CONFIG_PACKET=y
index 542915b46209772cea964453b828dc7bfa9efa64..f86326a6f89e0df6e0219d33e5112495fcc7bf01 100644 (file)
@@ -9,8 +9,6 @@
 
 #define CLOCK_TICK_RATE        1193180 /* Underlying HZ */
 
-/* XXX Maybe do something better at some point... -DaveM */
-typedef unsigned long cycles_t;
-#define get_cycles()   (0)
+#include <asm-generic/timex.h>
 
 #endif
index f9fe502b81c65c0818bdd3749dea6434ac1c1ce3..dad38960d1a8ac92aba0546d8f323945407048a7 100644 (file)
@@ -779,5 +779,6 @@ static_assert(offsetof(compat_siginfo_t, si_upper)  == 0x18);
 static_assert(offsetof(compat_siginfo_t, si_pkey)      == 0x14);
 static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10);
 static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_perf_flags)        == 0x18);
 static_assert(offsetof(compat_siginfo_t, si_band)      == 0x0c);
 static_assert(offsetof(compat_siginfo_t, si_fd)                == 0x10);
index 8b9fc76cd3e026c1e2f76256f2b69d820c130399..570e43e6fda5cabb72f225fb392dbf91e61945f2 100644 (file)
@@ -590,5 +590,6 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x28);
 static_assert(offsetof(siginfo_t, si_pkey)     == 0x20);
 static_assert(offsetof(siginfo_t, si_perf_data)        == 0x18);
 static_assert(offsetof(siginfo_t, si_perf_type)        == 0x20);
+static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24);
 static_assert(offsetof(siginfo_t, si_band)     == 0x10);
 static_assert(offsetof(siginfo_t, si_fd)       == 0x14);
index c5e1545bc5cf9e7b1642723abec263fc953be046..77d7b9032158c7c34d23a8c937d2f9dd48f3ae17 100644 (file)
@@ -58,7 +58,7 @@ CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables -m64
 
 SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 -fcall-used-g5 -fcall-used-g7
 
-$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
 
 #
 # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -88,6 +88,7 @@ $(obj)/vdso32.so.dbg: asflags-$(CONFIG_SPARC64) += -m32
 KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic
index b03269faef714764748b16f4645e9496cb555633..c4344b67628dd677a4bd952d2aa8e783ccc56b39 100644 (file)
@@ -483,7 +483,6 @@ static void ubd_handler(void)
                        if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
                                blk_queue_max_discard_sectors(io_req->req->q, 0);
                                blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
-                               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
                        }
                        blk_mq_end_request(io_req->req, io_req->error);
                        kfree(io_req);
@@ -800,10 +799,8 @@ static int ubd_open_dev(struct ubd *ubd_dev)
        }
        if (ubd_dev->no_trim == 0) {
                ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
-               ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
                blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
                blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
        }
        blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
        return 0;
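
Both hunks track this cycle's block-layer convention change: the separate
QUEUE_FLAG_DISCARD bit is gone, so discard support is implied by a non-zero
max_discard_sectors limit and revoked by zeroing it, as ubd_handler() now does
on BLK_STS_NOTSUPP. A toy model of the new convention (the struct and helper
are simplified stand-ins):

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy model of the flag removal: no separate QUEUE_FLAG_DISCARD bit; support
 * is derived from the limit itself. */
struct queue_limits {
	unsigned int max_discard_sectors;
};

static bool queue_supports_discard(const struct queue_limits *l)
{
	return l->max_discard_sectors != 0;
}

int main(void)
{
	struct queue_limits q = { .max_discard_sectors = 1024 };

	printf("discard supported: %d\n", queue_supports_discard(&q));	/* 1 */
	q.max_discard_sectors = 0;	/* what ubd_handler() does on BLK_STS_NOTSUPP */
	printf("discard supported: %d\n", queue_supports_discard(&q));	/* 0 */
	return 0;
}
```
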
index e392a9a5bc9bdacb5e247d81030b538e424243c3..9f27176adb26dc7392a95506dd0e8fa1ca56d69a 100644 (file)
@@ -2,13 +2,8 @@
 #ifndef __UM_TIMEX_H
 #define __UM_TIMEX_H
 
-typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles (void)
-{
-       return 0;
-}
-
 #define CLOCK_TICK_RATE (HZ)
 
+#include <asm-generic/timex.h>
+
 #endif
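
This and the sparc32 change above drop hand-rolled get_cycles() stubs in favor
of the shared generic header. The contents of asm-generic/timex.h are not part
of this diff; presumably it centralizes roughly the following fallback (a
hedged reconstruction, see include/asm-generic/timex.h for the authoritative
version):

```c
/* Hypothetical sketch of the consolidated fallback that the deleted
 * per-architecture stubs used to duplicate. */
#ifndef __ASM_GENERIC_TIMEX_H
#define __ASM_GENERIC_TIMEX_H

typedef unsigned long cycles_t;

static inline cycles_t get_cycles(void)
{
	return 0;	/* no cheap cycle counter on this architecture */
}

#endif /* __ASM_GENERIC_TIMEX_H */
```
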
index 4bed3abf444d138fbe7125d38df8fe2b6c5ddda6..762a0b6ab8b6882358b14a003bcdfa8e1f6ddb1c 100644 (file)
@@ -188,7 +188,7 @@ config X86
        select HAVE_CONTEXT_TRACKING            if X86_64
        select HAVE_CONTEXT_TRACKING_OFFSTACK   if HAVE_CONTEXT_TRACKING
        select HAVE_C_RECORDMCOUNT
-       select HAVE_OBJTOOL_MCOUNT              if STACK_VALIDATION
+       select HAVE_OBJTOOL_MCOUNT              if HAVE_OBJTOOL
        select HAVE_BUILDTIME_MCOUNT_SORT
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_CONTIGUOUS
@@ -212,6 +212,7 @@ config X86
        select HAVE_IOREMAP_PROT
        select HAVE_IRQ_EXIT_ON_IRQ_STACK       if X86_64
        select HAVE_IRQ_TIME_ACCOUNTING
+       select HAVE_JUMP_LABEL_HACK             if HAVE_OBJTOOL
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_LZ4
@@ -230,7 +231,10 @@ config X86
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_MOVE_PMD
        select HAVE_MOVE_PUD
+       select HAVE_NOINSTR_HACK                if HAVE_OBJTOOL
        select HAVE_NMI
+       select HAVE_NOINSTR_VALIDATION          if HAVE_OBJTOOL
+       select HAVE_OBJTOOL                     if X86_64
        select HAVE_OPTPROBES
        select HAVE_PCSPKR_PLATFORM
        select HAVE_PERF_EVENTS
@@ -239,17 +243,17 @@ config X86
        select HAVE_PCI
        select HAVE_PERF_REGS
        select HAVE_PERF_USER_STACK_DUMP
-       select MMU_GATHER_RCU_TABLE_FREE                if PARAVIRT
+       select MMU_GATHER_RCU_TABLE_FREE        if PARAVIRT
        select HAVE_POSIX_CPU_TIMERS_TASK_WORK
        select HAVE_REGS_AND_STACK_ACCESS_API
-       select HAVE_RELIABLE_STACKTRACE         if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
+       select HAVE_RELIABLE_STACKTRACE         if UNWINDER_ORC || STACK_VALIDATION
        select HAVE_FUNCTION_ARG_ACCESS_API
        select HAVE_SETUP_PER_CPU_AREA
        select HAVE_SOFTIRQ_ON_OWN_STACK
        select HAVE_STACKPROTECTOR              if CC_HAS_SANE_STACKPROTECTOR
-       select HAVE_STACK_VALIDATION            if X86_64
+       select HAVE_STACK_VALIDATION            if HAVE_OBJTOOL
        select HAVE_STATIC_CALL
-       select HAVE_STATIC_CALL_INLINE          if HAVE_STACK_VALIDATION
+       select HAVE_STATIC_CALL_INLINE          if HAVE_OBJTOOL
        select HAVE_PREEMPT_DYNAMIC_CALL
        select HAVE_RSEQ
        select HAVE_SYSCALL_TRACEPOINTS
@@ -268,7 +272,6 @@ config X86
        select RTC_MC146818_LIB
        select SPARSE_IRQ
        select SRCU
-       select STACK_VALIDATION                 if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
        select TRACE_IRQFLAGS_SUPPORT
@@ -459,6 +462,7 @@ config GOLDFISH
 
 config RETPOLINE
        bool "Avoid speculative indirect branches in kernel"
+       select OBJTOOL if HAVE_OBJTOOL
        default y
        help
          Compile kernel with the retpoline compiler options to guard against
@@ -472,6 +476,7 @@ config CC_HAS_SLS
 config SLS
        bool "Mitigate Straight-Line-Speculation"
        depends on CC_HAS_SLS && X86_64
+       select OBJTOOL if HAVE_OBJTOOL
        default n
        help
          Compile the kernel with straight-line-speculation options to guard
@@ -878,6 +883,21 @@ config ACRN_GUEST
          IOT with small footprint and real-time features. More details can be
          found in https://projectacrn.org/.
 
+config INTEL_TDX_GUEST
+       bool "Intel TDX (Trust Domain Extensions) - Guest Support"
+       depends on X86_64 && CPU_SUP_INTEL
+       depends on X86_X2APIC
+       select ARCH_HAS_CC_PLATFORM
+       select X86_MEM_ENCRYPT
+       select X86_MCE
+       help
+         Support running as a guest under Intel TDX.  Without this support,
+         the guest kernel cannot boot or run under TDX.
+         TDX includes memory encryption and integrity capabilities
+         which protect the confidentiality and integrity of guest
+         memory contents and CPU state. TDX guests are protected from
+         some attacks from the VMM.
+
 endif #HYPERVISOR_GUEST
 
 source "arch/x86/Kconfig.cpu"
@@ -1313,7 +1333,7 @@ config MICROCODE
 
 config MICROCODE_INTEL
        bool "Intel microcode loading support"
-       depends on MICROCODE
+       depends on CPU_SUP_INTEL && MICROCODE
        default MICROCODE
        help
          This options enables microcode patch loading support for Intel
@@ -1325,7 +1345,7 @@ config MICROCODE_INTEL
 
 config MICROCODE_AMD
        bool "AMD microcode loading support"
-       depends on MICROCODE
+       depends on CPU_SUP_AMD && MICROCODE
        help
          If you select this option, microcode patch loading support for AMD
          processors will be enabled.
@@ -1816,17 +1836,6 @@ config ARCH_RANDOM
          If supported, this is a high bandwidth, cryptographically
          secure hardware random number generator.
 
-config X86_SMAP
-       def_bool y
-       prompt "Supervisor Mode Access Prevention" if EXPERT
-       help
-         Supervisor Mode Access Prevention (SMAP) is a security
-         feature in newer Intel processors.  There is a small
-         performance cost if this enabled and turned on; there is
-         also a small increase in the kernel size if this is enabled.
-
-         If unsure, say Y.
-
 config X86_UMIP
        def_bool y
        prompt "User Mode Instruction Prevention" if EXPERT
@@ -1855,9 +1864,10 @@ config CC_HAS_IBT
 config X86_KERNEL_IBT
        prompt "Indirect Branch Tracking"
        bool
-       depends on X86_64 && CC_HAS_IBT && STACK_VALIDATION
+       depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL
        # https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
        depends on !LD_IS_LLD || LLD_VERSION >= 140000
+       select OBJTOOL
        help
          Build the kernel with support for Indirect Branch Tracking, a
          hardware-supported coarse-grain forward-edge Control Flow Integrity
@@ -2326,7 +2336,9 @@ choice
          it can be used to assist security vulnerability exploitation.
 
          This setting can be changed at boot time via the kernel command
-         line parameter vsyscall=[emulate|xonly|none].
+         line parameter vsyscall=[emulate|xonly|none].  Emulate mode
+         is deprecated and can only be enabled using the kernel command
+         line.
 
          On a system with recent enough glibc (2.14 or newer) and no
          static binaries, you can say None without a performance penalty
@@ -2334,20 +2346,6 @@ choice
 
          If unsure, select "Emulate execution only".
 
-       config LEGACY_VSYSCALL_EMULATE
-               bool "Full emulation"
-               help
-                 The kernel traps and emulates calls into the fixed vsyscall
-                 address mapping. This makes the mapping non-executable, but
-                 it still contains readable known contents, which could be
-                 used in certain rare security vulnerability exploits. This
-                 configuration is recommended when using legacy userspace
-                 that still uses vsyscalls along with legacy binary
-                 instrumentation tools that require code to be readable.
-
-                 An example of this type of legacy userspace is running
-                 Pin on an old binary that still uses vsyscalls.
-
        config LEGACY_VSYSCALL_XONLY
                bool "Emulate execution only"
                help
@@ -2838,13 +2836,6 @@ config IA32_EMULATION
          64-bit kernel. You should likely turn this on, unless you're
          100% sure that you don't have any 32-bit programs left.
 
-config IA32_AOUT
-       tristate "IA32 a.out support"
-       depends on IA32_EMULATION
-       depends on BROKEN
-       help
-         Support old a.out binaries in the 32bit emulation.
-
 config X86_X32_ABI
        bool "x32 ABI for 64-bit mode"
        depends on X86_64
index d3a6f74a94bdf29d15b9ef1db95927fae29d668c..d872a7522e55fd4b65ce2705b5ce8cecdc3b858b 100644 (file)
@@ -237,7 +237,7 @@ choice
 config UNWINDER_ORC
        bool "ORC unwinder"
        depends on X86_64
-       select STACK_VALIDATION
+       select OBJTOOL
        help
          This option enables the ORC (Oops Rewind Capability) unwinder for
          unwinding kernel stack traces.  It uses a custom data format which is
index 63d50f65b8283466e0275c3e9e606f9b1d1a55e8..1abd7cc9d6cd98eee795f59ef61c1dd7e4166e7b 100644 (file)
@@ -313,5 +313,6 @@ define archhelp
   echo  ''
   echo  '  kvm_guest.config    - Enable Kconfig items for running this kernel as a KVM guest'
   echo  '  xen.config          - Enable Kconfig items for running this kernel as a Xen guest'
+  echo  '  x86_debug.config    - Enable tip tree debugging options for testing'
 
 endef
index 34c9dbb6a47d678e1f984947b2437523eb2d362e..148ba5c5106e1eae9e924b811658d69b38c5a46c 100644 (file)
@@ -26,6 +26,7 @@
 #include "bitops.h"
 #include "ctype.h"
 #include "cpuflags.h"
+#include "io.h"
 
 /* Useful macros */
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
@@ -35,44 +36,10 @@ extern struct boot_params boot_params;
 
 #define cpu_relax()    asm volatile("rep; nop")
 
-/* Basic port I/O */
-static inline void outb(u8 v, u16 port)
-{
-       asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
-}
-static inline u8 inb(u16 port)
-{
-       u8 v;
-       asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
-       return v;
-}
-
-static inline void outw(u16 v, u16 port)
-{
-       asm volatile("outw %0,%1" : : "a" (v), "dN" (port));
-}
-static inline u16 inw(u16 port)
-{
-       u16 v;
-       asm volatile("inw %1,%0" : "=a" (v) : "dN" (port));
-       return v;
-}
-
-static inline void outl(u32 v, u16 port)
-{
-       asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
-}
-static inline u32 inl(u16 port)
-{
-       u32 v;
-       asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
-       return v;
-}
-
 static inline void io_delay(void)
 {
        const u16 DELAY_PORT = 0x80;
-       asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT));
+       outb(0, DELAY_PORT);
 }
 
 /* These functions are used to reference data in other segments. */
@@ -110,66 +77,78 @@ typedef unsigned int addr_t;
 
 static inline u8 rdfs8(addr_t addr)
 {
+       u8 *ptr = (u8 *)absolute_pointer(addr);
        u8 v;
-       asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+       asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*ptr));
        return v;
 }
 static inline u16 rdfs16(addr_t addr)
 {
+       u16 *ptr = (u16 *)absolute_pointer(addr);
        u16 v;
-       asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+       asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
        return v;
 }
 static inline u32 rdfs32(addr_t addr)
 {
+       u32 *ptr = (u32 *)absolute_pointer(addr);
        u32 v;
-       asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+       asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
        return v;
 }
 
 static inline void wrfs8(u8 v, addr_t addr)
 {
-       asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+       u8 *ptr = (u8 *)absolute_pointer(addr);
+       asm volatile("movb %1,%%fs:%0" : "+m" (*ptr) : "qi" (v));
 }
 static inline void wrfs16(u16 v, addr_t addr)
 {
-       asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+       u16 *ptr = (u16 *)absolute_pointer(addr);
+       asm volatile("movw %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
 }
 static inline void wrfs32(u32 v, addr_t addr)
 {
-       asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+       u32 *ptr = (u32 *)absolute_pointer(addr);
+       asm volatile("movl %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
 }
 
 static inline u8 rdgs8(addr_t addr)
 {
+       u8 *ptr = (u8 *)absolute_pointer(addr);
        u8 v;
-       asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+       asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*ptr));
        return v;
 }
 static inline u16 rdgs16(addr_t addr)
 {
+       u16 *ptr = (u16 *)absolute_pointer(addr);
        u16 v;
-       asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+       asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
        return v;
 }
 static inline u32 rdgs32(addr_t addr)
 {
+       u32 *ptr = (u32 *)absolute_pointer(addr);
        u32 v;
-       asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+       asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
        return v;
 }
 
 static inline void wrgs8(u8 v, addr_t addr)
 {
-       asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+       u8 *ptr = (u8 *)absolute_pointer(addr);
+       asm volatile("movb %1,%%gs:%0" : "+m" (*ptr) : "qi" (v));
 }
 static inline void wrgs16(u16 v, addr_t addr)
 {
-       asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+       u16 *ptr = (u16 *)absolute_pointer(addr);
+       asm volatile("movw %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
 }
 static inline void wrgs32(u32 v, addr_t addr)
 {
-       asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+       u32 *ptr = (u32 *)absolute_pointer(addr);
+       asm volatile("movl %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
 }
 
 /* Note: these only return true/false, not a signed return value! */
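
Each rdfs/wrfs/rdgs/wrgs accessor now routes its literal address through
absolute_pointer() before dereferencing it; the usual motivation for this
helper is to stop the compiler from analyzing a fixed boot-time address as an
ordinary object pointer and warning about the access. A minimal userspace
imitation of the laundering trick (the macro body here is a stand-in for the
kernel's RELOC_HIDE-based definition):

```c
#include <stdint.h>
#include <stdio.h>

/* Hide the pointer's provenance behind an empty asm so the optimizer
 * cannot analyze (or warn about) the object it points to. */
#define absolute_pointer(val)					\
	({							\
		void *__p = (void *)(uintptr_t)(val);		\
		__asm__ ("" : "+r" (__p));			\
		__p;						\
	})

static int value = 42;

int main(void)
{
	int *p = absolute_pointer(&value);

	printf("%d\n", *p);	/* read through the laundered pointer */
	return 0;
}
```
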
index 6115274fe10fc54c133d098ee8c9fe1d0ce16bfb..19e1905dcbf6fd59f6cc5ca4d89e8997482b935d 100644 (file)
@@ -101,8 +101,10 @@ ifdef CONFIG_X86_64
 endif
 
 vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
+vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
 
 vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
+vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
 efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
 
 $(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE
index 8bcbcee54aa13703f82bc7e48c02bfdb48351e76..9caf89063e775eb054a857849eb8dd35a0ed9818 100644 (file)
@@ -3,10 +3,9 @@
 #include "misc.h"
 #include "error.h"
 #include "../string.h"
+#include "efi.h"
 
 #include <linux/numa.h>
-#include <linux/efi.h>
-#include <asm/efi.h>
 
 /*
  * Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0'
  */
 struct mem_vector immovable_mem[MAX_NUMNODES*2];
 
-/*
- * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
- * ACPI_TABLE_GUID are found, take the former, which has more features.
- */
 static acpi_physical_address
-__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
-                   bool efi_64)
+__efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len)
 {
-       acpi_physical_address rsdp_addr = 0;
-
 #ifdef CONFIG_EFI
-       int i;
-
-       /* Get EFI tables from systab. */
-       for (i = 0; i < nr_tables; i++) {
-               acpi_physical_address table;
-               efi_guid_t guid;
-
-               if (efi_64) {
-                       efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i;
-
-                       guid  = tbl->guid;
-                       table = tbl->table;
-
-                       if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
-                               debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
-                               return 0;
-                       }
-               } else {
-                       efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i;
-
-                       guid  = tbl->guid;
-                       table = tbl->table;
-               }
+       unsigned long rsdp_addr;
 
-               if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
-                       rsdp_addr = table;
-               else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
-                       return table;
-       }
+       /*
+        * Search EFI system tables for RSDP. ACPI_20_TABLE_GUID is preferred
+        * over ACPI_TABLE_GUID because it has more features.
+        */
+       rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+                                         ACPI_20_TABLE_GUID);
+       if (rsdp_addr)
+               return (acpi_physical_address)rsdp_addr;
+
+       /* No ACPI_20_TABLE_GUID found, fall back to ACPI_TABLE_GUID. */
+       rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+                                         ACPI_TABLE_GUID);
+       if (rsdp_addr)
+               return (acpi_physical_address)rsdp_addr;
+
+       debug_putstr("Error getting RSDP address.\n");
 #endif
-       return rsdp_addr;
-}
-
-/* EFI/kexec support is 64-bit only. */
-#ifdef CONFIG_X86_64
-static struct efi_setup_data *get_kexec_setup_data_addr(void)
-{
-       struct setup_data *data;
-       u64 pa_data;
-
-       pa_data = boot_params->hdr.setup_data;
-       while (pa_data) {
-               data = (struct setup_data *)pa_data;
-               if (data->type == SETUP_EFI)
-                       return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
-
-               pa_data = data->next;
-       }
-       return NULL;
-}
-
-static acpi_physical_address kexec_get_rsdp_addr(void)
-{
-       efi_system_table_64_t *systab;
-       struct efi_setup_data *esd;
-       struct efi_info *ei;
-       char *sig;
-
-       esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
-       if (!esd)
-               return 0;
-
-       if (!esd->tables) {
-               debug_putstr("Wrong kexec SETUP_EFI data.\n");
-               return 0;
-       }
-
-       ei = &boot_params->efi_info;
-       sig = (char *)&ei->efi_loader_signature;
-       if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
-               debug_putstr("Wrong kexec EFI loader signature.\n");
-               return 0;
-       }
-
-       /* Get systab from boot params. */
-       systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
-       if (!systab)
-               error("EFI system table not found in kexec boot_params.");
-
-       return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+       return 0;
 }
-#else
-static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
-#endif /* CONFIG_X86_64 */
 
 static acpi_physical_address efi_get_rsdp_addr(void)
 {
 #ifdef CONFIG_EFI
-       unsigned long systab, config_tables;
+       unsigned long cfg_tbl_pa = 0;
+       unsigned int cfg_tbl_len;
+       unsigned long systab_pa;
        unsigned int nr_tables;
-       struct efi_info *ei;
-       bool efi_64;
-       char *sig;
-
-       ei = &boot_params->efi_info;
-       sig = (char *)&ei->efi_loader_signature;
-
-       if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
-               efi_64 = true;
-       } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
-               efi_64 = false;
-       } else {
-               debug_putstr("Wrong EFI loader signature.\n");
-               return 0;
-       }
+       enum efi_type et;
+       int ret;
 
-       /* Get systab from boot params. */
-#ifdef CONFIG_X86_64
-       systab = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
-#else
-       if (ei->efi_systab_hi || ei->efi_memmap_hi) {
-               debug_putstr("Error getting RSDP address: EFI system table located above 4GB.\n");
+       et = efi_get_type(boot_params);
+       if (et == EFI_TYPE_NONE)
                return 0;
-       }
-       systab = ei->efi_systab;
-#endif
-       if (!systab)
-               error("EFI system table not found.");
 
-       /* Handle EFI bitness properly */
-       if (efi_64) {
-               efi_system_table_64_t *stbl = (efi_system_table_64_t *)systab;
+       systab_pa = efi_get_system_table(boot_params);
+       if (!systab_pa)
+               error("EFI support advertised, but unable to locate system table.");
 
-               config_tables   = stbl->tables;
-               nr_tables       = stbl->nr_tables;
-       } else {
-               efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
+       ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len);
+       if (ret || !cfg_tbl_pa)
+               error("EFI config table not found.");
 
-               config_tables   = stbl->tables;
-               nr_tables       = stbl->nr_tables;
-       }
-
-       if (!config_tables)
-               error("EFI config tables not found.");
-
-       return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+       return __efi_get_rsdp_addr(cfg_tbl_pa, cfg_tbl_len);
 #else
        return 0;
 #endif
@@ -256,14 +158,6 @@ acpi_physical_address get_rsdp_addr(void)
 
        pa = boot_params->acpi_rsdp_addr;
 
-       /*
-        * Try to get EFI data from setup_data. This can happen when we're a
-        * kexec'ed kernel and kexec(1) has passed all the required EFI info to
-        * us.
-        */
-       if (!pa)
-               pa = kexec_get_rsdp_addr();
-
        if (!pa)
                pa = efi_get_rsdp_addr();
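
After this removal, get_rsdp_addr() consults only two sources:
boot_params->acpi_rsdp_addr and the EFI configuration table. The kexec case no
longer needs a path of its own because the SETUP_EFI tables are picked up
inside efi_get_conf_table() (see the new efi.c below). A standalone model of
the reduced lookup order, with stub values in place of the real sources:

```c
#include <stdio.h>

typedef unsigned long acpi_physical_address;

/* Stand-ins for the two remaining RSDP sources. */
static acpi_physical_address bootparams_rsdp;	/* boot_params->acpi_rsdp_addr */

static acpi_physical_address efi_rsdp(void)
{
	return 0xfeed0000UL;	/* ACPI_20_TABLE_GUID preferred, then ACPI_TABLE_GUID */
}

int main(void)
{
	acpi_physical_address pa = bootparams_rsdp;

	if (!pa)
		pa = efi_rsdp();
	printf("RSDP at %#lx\n", pa);
	return 0;
}
```
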
 
index 261e81fb95826d0264c8ad3148f6259cc0c3f51d..70a8d1706d0f1fb46db84bdcb2acf140fc763e04 100644 (file)
@@ -1,5 +1,6 @@
 #include "misc.h"
 
-int early_serial_base;
+/* This might be accessed before .bss is cleared, so use .data instead. */
+int early_serial_base __section(".data");
 
 #include "../early_serial_console.c"
diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c
new file mode 100644 (file)
index 0000000..6edd034
--- /dev/null
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helpers for early access to EFI configuration table.
+ *
+ * Originally derived from arch/x86/boot/compressed/acpi.c
+ */
+
+#include "misc.h"
+
+/**
+ * efi_get_type - Given a pointer to boot_params, determine the type of EFI environment.
+ *
+ * @bp:         pointer to boot_params
+ *
+ * Return: EFI_TYPE_{32,64} for valid EFI environments, EFI_TYPE_NONE otherwise.
+ */
+enum efi_type efi_get_type(struct boot_params *bp)
+{
+       struct efi_info *ei;
+       enum efi_type et;
+       const char *sig;
+
+       ei = &bp->efi_info;
+       sig = (char *)&ei->efi_loader_signature;
+
+       if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+               et = EFI_TYPE_64;
+       } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
+               et = EFI_TYPE_32;
+       } else {
+               debug_putstr("No EFI environment detected.\n");
+               et = EFI_TYPE_NONE;
+       }
+
+#ifndef CONFIG_X86_64
+       /*
+        * Existing callers like acpi.c treat this case as an indicator to
+        * fall through to non-EFI, rather than an error, so maintain that
+        * functionality here as well.
+        */
+       if (ei->efi_systab_hi || ei->efi_memmap_hi) {
+               debug_putstr("EFI system table is located above 4GB and cannot be accessed.\n");
+               et = EFI_TYPE_NONE;
+       }
+#endif
+
+       return et;
+}
+
+/**
+ * efi_get_system_table - Given a pointer to boot_params, retrieve the physical address
+ *                        of the EFI system table.
+ *
+ * @bp:         pointer to boot_params
+ *
+ * Return: EFI system table address on success. On error, return 0.
+ */
+unsigned long efi_get_system_table(struct boot_params *bp)
+{
+       unsigned long sys_tbl_pa;
+       struct efi_info *ei;
+
+       /* Get systab from boot params. */
+       ei = &bp->efi_info;
+#ifdef CONFIG_X86_64
+       sys_tbl_pa = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
+#else
+       sys_tbl_pa = ei->efi_systab;
+#endif
+       if (!sys_tbl_pa) {
+               debug_putstr("EFI system table not found.\n");
+               return 0;
+       }
+
+       return sys_tbl_pa;
+}
+
+/*
+ * The EFI config table address changes to a virtual address after boot, which
+ * may not be accessible to the kexec'd kernel. To address this, kexec provides
+ * the initial physical address via a struct setup_data entry, which is
+ * checked for here, along with some sanity checks.
+ */
+static struct efi_setup_data *get_kexec_setup_data(struct boot_params *bp,
+                                                  enum efi_type et)
+{
+#ifdef CONFIG_X86_64
+       struct efi_setup_data *esd = NULL;
+       struct setup_data *data;
+       u64 pa_data;
+
+       pa_data = bp->hdr.setup_data;
+       while (pa_data) {
+               data = (struct setup_data *)pa_data;
+               if (data->type == SETUP_EFI) {
+                       esd = (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+                       break;
+               }
+
+               pa_data = data->next;
+       }
+
+       /*
+        * Original ACPI code falls back to attempting normal EFI boot in these
+        * cases, so maintain existing behavior by indicating a non-kexec
+        * environment to the caller, but log the anomaly for debugging.
+        */
+       if (esd && !esd->tables) {
+               debug_putstr("kexec EFI environment missing valid configuration table.\n");
+               return NULL;
+       }
+
+       return esd;
+#endif
+       return NULL;
+}
+
+/**
+ * efi_get_conf_table - Given a pointer to boot_params, locate and return the physical
+ *                      address of the EFI configuration table.
+ *
+ * @bp:                 pointer to boot_params
+ * @cfg_tbl_pa:         location to store physical address of config table
+ * @cfg_tbl_len:        location to store number of config table entries
+ *
+ * Return: 0 on success. On error, the output parameters are left unchanged.
+ */
+int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa,
+                      unsigned int *cfg_tbl_len)
+{
+       unsigned long sys_tbl_pa;
+       enum efi_type et;
+
+       if (!cfg_tbl_pa || !cfg_tbl_len)
+               return -EINVAL;
+
+       sys_tbl_pa = efi_get_system_table(bp);
+       if (!sys_tbl_pa)
+               return -EINVAL;
+
+       /* Handle EFI bitness properly */
+       et = efi_get_type(bp);
+       if (et == EFI_TYPE_64) {
+               efi_system_table_64_t *stbl = (efi_system_table_64_t *)sys_tbl_pa;
+               struct efi_setup_data *esd;
+
+               /* kexec provides an alternative EFI conf table, check for it. */
+               esd = get_kexec_setup_data(bp, et);
+
+               *cfg_tbl_pa = esd ? esd->tables : stbl->tables;
+               *cfg_tbl_len = stbl->nr_tables;
+       } else if (et == EFI_TYPE_32) {
+               efi_system_table_32_t *stbl = (efi_system_table_32_t *)sys_tbl_pa;
+
+               *cfg_tbl_pa = stbl->tables;
+               *cfg_tbl_len = stbl->nr_tables;
+       } else {
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Get vendor table address/guid from EFI config table at the given index */
+static int get_vendor_table(void *cfg_tbl, unsigned int idx,
+                           unsigned long *vendor_tbl_pa,
+                           efi_guid_t *vendor_tbl_guid,
+                           enum efi_type et)
+{
+       if (et == EFI_TYPE_64) {
+               efi_config_table_64_t *tbl_entry = (efi_config_table_64_t *)cfg_tbl + idx;
+
+               if (!IS_ENABLED(CONFIG_X86_64) && tbl_entry->table >> 32) {
+                       debug_putstr("Error: EFI config table entry located above 4GB.\n");
+                       return -EINVAL;
+               }
+
+               *vendor_tbl_pa = tbl_entry->table;
+               *vendor_tbl_guid = tbl_entry->guid;
+
+       } else if (et == EFI_TYPE_32) {
+               efi_config_table_32_t *tbl_entry = (efi_config_table_32_t *)cfg_tbl + idx;
+
+               *vendor_tbl_pa = tbl_entry->table;
+               *vendor_tbl_guid = tbl_entry->guid;
+       } else {
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/**
+ * efi_find_vendor_table - Given EFI config table, search it for the physical
+ *                         address of the vendor table associated with GUID.
+ *
+ * @bp:                pointer to boot_params
+ * @cfg_tbl_pa:        pointer to EFI configuration table
+ * @cfg_tbl_len:       number of entries in EFI configuration table
+ * @guid:              GUID of vendor table
+ *
+ * Return: vendor table address on success. On error, return 0.
+ */
+unsigned long efi_find_vendor_table(struct boot_params *bp,
+                                   unsigned long cfg_tbl_pa,
+                                   unsigned int cfg_tbl_len,
+                                   efi_guid_t guid)
+{
+       enum efi_type et;
+       unsigned int i;
+
+       et = efi_get_type(bp);
+       if (et == EFI_TYPE_NONE)
+               return 0;
+
+       for (i = 0; i < cfg_tbl_len; i++) {
+               unsigned long vendor_tbl_pa;
+               efi_guid_t vendor_tbl_guid;
+               int ret;
+
+               ret = get_vendor_table((void *)cfg_tbl_pa, i,
+                                      &vendor_tbl_pa,
+                                      &vendor_tbl_guid, et);
+               if (ret)
+                       return 0;
+
+               if (!efi_guidcmp(guid, vendor_tbl_guid))
+                       return vendor_tbl_pa;
+       }
+
+       return 0;
+}
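
Taken together, the three public helpers in this file compose into a short
lookup chain; the sketch below mirrors how the reworked acpi.c consumes them,
using only functions and GUIDs defined in this patch, with error reporting
elided:

```c
/* Find the ACPI 2.0 table via the new helpers; returns 0 on failure. */
static unsigned long find_acpi20_table(struct boot_params *bp)
{
	unsigned long cfg_tbl_pa = 0;
	unsigned int cfg_tbl_len;

	if (efi_get_type(bp) == EFI_TYPE_NONE)
		return 0;	/* not an EFI boot; caller falls back elsewhere */

	if (efi_get_conf_table(bp, &cfg_tbl_pa, &cfg_tbl_len))
		return 0;	/* no reachable config table */

	return efi_find_vendor_table(bp, cfg_tbl_pa, cfg_tbl_len,
				     ACPI_20_TABLE_GUID);
}
```
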
diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
new file mode 100644 (file)
index 0000000..7db2f41
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_EFI_H
+#define BOOT_COMPRESSED_EFI_H
+
+#if defined(_LINUX_EFI_H) || defined(_ASM_X86_EFI_H)
+#error Please do not include kernel proper namespace headers
+#endif
+
+typedef guid_t efi_guid_t __aligned(__alignof__(u32));
+
+#define EFI_GUID(a, b, c, d...) (efi_guid_t){ {                                        \
+       (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff,  \
+       (b) & 0xff, ((b) >> 8) & 0xff,                                          \
+       (c) & 0xff, ((c) >> 8) & 0xff, d } }
+
+#define ACPI_TABLE_GUID                                EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3,  0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
+#define ACPI_20_TABLE_GUID                     EFI_GUID(0x8868e871, 0xe4f1, 0x11d3,  0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define EFI_CC_BLOB_GUID                       EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
+
+#define EFI32_LOADER_SIGNATURE "EL32"
+#define EFI64_LOADER_SIGNATURE "EL64"
+
+/*
+ * Generic EFI table header
+ */
+typedef        struct {
+       u64 signature;
+       u32 revision;
+       u32 headersize;
+       u32 crc32;
+       u32 reserved;
+} efi_table_hdr_t;
+
+#define EFI_CONVENTIONAL_MEMORY                 7
+
+#define EFI_MEMORY_MORE_RELIABLE \
+                               ((u64)0x0000000000010000ULL)    /* higher reliability */
+#define EFI_MEMORY_SP          ((u64)0x0000000000040000ULL)    /* soft reserved */
+
+#define EFI_PAGE_SHIFT         12
+
+typedef struct {
+       u32 type;
+       u32 pad;
+       u64 phys_addr;
+       u64 virt_addr;
+       u64 num_pages;
+       u64 attribute;
+} efi_memory_desc_t;
+
+#define efi_early_memdesc_ptr(map, desc_size, n)                       \
+       (efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size)))
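The desc_size stride is passed in explicitly because firmware may report descriptors larger than sizeof(efi_memory_desc_t). A minimal walk using the macro might look like this (a sketch; map, desc_size and nr_descs are assumed to come from the boot_params EFI info):

```c
/* Sketch: count EFI_CONVENTIONAL_MEMORY descriptors in a memory map. */
static unsigned int count_conventional(void *map, unsigned long desc_size,
				       unsigned int nr_descs)
{
	unsigned int i, n = 0;

	for (i = 0; i < nr_descs; i++) {
		efi_memory_desc_t *md = efi_early_memdesc_ptr(map, desc_size, i);

		if (md->type == EFI_CONVENTIONAL_MEMORY)
			n++;
	}

	return n;
}
```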
+
+typedef struct {
+       efi_guid_t guid;
+       u64 table;
+} efi_config_table_64_t;
+
+typedef struct {
+       efi_guid_t guid;
+       u32 table;
+} efi_config_table_32_t;
+
+typedef struct {
+       efi_table_hdr_t hdr;
+       u64 fw_vendor;  /* physical addr of CHAR16 vendor string */
+       u32 fw_revision;
+       u32 __pad1;
+       u64 con_in_handle;
+       u64 con_in;
+       u64 con_out_handle;
+       u64 con_out;
+       u64 stderr_handle;
+       u64 stderr;
+       u64 runtime;
+       u64 boottime;
+       u32 nr_tables;
+       u32 __pad2;
+       u64 tables;
+} efi_system_table_64_t;
+
+typedef struct {
+       efi_table_hdr_t hdr;
+       u32 fw_vendor;  /* physical addr of CHAR16 vendor string */
+       u32 fw_revision;
+       u32 con_in_handle;
+       u32 con_in;
+       u32 con_out_handle;
+       u32 con_out;
+       u32 stderr_handle;
+       u32 stderr;
+       u32 runtime;
+       u32 boottime;
+       u32 nr_tables;
+       u32 tables;
+} efi_system_table_32_t;
+
+/* kexec external ABI */
+struct efi_setup_data {
+       u64 fw_vendor;
+       u64 __unused;
+       u64 tables;
+       u64 smbios;
+       u64 reserved[8];
+};
+
+static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right)
+{
+       return memcmp(&left, &right, sizeof (efi_guid_t));
+}
+
+#ifdef CONFIG_EFI
+bool __pure __efi_soft_reserve_enabled(void);
+
+static inline bool __pure efi_soft_reserve_enabled(void)
+{
+       return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE)
+               && __efi_soft_reserve_enabled();
+}
+#else
+static inline bool efi_soft_reserve_enabled(void)
+{
+       return false;
+}
+#endif /* CONFIG_EFI */
+#endif /* BOOT_COMPRESSED_EFI_H */
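For reference, the EFI type that efi_get_type() reports is derived from the loader signature the EFI stub records in boot_params. A sketch of that check, assuming the efi_info layout from asm/bootparam.h (the actual efi.c implementation is not shown in this hunk):

```c
/* Sketch: distinguish 32-bit and 64-bit firmware by loader signature. */
static enum efi_type guess_efi_type(struct boot_params *bp)
{
	if (!memcmp(&bp->efi_info.efi_loader_signature,
		    EFI64_LOADER_SIGNATURE, 4))
		return EFI_TYPE_64;

	if (!memcmp(&bp->efi_info.efi_loader_signature,
		    EFI32_LOADER_SIGNATURE, 4))
		return EFI_TYPE_32;

	return EFI_TYPE_NONE;
}
```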
index dea95301196b8550fdd3faa81355bb20eeba93c8..d33f060900d235169f05fdfaff4c7c7bbbae6cc1 100644 (file)
@@ -189,11 +189,11 @@ SYM_FUNC_START(startup_32)
        subl    $32, %eax       /* Encryption bit is always above bit 31 */
        bts     %eax, %edx      /* Set encryption mask for page tables */
        /*
-        * Mark SEV as active in sev_status so that startup32_check_sev_cbit()
-        * will do a check. The sev_status memory will be fully initialized
-        * with the contents of MSR_AMD_SEV_STATUS later in
-        * set_sev_encryption_mask(). For now it is sufficient to know that SEV
-        * is active.
+        * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
+        * startup32_check_sev_cbit() will do a check. sev_enable() will
+        * initialize sev_status with all the bits reported by the SEV_STATUS
+        * MSR (MSR_AMD64_SEV) later, but only MSR_AMD64_SEV_ENABLED_BIT
+        * needs to be set for now.
         */
        movl    $1, rva(sev_status)(%ebp)
 1:
@@ -289,7 +289,7 @@ SYM_FUNC_START(startup_32)
        pushl   %eax
 
        /* Enter paged protected Mode, activating Long Mode */
-       movl    $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
+       movl    $CR0_STATE, %eax
        movl    %eax, %cr0
 
        /* Jump from 32bit compatibility mode into 64bit mode. */
@@ -447,6 +447,23 @@ SYM_CODE_START(startup_64)
        call    load_stage1_idt
        popq    %rsi
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+       /*
+        * Now that the stage1 interrupt handlers are set up, #VC exceptions from
+        * CPUID instructions can be properly handled for SEV-ES guests.
+        *
+        * For SEV-SNP, the CPUID table also needs to be set up before any
+        * CPUID instructions are issued, so do that now via sev_enable(),
+        * which also handles the rest of the SEV-related detection/setup,
+        * ensuring it is done before any dependent code runs.
+        */
+       pushq   %rsi
+       movq    %rsi, %rdi              /* real mode address */
+       call    sev_enable
+       popq    %rsi
+#endif
+
        /*
         * paging_prepare() sets up the trampoline and checks if we need to
         * enable 5-level paging.
@@ -558,17 +575,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
        shrq    $3, %rcx
        rep     stosq
 
-/*
- * If running as an SEV guest, the encryption mask is required in the
- * page-table setup code below. When the guest also has SEV-ES enabled
- * set_sev_encryption_mask() will cause #VC exceptions, but the stage2
- * handler can't map its GHCB because the page-table is not set up yet.
- * So set up the encryption mask here while still on the stage1 #VC
- * handler. Then load stage2 IDT and switch to the kernel's own
- * page-table.
- */
        pushq   %rsi
-       call    set_sev_encryption_mask
        call    load_stage2_idt
 
        /* Pass boot_params to initialize_identity_maps() */
@@ -642,12 +649,28 @@ SYM_CODE_START(trampoline_32bit_src)
        movl    $MSR_EFER, %ecx
        rdmsr
        btsl    $_EFER_LME, %eax
+       /* Avoid writing EFER if no change was made (for TDX guest) */
+       jc      1f
        wrmsr
-       popl    %edx
+1:     popl    %edx
        popl    %ecx
 
+#ifdef CONFIG_X86_MCE
+       /*
+        * Preserve CR4.MCE if the kernel will enable #MC support.
+        * Clearing MCE may fault in some environments (that also force #MC
+        * support). Any machine check that occurs before #MC support is fully
+        * configured will crash the system regardless of the CR4.MCE value set
+        * here.
+        */
+       movl    %cr4, %eax
+       andl    $X86_CR4_MCE, %eax
+#else
+       movl    $0, %eax
+#endif
+
        /* Enable PAE and LA57 (if required) paging modes */
-       movl    $X86_CR4_PAE, %eax
+       orl     $X86_CR4_PAE, %eax
        testl   %edx, %edx
        jz      1f
        orl     $X86_CR4_LA57, %eax
@@ -661,8 +684,9 @@ SYM_CODE_START(trampoline_32bit_src)
        pushl   $__KERNEL_CS
        pushl   %eax
 
-       /* Enable paging again */
-       movl    $(X86_CR0_PG | X86_CR0_PE), %eax
+       /* Enable paging again. */
+       movl    %cr0, %eax
+       btsl    $X86_CR0_PG_BIT, %eax
        movl    %eax, %cr0
 
        lret
index f7213d0943b82e73ef0181a72def7c3815ab5c0e..44c350d627c79b4b13e29bfe74cbec1e5c69879f 100644 (file)
@@ -90,7 +90,7 @@ static struct x86_mapping_info mapping_info;
 /*
  * Adds the specified range to the identity mappings.
  */
-static void add_identity_map(unsigned long start, unsigned long end)
+void kernel_add_identity_map(unsigned long start, unsigned long end)
 {
        int ret;
 
@@ -157,14 +157,15 @@ void initialize_identity_maps(void *rmode)
         * explicitly here in case the compressed kernel does not touch them,
         * or does not touch all the pages covering them.
         */
-       add_identity_map((unsigned long)_head, (unsigned long)_end);
+       kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
        boot_params = rmode;
-       add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
+       kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
        cmdline = get_cmd_line_ptr();
-       add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+       kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+
+       sev_prep_identity_maps(top_level_pgt);
 
        /* Load the new page-table. */
-       sev_verify_cbit(top_level_pgt);
        write_cr3(top_level_pgt);
 }
 
@@ -246,10 +247,10 @@ static int set_clr_page_flags(struct x86_mapping_info *info,
         * It should already exist, but keep things generic.
         *
         * To map the page just read from it and fault it in if there is no
-        * mapping yet. add_identity_map() can't be called here because that
-        * would unconditionally map the address on PMD level, destroying any
-        * PTE-level mappings that might already exist. Use assembly here so
-        * the access won't be optimized away.
+        * mapping yet. kernel_add_identity_map() can't be called here because
+        * that would unconditionally map the address on PMD level, destroying
+        * any PTE-level mappings that might already exist. Use assembly here
+        * so the access won't be optimized away.
         */
        asm volatile("mov %[address], %%r9"
                     :: [address] "g" (*(unsigned long *)address)
@@ -275,15 +276,31 @@ static int set_clr_page_flags(struct x86_mapping_info *info,
         * Changing encryption attributes of a page requires to flush it from
         * the caches.
         */
-       if ((set | clr) & _PAGE_ENC)
+       if ((set | clr) & _PAGE_ENC) {
                clflush_page(address);
 
+               /*
+                * If the encryption attribute is being cleared, change the page state
+                * to shared in the RMP table.
+                */
+               if (clr)
+                       snp_set_page_shared(__pa(address & PAGE_MASK));
+       }
+
        /* Update PTE */
        pte = *ptep;
        pte = pte_set_flags(pte, set);
        pte = pte_clear_flags(pte, clr);
        set_pte(ptep, pte);
 
+       /*
+        * If the encryption attribute is being set, then change the page state to
+        * private in the RMP entry. The page state change must be done after the PTE
+        * is updated.
+        */
+       if (set & _PAGE_ENC)
+               snp_set_page_private(__pa(address & PAGE_MASK));
+
        /* Flush TLB after changing encryption attribute */
        write_cr3(top_level_pgt);
 
@@ -347,5 +364,5 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
         * Error code is sane - now identity map the 2M region around
         * the faulting address.
         */
-       add_identity_map(address, end);
+       kernel_add_identity_map(address, end);
 }
index 9b93567d663a9003d25a8d3f9a3ff6029da28458..6debb816e83dcc85321e80f1c6db00d23ad3351f 100644 (file)
@@ -39,7 +39,23 @@ void load_stage1_idt(void)
        load_boot_idt(&boot_idt_desc);
 }
 
-/* Setup IDT after kernel jumping to  .Lrelocated */
+/*
+ * Set up the IDT after the kernel has jumped to .Lrelocated.
+ *
+ * initialize_identity_maps() needs a #PF handler to be set up
+ * in order to be able to fault-in identity mapping ranges; see
+ * do_boot_page_fault().
+ *
+ * This #PF handler setup needs to happen in load_stage2_idt(), where the
+ * IDT is loaded and the #VC IDT entry gets set up too.
+ *
+ * In order to be able to handle #VCs, one needs a GHCB, which
+ * gets set up with an already established page table; that is done in
+ * initialize_identity_maps(). And here is the catch-22: the boot #VC
+ * handler do_boot_stage2_vc() needs to call early_setup_ghcb() itself
+ * (and, especially, set_page_decrypted()) because the SEV-ES setup code
+ * cannot initialize a GHCB while there is no #PF handler yet...
+ */
 void load_stage2_idt(void)
 {
        boot_idt_desc.address = (unsigned long)boot_idt;
index 411b268bc0a24db0f0eecf24989619a69025c1aa..4a3f223973f40f85bc633359f1a41c92ce6ca084 100644 (file)
 #include "misc.h"
 #include "error.h"
 #include "../string.h"
+#include "efi.h"
 
 #include <generated/compile.h>
 #include <linux/module.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <linux/ctype.h>
-#include <linux/efi.h>
 #include <generated/utsrelease.h>
-#include <asm/efi.h>
 
 #define _SETUP
 #include <asm/setup.h> /* For COMMAND_LINE_SIZE */
index a63424d13627bcfe935ee1f02f1f6637cf984f1c..a73e4d783cae20b6786969c1505ac3bca32bedd7 100644 (file)
@@ -187,42 +187,6 @@ SYM_CODE_END(startup32_vc_handler)
        .code64
 
 #include "../../kernel/sev_verify_cbit.S"
-SYM_FUNC_START(set_sev_encryption_mask)
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-       push    %rbp
-       push    %rdx
-
-       movq    %rsp, %rbp              /* Save current stack pointer */
-
-       call    get_sev_encryption_bit  /* Get the encryption bit position */
-       testl   %eax, %eax
-       jz      .Lno_sev_mask
-
-       bts     %rax, sme_me_mask(%rip) /* Create the encryption mask */
-
-       /*
-        * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in
-        * get_sev_encryption_bit() because this function is 32-bit code and
-        * shared between 64-bit and 32-bit boot path.
-        */
-       movl    $MSR_AMD64_SEV, %ecx    /* Read the SEV MSR */
-       rdmsr
-
-       /* Store MSR value in sev_status */
-       shlq    $32, %rdx
-       orq     %rdx, %rax
-       movq    %rax, sev_status(%rip)
-
-.Lno_sev_mask:
-       movq    %rbp, %rsp              /* Restore original stack pointer */
-
-       pop     %rdx
-       pop     %rbp
-#endif
-
-       xor     %rax, %rax
-       RET
-SYM_FUNC_END(set_sev_encryption_mask)
 
        .data
 
index 1cdcaf34ee367bd179ad98ad95dfec56b6df37bb..cf690d8712f4eda895861a802e10f86ccc7d68a1 100644 (file)
@@ -48,12 +48,17 @@ void *memmove(void *dest, const void *src, size_t n);
  */
 struct boot_params *boot_params;
 
+struct port_io_ops pio_ops;
+
 memptr free_mem_ptr;
 memptr free_mem_end_ptr;
 
 static char *vidmem;
 static int vidport;
-static int lines, cols;
+
+/* These might be accessed before .bss is cleared, so use .data instead. */
+static int lines __section(".data");
+static int cols __section(".data");
 
 #ifdef CONFIG_KERNEL_GZIP
 #include "../../../../lib/decompress_inflate.c"
@@ -371,6 +376,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
        lines = boot_params->screen_info.orig_video_lines;
        cols = boot_params->screen_info.orig_video_cols;
 
+       init_default_io_ops();
+
+       /*
+        * Detect TDX guest environment.
+        *
+        * It has to be done before console_init() in order to use
+        * paravirtualized port I/O operations if needed.
+        */
+       early_tdx_detect();
+
        console_init();
 
        /*
index 16ed360b6692dbb05a96cd3f39d1786bcf6e2ae0..4910bf230d7b4afbb44f83d958e0b1902fe26458 100644 (file)
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <linux/elf.h>
-#include <linux/io.h>
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
 #include <asm/desc_defs.h>
 
+#include "tdx.h"
+
 #define BOOT_CTYPE_H
 #include <linux/acpi.h>
 
 #define BOOT_BOOT_H
 #include "../ctype.h"
+#include "../io.h"
+
+#include "efi.h"
 
 #ifdef CONFIG_X86_64
 #define memptr long
@@ -120,17 +124,23 @@ static inline void console_init(void)
 { }
 #endif
 
-void set_sev_encryption_mask(void);
-
 #ifdef CONFIG_AMD_MEM_ENCRYPT
+void sev_enable(struct boot_params *bp);
 void sev_es_shutdown_ghcb(void);
 extern bool sev_es_check_ghcb_fault(unsigned long address);
+void snp_set_page_private(unsigned long paddr);
+void snp_set_page_shared(unsigned long paddr);
+void sev_prep_identity_maps(unsigned long top_level_pgt);
 #else
+static inline void sev_enable(struct boot_params *bp) { }
 static inline void sev_es_shutdown_ghcb(void) { }
 static inline bool sev_es_check_ghcb_fault(unsigned long address)
 {
        return false;
 }
+static inline void snp_set_page_private(unsigned long paddr) { }
+static inline void snp_set_page_shared(unsigned long paddr) { }
+static inline void sev_prep_identity_maps(unsigned long top_level_pgt) { }
 #endif
 
 /* acpi.c */
@@ -151,6 +161,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
 #ifdef CONFIG_X86_5LEVEL
 extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
 #endif
+extern void kernel_add_identity_map(unsigned long start, unsigned long end);
 
 /* Used by PAGE_KERN* macros: */
 extern pteval_t __default_kernel_pte_mask;
@@ -172,4 +183,47 @@ void boot_stage2_vc(void);
 
 unsigned long sev_verify_cbit(unsigned long cr3);
 
+enum efi_type {
+       EFI_TYPE_64,
+       EFI_TYPE_32,
+       EFI_TYPE_NONE,
+};
+
+#ifdef CONFIG_EFI
+/* helpers for early EFI config table access */
+enum efi_type efi_get_type(struct boot_params *bp);
+unsigned long efi_get_system_table(struct boot_params *bp);
+int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa,
+                      unsigned int *cfg_tbl_len);
+unsigned long efi_find_vendor_table(struct boot_params *bp,
+                                   unsigned long cfg_tbl_pa,
+                                   unsigned int cfg_tbl_len,
+                                   efi_guid_t guid);
+#else
+static inline enum efi_type efi_get_type(struct boot_params *bp)
+{
+       return EFI_TYPE_NONE;
+}
+
+static inline unsigned long efi_get_system_table(struct boot_params *bp)
+{
+       return 0;
+}
+
+static inline int efi_get_conf_table(struct boot_params *bp,
+                                    unsigned long *cfg_tbl_pa,
+                                    unsigned int *cfg_tbl_len)
+{
+       return -ENOENT;
+}
+
+static inline unsigned long efi_find_vendor_table(struct boot_params *bp,
+                                                 unsigned long cfg_tbl_pa,
+                                                 unsigned int cfg_tbl_len,
+                                                 efi_guid_t guid)
+{
+       return 0;
+}
+#endif /* CONFIG_EFI */
+
 #endif /* BOOT_COMPRESSED_MISC_H */
index 6ff7e81b5628456b92779ede7a9c941fe7df6a85..cc9b2529a08634b4249ec65c0d33219f811d08a1 100644 (file)
@@ -6,7 +6,7 @@
 #define TRAMPOLINE_32BIT_PGTABLE_OFFSET        0
 
 #define TRAMPOLINE_32BIT_CODE_OFFSET   PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE     0x70
+#define TRAMPOLINE_32BIT_CODE_SIZE     0x80
 
 #define TRAMPOLINE_32BIT_STACK_END     TRAMPOLINE_32BIT_SIZE
 
index a1733319a22a055262758126bc578a32e7fd3ef0..2ac12ff4111bf8c0dddeafc38680daaf9c0834cc 100644 (file)
@@ -1,11 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "misc.h"
-#include <linux/efi.h>
 #include <asm/e820/types.h>
 #include <asm/processor.h>
-#include <asm/efi.h>
 #include "pgtable.h"
 #include "../string.h"
+#include "efi.h"
 
 #define BIOS_START_MIN         0x20000U        /* 128K, less than this is insane */
 #define BIOS_START_MAX         0x9f000U        /* 640K, absolute maximum */
index 28bcf04c022eaf4e87f92cdfb7f8fd74343750eb..52f989f6acc281f95815bc76e0976348b5b8f635 100644 (file)
 #include <asm/fpu/xcr.h>
 #include <asm/ptrace.h>
 #include <asm/svm.h>
+#include <asm/cpuid.h>
 
 #include "error.h"
+#include "../msr.h"
 
 struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
 struct ghcb *boot_ghcb;
@@ -56,23 +58,19 @@ static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
 
 static inline u64 sev_es_rd_ghcb_msr(void)
 {
-       unsigned long low, high;
+       struct msr m;
 
-       asm volatile("rdmsr" : "=a" (low), "=d" (high) :
-                       "c" (MSR_AMD64_SEV_ES_GHCB));
+       boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m);
 
-       return ((high << 32) | low);
+       return m.q;
 }
 
 static inline void sev_es_wr_ghcb_msr(u64 val)
 {
-       u32 low, high;
+       struct msr m;
 
-       low  = val & 0xffffffffUL;
-       high = val >> 32;
-
-       asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB),
-                       "a"(low), "d" (high) : "memory");
+       m.q = val;
+       boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m);
 }
 
 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
@@ -119,11 +117,54 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
 /* Include code for early handlers */
 #include "../../kernel/sev-shared.c"
 
-static bool early_setup_sev_es(void)
+static inline bool sev_snp_enabled(void)
+{
+       return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+}
+
+static void __page_state_change(unsigned long paddr, enum psc_op op)
+{
+       u64 val;
+
+       if (!sev_snp_enabled())
+               return;
+
+       /*
+        * If private -> shared then invalidate the page before requesting the
+        * state change in the RMP table.
+        */
+       if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+
+       /* Issue VMGEXIT to change the page state in the RMP table. */
+       sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
+       VMGEXIT();
+
+       /* Read the response of the VMGEXIT. */
+       val = sev_es_rd_ghcb_msr();
+       if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+       /*
+        * Now that the page state has been changed in the RMP table, validate
+        * it so that it is consistent with the RMP entry.
+        */
+       if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+}
+
+void snp_set_page_private(unsigned long paddr)
+{
+       __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE);
+}
+
+void snp_set_page_shared(unsigned long paddr)
 {
-       if (!sev_es_negotiate_protocol())
-               sev_es_terminate(GHCB_SEV_ES_PROT_UNSUPPORTED);
+       __page_state_change(paddr, SNP_PAGE_STATE_SHARED);
+}
 
+static bool early_setup_ghcb(void)
+{
        if (set_page_decrypted((unsigned long)&boot_ghcb_page))
                return false;
 
@@ -135,6 +176,10 @@ static bool early_setup_sev_es(void)
        /* Initialize lookup tables for the instruction decoder */
        inat_init_tables();
 
+       /* SNP guests require that the GHCB GPA be registered */
+       if (sev_snp_enabled())
+               snp_register_ghcb_early(__pa(&boot_ghcb_page));
+
        return true;
 }
 
@@ -174,8 +219,8 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
        struct es_em_ctxt ctxt;
        enum es_result result;
 
-       if (!boot_ghcb && !early_setup_sev_es())
-               sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+       if (!boot_ghcb && !early_setup_ghcb())
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
 
        vc_ghcb_invalidate(boot_ghcb);
        result = vc_init_em_ctxt(&ctxt, regs, exit_code);
@@ -202,5 +247,191 @@ finish:
        if (result == ES_OK)
                vc_finish_insn(&ctxt);
        else if (result != ES_RETRY)
-               sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+}
+
+static void enforce_vmpl0(void)
+{
+       u64 attrs;
+       int err;
+
+       /*
+        * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically
+        * higher) privilege level. Here, clear the VMPL1 permission mask of the
+        * GHCB page. If the guest is not running at VMPL0, this will fail.
+        *
+        * If the guest is running at VMPL0, it will succeed. Even though that
+        * operation modifies permission bits, it is still ok to do so because
+        * Linux SNP guests are currently supported only at VMPL0, so changing
+        * the permission masks of VMPL1 or higher is a don't-care.
+        */
+       attrs = 1;
+       if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
+}
+
+void sev_enable(struct boot_params *bp)
+{
+       unsigned int eax, ebx, ecx, edx;
+       struct msr m;
+       bool snp;
+
+       /*
+        * Setup/preliminary detection of SNP. This will be sanity-checked
+        * against CPUID/MSR values later.
+        */
+       snp = snp_init(bp);
+
+       /* Check for the SME/SEV support leaf */
+       eax = 0x80000000;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+       if (eax < 0x8000001f)
+               return;
+
+       /*
+        * Check for the SME/SEV feature:
+        *   CPUID Fn8000_001F[EAX]
+        *   - Bit 0 - Secure Memory Encryption support
+        *   - Bit 1 - Secure Encrypted Virtualization support
+        *   CPUID Fn8000_001F[EBX]
+        *   - Bits 5:0 - Pagetable bit position used to indicate encryption
+        */
+       eax = 0x8000001f;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+       /* Check whether SEV is supported */
+       if (!(eax & BIT(1))) {
+               if (snp)
+                       error("SEV-SNP support indicated by CC blob, but not CPUID.");
+               return;
+       }
+
+       /* Set the SME mask if this is an SEV guest. */
+       boot_rdmsr(MSR_AMD64_SEV, &m);
+       sev_status = m.q;
+       if (!(sev_status & MSR_AMD64_SEV_ENABLED))
+               return;
+
+       /* Negotiate the GHCB protocol version. */
+       if (sev_status & MSR_AMD64_SEV_ES_ENABLED) {
+               if (!sev_es_negotiate_protocol())
+                       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_PROT_UNSUPPORTED);
+       }
+
+       /*
+        * SNP is supported in v2 of the GHCB spec which mandates support for HV
+        * features.
+        */
+       if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+               if (!(get_hv_features() & GHCB_HV_FT_SNP))
+                       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+               enforce_vmpl0();
+       }
+
+       if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+               error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
+
+       sme_me_mask = BIT_ULL(ebx & 0x3f);
+}
+
+/* Search for Confidential Computing blob in the EFI config table. */
+static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp)
+{
+       unsigned long cfg_table_pa;
+       unsigned int cfg_table_len;
+       int ret;
+
+       ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len);
+       if (ret)
+               return NULL;
+
+       return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa,
+                                                               cfg_table_len,
+                                                               EFI_CC_BLOB_GUID);
+}
+
+/*
+ * Initial setup of SNP relies on information provided by the
+ * Confidential Computing blob, which can be passed to the boot kernel
+ * by firmware/bootloader in the following ways:
+ *
+ * - via an entry in the EFI config table
+ * - via a setup_data structure, as defined by the Linux Boot Protocol
+ *
+ * Scan for the blob in that order.
+ */
+static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+{
+       struct cc_blob_sev_info *cc_info;
+
+       cc_info = find_cc_blob_efi(bp);
+       if (cc_info)
+               goto found_cc_info;
+
+       cc_info = find_cc_blob_setup_data(bp);
+       if (!cc_info)
+               return NULL;
+
+found_cc_info:
+       if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+       return cc_info;
+}
+
+/*
+ * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks
+ * will verify the SNP CPUID/MSR bits.
+ */
+bool snp_init(struct boot_params *bp)
+{
+       struct cc_blob_sev_info *cc_info;
+
+       if (!bp)
+               return false;
+
+       cc_info = find_cc_blob(bp);
+       if (!cc_info)
+               return false;
+
+       /*
+        * If an SNP-specific Confidential Computing blob is present, then
+        * the firmware/bootloader has indicated SNP support. Verifying this
+        * involves CPUID checks, which will be more reliable if the SNP
+        * CPUID table is used. See the comments over setup_cpuid_table() for
+        * more details.
+        */
+       setup_cpuid_table(cc_info);
+
+       /*
+        * Pass run-time kernel a pointer to CC info via boot_params so EFI
+        * config table doesn't need to be searched again during early startup
+        * phase.
+        */
+       bp->cc_blob_address = (u32)(unsigned long)cc_info;
+
+       return true;
+}
+
+void sev_prep_identity_maps(unsigned long top_level_pgt)
+{
+       /*
+        * The Confidential Computing blob is used very early in the
+        * uncompressed kernel to find the in-memory CPUID table used to
+        * handle CPUID instructions. Make sure an identity mapping exists
+        * so it can be accessed after the switchover.
+        */
+       if (sev_snp_enabled()) {
+               unsigned long cc_info_pa = boot_params->cc_blob_address;
+               struct cc_blob_sev_info *cc_info;
+
+               kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info));
+
+               cc_info = (struct cc_blob_sev_info *)cc_info_pa;
+               kernel_add_identity_map(cc_info->cpuid_phys, cc_info->cpuid_phys + cc_info->cpuid_len);
+       }
+
+       sev_verify_cbit(top_level_pgt);
 }
diff --git a/arch/x86/boot/compressed/tdcall.S b/arch/x86/boot/compressed/tdcall.S
new file mode 100644 (file)
index 0000000..46d0495
--- /dev/null
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "../../coco/tdx/tdcall.S"
diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
new file mode 100644 (file)
index 0000000..918a760
--- /dev/null
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../cpuflags.h"
+#include "../string.h"
+#include "../io.h"
+#include "error.h"
+
+#include <vdso/limits.h>
+#include <uapi/asm/vmx.h>
+
+#include <asm/shared/tdx.h>
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void)
+{
+       error("TDVMCALL failed. TDX module bug?");
+}
+
+static inline unsigned int tdx_io_in(int size, u16 port)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = EXIT_REASON_IO_INSTRUCTION,
+               .r12 = size,
+               .r13 = 0,
+               .r14 = port,
+       };
+
+       if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+               return UINT_MAX;
+
+       return args.r11;
+}
+
+static inline void tdx_io_out(int size, u16 port, u32 value)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = EXIT_REASON_IO_INSTRUCTION,
+               .r12 = size,
+               .r13 = 1,
+               .r14 = port,
+               .r15 = value,
+       };
+
+       __tdx_hypercall(&args, 0);
+}
+
+static inline u8 tdx_inb(u16 port)
+{
+       return tdx_io_in(1, port);
+}
+
+static inline void tdx_outb(u8 value, u16 port)
+{
+       tdx_io_out(1, port, value);
+}
+
+static inline void tdx_outw(u16 value, u16 port)
+{
+       tdx_io_out(2, port, value);
+}
+
+void early_tdx_detect(void)
+{
+       u32 eax, sig[3];
+
+       cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);
+
+       if (memcmp(TDX_IDENT, sig, sizeof(sig)))
+               return;
+
+       /* Use hypercalls instead of I/O instructions */
+       pio_ops.f_inb  = tdx_inb;
+       pio_ops.f_outb = tdx_outb;
+       pio_ops.f_outw = tdx_outw;
+}
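Once early_tdx_detect() has swapped in these callbacks, any decompressor code using the inb()/outb() macros from io.h is transparently routed through TDVMCALLs. Illustrative only (port 0x3fd is the COM1 line-status register):

```c
/* In a TD, this expands to pio_ops.f_inb(0x3fd), i.e. tdx_inb(0x3fd). */
u8 lsr = inb(0x3fd);	/* UART line status via hypercall, no IN insn executed */
```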
diff --git a/arch/x86/boot/compressed/tdx.h b/arch/x86/boot/compressed/tdx.h
new file mode 100644 (file)
index 0000000..9055482
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_TDX_H
+#define BOOT_COMPRESSED_TDX_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+void early_tdx_detect(void);
+#else
+static inline void early_tdx_detect(void) { };
+#endif
+
+#endif /* BOOT_COMPRESSED_TDX_H */
index e1478d32de1aef50617f21932e1f7ab1fc237ba6..fed8d13ce2526086883422e7fee4b65de858be94 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/required-features.h>
 #include <asm/msr-index.h>
 #include "string.h"
+#include "msr.h"
 
 static u32 err_flags[NCAPINTS];
 
@@ -130,12 +131,11 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
                /* If this is an AMD and we're only missing SSE+SSE2, try to
                   turn them on */
 
-               u32 ecx = MSR_K7_HWCR;
-               u32 eax, edx;
+               struct msr m;
 
-               asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
-               eax &= ~(1 << 15);
-               asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+               boot_rdmsr(MSR_K7_HWCR, &m);
+               m.l &= ~(1 << 15);
+               boot_wrmsr(MSR_K7_HWCR, &m);
 
                get_cpuflags(); /* Make sure it really did something */
                err = check_cpuflags();
@@ -145,28 +145,28 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
                /* If this is a VIA C3, we might have to enable CX8
                   explicitly */
 
-               u32 ecx = MSR_VIA_FCR;
-               u32 eax, edx;
+               struct msr m;
 
-               asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
-               eax |= (1<<1)|(1<<7);
-               asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+               boot_rdmsr(MSR_VIA_FCR, &m);
+               m.l |= (1 << 1) | (1 << 7);
+               boot_wrmsr(MSR_VIA_FCR, &m);
 
                set_bit(X86_FEATURE_CX8, cpu.flags);
                err = check_cpuflags();
        } else if (err == 0x01 && is_transmeta()) {
                /* Transmeta might have masked feature bits in word 0 */
 
-               u32 ecx = 0x80860004;
-               u32 eax, edx;
+               struct msr m, m_tmp;
                u32 level = 1;
 
-               asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
-               asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
+               boot_rdmsr(0x80860004, &m);
+               m_tmp = m;
+               m_tmp.l = ~0;
+               boot_wrmsr(0x80860004, &m_tmp);
                asm("cpuid"
                    : "+a" (level), "=d" (cpu.flags[0])
                    : : "ecx", "ebx");
-               asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+               boot_wrmsr(0x80860004, &m);
 
                err = check_cpuflags();
        } else if (err == 0x01 &&
index a0b75f73dc630d31d766394fc402a07f9336c059..a83d67ec627d1768facd1601e1eee537bcefbc2a 100644 (file)
@@ -71,8 +71,7 @@ int has_eflag(unsigned long mask)
 # define EBX_REG "=b"
 #endif
 
-static inline void cpuid_count(u32 id, u32 count,
-               u32 *a, u32 *b, u32 *c, u32 *d)
+void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d)
 {
        asm volatile(".ifnc %%ebx,%3 ; movl  %%ebx,%3 ; .endif  \n\t"
                     "cpuid                                     \n\t"
index 2e20814d3ce3e8d15e887bad15224e39646c100d..475b8fde90f7da3e80c1dd2d2b41b810c4dcb7a0 100644 (file)
@@ -17,5 +17,6 @@ extern u32 cpu_vendor[3];
 
 int has_eflag(unsigned long mask);
 void get_cpuflags(void);
+void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d);
 
 #endif
index 6dbd7e9f74c9caf51a08dcc3cd2924982b2120f1..0352e4589efa2e52636beb9ac169892cce41f619 100644 (file)
@@ -163,7 +163,11 @@ extra_header_fields:
        .long   0x200                           # SizeOfHeaders
        .long   0                               # CheckSum
        .word   IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
+#ifdef CONFIG_DXE_MEM_ATTRIBUTES
+       .word   IMAGE_DLL_CHARACTERISTICS_NX_COMPAT     # DllCharacteristics
+#else
        .word   0                               # DllCharacteristics
+#endif
 #ifdef CONFIG_X86_32
        .long   0                               # SizeOfStackReserve
        .long   0                               # SizeOfStackCommit
diff --git a/arch/x86/boot/io.h b/arch/x86/boot/io.h
new file mode 100644 (file)
index 0000000..1108809
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_IO_H
+#define BOOT_IO_H
+
+#include <asm/shared/io.h>
+
+#undef inb
+#undef inw
+#undef inl
+#undef outb
+#undef outw
+#undef outl
+
+struct port_io_ops {
+       u8      (*f_inb)(u16 port);
+       void    (*f_outb)(u8 v, u16 port);
+       void    (*f_outw)(u16 v, u16 port);
+};
+
+extern struct port_io_ops pio_ops;
+
+/*
+ * Use the normal I/O instructions by default.
+ * TDX guests override these to use hypercalls.
+ */
+static inline void init_default_io_ops(void)
+{
+       pio_ops.f_inb  = __inb;
+       pio_ops.f_outb = __outb;
+       pio_ops.f_outw = __outw;
+}
+
+/*
+ * Redirect port I/O operations via pio_ops callbacks.
+ * TDX guests override these callbacks with TDX-specific helpers.
+ */
+#define inb  pio_ops.f_inb
+#define outb pio_ops.f_outb
+#define outw pio_ops.f_outw
+
+#endif
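Since inb()/outb() are now macros that go through pio_ops, call sites stay unchanged while the backend can be swapped at runtime. A toy example (port 0x61, the PC speaker gate, chosen purely for illustration):

```c
/* Example call site: compiles to pio_ops.f_inb()/f_outb() indirect calls. */
static void speaker_gate_on(void)
{
	init_default_io_ops();		/* or the TDX override, if detected */
	outb(inb(0x61) | 0x3, 0x61);	/* set the speaker-gate bits */
}
```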
index e3add857c2c9dfd91806c9c466cc86dfcf046de0..c4ea5258ab558fc1773d7cef85796238b62da026 100644 (file)
@@ -17,6 +17,8 @@
 
 struct boot_params boot_params __attribute__((aligned(16)));
 
+struct port_io_ops pio_ops;
+
 char *HEAP = _end;
 char *heap_end = _end;         /* Default end of heap = no heap */
 
@@ -33,7 +35,7 @@ static void copy_boot_params(void)
                u16 cl_offset;
        };
        const struct old_cmdline * const oldcmd =
-               (const struct old_cmdline *)OLD_CL_ADDRESS;
+               absolute_pointer(OLD_CL_ADDRESS);
 
        BUILD_BUG_ON(sizeof(boot_params) != 4096);
        memcpy(&boot_params.hdr, &hdr, sizeof(hdr));
@@ -133,6 +135,8 @@ static void init_heap(void)
 
 void main(void)
 {
+       init_default_io_ops();
+
        /* First, copy the boot header into the "zeropage" */
        copy_boot_params();
 
diff --git a/arch/x86/boot/msr.h b/arch/x86/boot/msr.h
new file mode 100644 (file)
index 0000000..aed66f7
--- /dev/null
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers/definitions related to MSR access.
+ */
+
+#ifndef BOOT_MSR_H
+#define BOOT_MSR_H
+
+#include <asm/shared/msr.h>
+
+/*
+ * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the
+ * boot kernel since they rely on tracepoint/exception handling infrastructure
+ * that's not available here.
+ */
+static inline void boot_rdmsr(unsigned int reg, struct msr *m)
+{
+       asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg));
+}
+
+static inline void boot_wrmsr(unsigned int reg, const struct msr *m)
+{
+       asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory");
+}
+
+#endif /* BOOT_MSR_H */
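struct msr (from asm/shared/msr.h) overlays a 64-bit q field with l/h 32-bit halves, so callers can use whichever view is convenient. For instance, mirroring what sev_enable() does later in this series:

```c
/* Example: read the SEV status MSR as one 64-bit quantity. */
static u64 read_sev_status_msr(void)
{
	struct msr m;

	boot_rdmsr(MSR_AMD64_SEV, &m);
	return m.q;	/* m.l and m.h expose the low/high halves */
}
```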
index c1ead00017a7fa3938a53bc3e30cacb2d74e4f37..c816acf78b6aa0e36ebab582c4fb6558d91ab19a 100644 (file)
@@ -4,3 +4,5 @@ KASAN_SANITIZE_core.o   := n
 CFLAGS_core.o          += -fno-stack-protector
 
 obj-y += core.o
+
+obj-$(CONFIG_INTEL_TDX_GUEST)  += tdx/
index fc1365dd927e8001971bcb86a2a8a41bb89ef33e..49b44f881484680376a7393dc0f101e4565a4009 100644 (file)
@@ -18,7 +18,15 @@ static u64 cc_mask __ro_after_init;
 
 static bool intel_cc_platform_has(enum cc_attr attr)
 {
-       return false;
+       switch (attr) {
+       case CC_ATTR_GUEST_UNROLL_STRING_IO:
+       case CC_ATTR_HOTPLUG_DISABLED:
+       case CC_ATTR_GUEST_MEM_ENCRYPT:
+       case CC_ATTR_MEM_ENCRYPT:
+               return true;
+       default:
+               return false;
+       }
 }
 
 /*
@@ -57,6 +65,9 @@ static bool amd_cc_platform_has(enum cc_attr attr)
                return (sev_status & MSR_AMD64_SEV_ENABLED) &&
                        !(sev_status & MSR_AMD64_SEV_ES_ENABLED);
 
+       case CC_ATTR_GUEST_SEV_SNP:
+               return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+
        default:
                return false;
        }
@@ -87,9 +98,18 @@ EXPORT_SYMBOL_GPL(cc_platform_has);
 
 u64 cc_mkenc(u64 val)
 {
+       /*
+        * Both AMD and Intel use a bit in the page table to indicate
+        * encryption status of the page.
+        *
+        * - for AMD, bit *set* means the page is encrypted
+        * - for Intel *clear* means encrypted.
+        */
        switch (vendor) {
        case CC_VENDOR_AMD:
                return val | cc_mask;
+       case CC_VENDOR_INTEL:
+               return val & ~cc_mask;
        default:
                return val;
        }
@@ -97,9 +117,12 @@ u64 cc_mkenc(u64 val)
 
 u64 cc_mkdec(u64 val)
 {
+       /* See comment in cc_mkenc() */
        switch (vendor) {
        case CC_VENDOR_AMD:
                return val & ~cc_mask;
+       case CC_VENDOR_INTEL:
+               return val | cc_mask;
        default:
                return val;
        }
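To make the opposite polarities concrete, an illustration (not patch code) assuming a hypothetical cc_mask of BIT_ULL(51):

```c
/* Illustration only: assume cc_mask == BIT_ULL(51). */
u64 pte = 0x1000;

u64 amd_enc   = pte |  BIT_ULL(51);	/* cc_mkenc() result on AMD   */
u64 intel_enc = pte & ~BIT_ULL(51);	/* cc_mkenc() result on Intel */
```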
diff --git a/arch/x86/coco/tdx/Makefile b/arch/x86/coco/tdx/Makefile
new file mode 100644 (file)
index 0000000..46c5599
--- /dev/null
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += tdx.o tdcall.o
diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
new file mode 100644 (file)
index 0000000..f9eb113
--- /dev/null
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/asm-offsets.h>
+#include <asm/asm.h>
+#include <asm/frame.h>
+#include <asm/unwind_hints.h>
+
+#include <linux/linkage.h>
+#include <linux/bits.h>
+#include <linux/errno.h>
+
+#include "../../virt/vmx/tdx/tdxcall.S"
+
+/*
+ * Bitmasks of the registers exposed to (i.e. shared with) the VMM.
+ */
+#define TDX_R10                BIT(10)
+#define TDX_R11                BIT(11)
+#define TDX_R12                BIT(12)
+#define TDX_R13                BIT(13)
+#define TDX_R14                BIT(14)
+#define TDX_R15                BIT(15)
+
+/*
+ * These registers are clobbered to hold arguments for each
+ * TDVMCALL. They are safe to expose to the VMM.
+ * Each bit in this mask represents a register ID. Bit field
+ * details can be found in TDX GHCI specification, section
+ * titled "TDCALL [TDG.VP.VMCALL] leaf".
+ */
+#define TDVMCALL_EXPOSE_REGS_MASK      ( TDX_R10 | TDX_R11 | \
+                                         TDX_R12 | TDX_R13 | \
+                                         TDX_R14 | TDX_R15 )
+
+/*
+ * __tdx_module_call()  - Used by TDX guests to request services from
+ * the TDX module (does not include VMM services) using TDCALL instruction.
+ *
+ * Transforms function call register arguments into the TDCALL register ABI.
+ * After TDCALL operation, TDX module output is saved in @out (if it is
+ * provided by the user).
+ *
+ *-------------------------------------------------------------------------
+ * TDCALL ABI:
+ *-------------------------------------------------------------------------
+ * Input Registers:
+ *
+ * RAX                 - TDCALL Leaf number.
+ * RCX,RDX,R8-R9       - TDCALL Leaf specific input registers.
+ *
+ * Output Registers:
+ *
+ * RAX                 - TDCALL instruction error code.
+ * RCX,RDX,R8-R11      - TDCALL Leaf specific output registers.
+ *
+ *-------------------------------------------------------------------------
+ *
+ * __tdx_module_call() function ABI:
+ *
+ * @fn  (RDI)          - TDCALL Leaf ID,    moved to RAX
+ * @rcx (RSI)          - Input parameter 1, moved to RCX
+ * @rdx (RDX)          - Input parameter 2, moved to RDX
+ * @r8  (RCX)          - Input parameter 3, moved to R8
+ * @r9  (R8)           - Input parameter 4, moved to R9
+ *
+ * @out (R9)           - struct tdx_module_output pointer
+ *                       stored temporarily in R12 (not
+ *                       shared with the TDX module). It
+ *                       can be NULL.
+ *
+ * Return status of TDCALL via RAX.
+ */
+SYM_FUNC_START(__tdx_module_call)
+       FRAME_BEGIN
+       TDX_MODULE_CALL host=0
+       FRAME_END
+       RET
+SYM_FUNC_END(__tdx_module_call)
+
+/*
+ * __tdx_hypercall() - Make hypercalls to a TDX VMM using TDVMCALL leaf
+ * of TDCALL instruction
+ *
+ * Transforms values in the function call argument struct tdx_hypercall_args @args
+ * into the TDCALL register ABI. After TDCALL operation, VMM output is saved
+ * back in @args.
+ *
+ *-------------------------------------------------------------------------
+ * TD VMCALL ABI:
+ *-------------------------------------------------------------------------
+ *
+ * Input Registers:
+ *
+ * RAX                 - TDCALL instruction leaf number (0 - TDG.VP.VMCALL)
+ * RCX                 - BITMAP which controls which part of TD Guest GPR
+ *                       is passed as-is to the VMM and back.
+ * R10                 - Set to 0 to indicate that the TDCALL follows the
+ *                       standard TDX ABI specification. A non-zero value
+ *                       indicates a vendor-specific ABI.
+ * R11                 - VMCALL sub function number
+ * RBX, RBP, RDI, RSI  - Used to pass VMCALL sub function specific arguments.
+ * R8-R9, R12-R15      - Same as above.
+ *
+ * Output Registers:
+ *
+ * RAX                 - TDCALL instruction status (Not related to hypercall
+ *                        output).
+ * R10                 - Hypercall output error code.
+ * R11-R15             - Hypercall sub function specific output values.
+ *
+ *-------------------------------------------------------------------------
+ *
+ * __tdx_hypercall() function ABI:
+ *
+ * @args  (RDI)        - struct tdx_hypercall_args for input and output
+ * @flags (RSI)        - TDX_HCALL_* flags
+ *
+ * On successful completion, return the hypercall error code.
+ */
+SYM_FUNC_START(__tdx_hypercall)
+       FRAME_BEGIN
+
+       /* Save callee-saved GPRs as mandated by the x86_64 ABI */
+       push %r15
+       push %r14
+       push %r13
+       push %r12
+
+       /* Mangle function call ABI into TDCALL ABI: */
+       /* Set TDCALL leaf ID (TDVMCALL (0)) in RAX */
+       xor %eax, %eax
+
+       /* Copy hypercall registers from arg struct: */
+       movq TDX_HYPERCALL_r10(%rdi), %r10
+       movq TDX_HYPERCALL_r11(%rdi), %r11
+       movq TDX_HYPERCALL_r12(%rdi), %r12
+       movq TDX_HYPERCALL_r13(%rdi), %r13
+       movq TDX_HYPERCALL_r14(%rdi), %r14
+       movq TDX_HYPERCALL_r15(%rdi), %r15
+
+       movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx
+
+       /*
+        * For the idle loop, STI needs to be called directly before the TDCALL
+        * that enters idle (the EXIT_REASON_HLT case). The STI instruction
+        * enables interrupts only one instruction later. If there is a window
+        * between STI and the instruction that emulates the HALT state, there
+        * is a chance for interrupts to happen in this window, which can delay
+        * the HLT operation indefinitely. Since this is not the desired
+        * result, conditionally call STI before TDCALL.
+        */
+       testq $TDX_HCALL_ISSUE_STI, %rsi
+       jz .Lskip_sti
+       sti
+.Lskip_sti:
+       tdcall
+
+       /*
+        * RAX==0 indicates a failure of the TDVMCALL mechanism itself and that
+        * something has gone horribly wrong with the TDX module.
+        *
+        * The return status of the hypercall operation is in a separate
+        * register (in R10). Hypercall errors are a part of normal operation
+        * and are handled by callers.
+        */
+       testq %rax, %rax
+       jne .Lpanic
+
+       /* TDVMCALL leaf return code is in R10 */
+       movq %r10, %rax
+
+       /* Copy hypercall result registers to arg struct if needed */
+       testq $TDX_HCALL_HAS_OUTPUT, %rsi
+       jz .Lout
+
+       movq %r10, TDX_HYPERCALL_r10(%rdi)
+       movq %r11, TDX_HYPERCALL_r11(%rdi)
+       movq %r12, TDX_HYPERCALL_r12(%rdi)
+       movq %r13, TDX_HYPERCALL_r13(%rdi)
+       movq %r14, TDX_HYPERCALL_r14(%rdi)
+       movq %r15, TDX_HYPERCALL_r15(%rdi)
+.Lout:
+       /*
+        * Zero out registers exposed to the VMM to avoid speculative execution
+        * with VMM-controlled values. This needs to include all registers
+        * present in TDVMCALL_EXPOSE_REGS_MASK (except R12-R15). R12-R15
+        * context will be restored.
+        */
+       xor %r10d, %r10d
+       xor %r11d, %r11d
+
+       /* Restore callee-saved GPRs as mandated by the x86_64 ABI */
+       pop %r12
+       pop %r13
+       pop %r14
+       pop %r15
+
+       FRAME_END
+
+       RET
+.Lpanic:
+       call __tdx_hypercall_failed
+       /* __tdx_hypercall_failed never returns */
+       REACHABLE
+       jmp .Lpanic
+SYM_FUNC_END(__tdx_hypercall)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
new file mode 100644 (file)
index 0000000..03deb4d
--- /dev/null
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021-2022 Intel Corporation */
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "tdx: " fmt
+
+#include <linux/cpufeature.h>
+#include <asm/coco.h>
+#include <asm/tdx.h>
+#include <asm/vmx.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
+#include <asm/pgtable.h>
+
+/* TDX module Call Leaf IDs */
+#define TDX_GET_INFO                   1
+#define TDX_GET_VEINFO                 3
+#define TDX_ACCEPT_PAGE                        6
+
+/* TDX hypercall Leaf IDs */
+#define TDVMCALL_MAP_GPA               0x10001
+
+/* MMIO direction */
+#define EPT_READ       0
+#define EPT_WRITE      1
+
+/* Port I/O direction */
+#define PORT_READ      0
+#define PORT_WRITE     1
+
+/* See Exit Qualification for I/O Instructions in VMX documentation */
+#define VE_IS_IO_IN(e)         ((e) & BIT(3))
+#define VE_GET_IO_SIZE(e)      (((e) & GENMASK(2, 0)) + 1)
+#define VE_GET_PORT_NUM(e)     ((e) >> 16)
+#define VE_IS_IO_STRING(e)     ((e) & BIT(4))
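A worked decode using these macros, with an exit qualification constructed for illustration (a 2-byte IN from the COM1 port):

```c
/* Illustrative exit qualification: 2-byte IN from port 0x3f8. */
u64 exit_qual = (0x3f8ULL << 16) | BIT(3) | (2 - 1);

int  size   = VE_GET_IO_SIZE(exit_qual);	/* 2               */
bool is_in  = VE_IS_IO_IN(exit_qual);		/* true: IN        */
u16  port   = VE_GET_PORT_NUM(exit_qual);	/* 0x3f8           */
bool string = VE_IS_IO_STRING(exit_qual);	/* false: not INS  */
```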
+
+/*
+ * Wrapper for standard use of __tdx_hypercall with no output aside from
+ * return code.
+ */
+static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = fn,
+               .r12 = r12,
+               .r13 = r13,
+               .r14 = r14,
+               .r15 = r15,
+       };
+
+       return __tdx_hypercall(&args, 0);
+}
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void)
+{
+       panic("TDVMCALL failed. TDX module bug?");
+}
+
+/*
+ * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
+ * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
+ * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
+ * guest sides of these calls.
+ */
+static u64 hcall_func(u64 exit_reason)
+{
+       return exit_reason;
+}
+
+#ifdef CONFIG_KVM_GUEST
+long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
+                      unsigned long p3, unsigned long p4)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = nr,
+               .r11 = p1,
+               .r12 = p2,
+               .r13 = p3,
+               .r14 = p4,
+       };
+
+       return __tdx_hypercall(&args, 0);
+}
+EXPORT_SYMBOL_GPL(tdx_kvm_hypercall);
+#endif
+
+/*
+ * Used for TDX guests to make calls directly to the TD module.  This
+ * should only be used for calls that have no legitimate reason to fail
+ * or where the kernel can not survive the call failing.
+ */
+static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
+                                  struct tdx_module_output *out)
+{
+       if (__tdx_module_call(fn, rcx, rdx, r8, r9, out))
+               panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
+}
+
+static u64 get_cc_mask(void)
+{
+       struct tdx_module_output out;
+       unsigned int gpa_width;
+
+       /*
+        * TDINFO TDX module call is used to get the TD execution environment
+        * information like GPA width, number of available vcpus, debug mode
+        * information, etc. More details about the ABI can be found in TDX
+        * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
+        * [TDG.VP.INFO].
+        *
+        * The GPA width that comes out of this call is critical. TDX guests
+        * can not meaningfully run without it.
+        */
+       tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);
+
+       gpa_width = out.rcx & GENMASK(5, 0);
+
+       /*
+        * The highest bit of a guest physical address is the "sharing" bit.
+        * Set it for shared pages and clear it for private pages.
+        */
+       return BIT_ULL(gpa_width - 1);
+}
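For example (value chosen for illustration; the real width comes from TDG.VP.INFO), a TD reporting gpa_width == 52 gets bit 51 as its shared bit:

```c
/* gpa_width == 52  ->  cc_mask == BIT_ULL(51) == 0x0008000000000000 */
u64 cc_mask_example = BIT_ULL(52 - 1);
```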
+
+static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_HLT),
+               .r12 = irq_disabled,
+       };
+
+       /*
+        * Emulate HLT operation via hypercall. More info about ABI
+        * can be found in TDX Guest-Host-Communication Interface
+        * (GHCI), section 3.8 TDG.VP.VMCALL<Instruction.HLT>.
+        *
+        * The VMM uses the "IRQ disabled" param to understand IRQ
+        * enabled status (RFLAGS.IF) of the TD guest and to determine
+        * whether or not it should schedule the halted vCPU if an
+        * IRQ becomes pending. E.g. if IRQs are disabled, the VMM
+        * can keep the vCPU in virtual HLT, even if an IRQ is
+        * pending, without hanging/breaking the guest.
+        */
+       return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
+}
+
+static bool handle_halt(void)
+{
+       /*
+        * Since the non-safe halt is mainly used in CPU offlining and
+        * the guest will always stay in the halt state, don't call
+        * the STI instruction (set do_sti to false).
+        */
+       const bool irq_disabled = irqs_disabled();
+       const bool do_sti = false;
+
+       if (__halt(irq_disabled, do_sti))
+               return false;
+
+       return true;
+}
+
+void __cpuidle tdx_safe_halt(void)
+{
+       /*
+        * In the do_sti=true case, the __tdx_hypercall() function enables
+        * interrupts using the STI instruction right before the TDCALL, so
+        * set irq_disabled to false.
+        */
+       const bool irq_disabled = false;
+       const bool do_sti = true;
+
+       /*
+        * Use WARN_ONCE() to report the failure.
+        */
+       if (__halt(irq_disabled, do_sti))
+               WARN_ONCE(1, "HLT instruction emulation failed\n");
+}
+
+static bool read_msr(struct pt_regs *regs)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_MSR_READ),
+               .r12 = regs->cx,
+       };
+
+       /*
+        * Emulate the MSR read via hypercall. More info about ABI
+        * can be found in TDX Guest-Host-Communication Interface
+        * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
+        */
+       if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+               return false;
+
+       regs->ax = lower_32_bits(args.r11);
+       regs->dx = upper_32_bits(args.r11);
+       return true;
+}
+
+static bool write_msr(struct pt_regs *regs)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_MSR_WRITE),
+               .r12 = regs->cx,
+               .r13 = (u64)regs->dx << 32 | regs->ax,
+       };
+
+       /*
+        * Emulate the MSR write via hypercall. More info about ABI
+        * can be found in TDX Guest-Host-Communication Interface
+        * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
+        */
+       return !__tdx_hypercall(&args, 0);
+}
+
+static bool handle_cpuid(struct pt_regs *regs)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_CPUID),
+               .r12 = regs->ax,
+               .r13 = regs->cx,
+       };
+
+       /*
+        * Only allow VMM to control range reserved for hypervisor
+        * communication.
+        *
+        * Return all-zeros for any CPUID outside the range. It matches CPU
+        * behaviour for non-supported leaf.
+        */
+       if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
+               regs->ax = regs->bx = regs->cx = regs->dx = 0;
+               return true;
+       }
+
+       /*
+        * Emulate the CPUID instruction via a hypercall. More info about
+        * ABI can be found in TDX Guest-Host-Communication Interface
+        * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
+        */
+       if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+               return false;
+
+       /*
+        * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
+        * EAX, EBX, ECX, EDX registers after the CPUID instruction execution.
+        * So copy the register contents back to pt_regs.
+        */
+       regs->ax = args.r12;
+       regs->bx = args.r13;
+       regs->cx = args.r14;
+       regs->dx = args.r15;
+
+       return true;
+}
+
+static bool mmio_read(int size, unsigned long addr, unsigned long *val)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_EPT_VIOLATION),
+               .r12 = size,
+               .r13 = EPT_READ,
+               .r14 = addr,
+               .r15 = *val,
+       };
+
+       if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+               return false;
+       *val = args.r11;
+       return true;
+}
+
+static bool mmio_write(int size, unsigned long addr, unsigned long val)
+{
+       return !_tdx_hypercall(hcall_func(EXIT_REASON_EPT_VIOLATION), size,
+                              EPT_WRITE, addr, val);
+}
+
+static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
+{
+       char buffer[MAX_INSN_SIZE];
+       unsigned long *reg, val;
+       struct insn insn = {};
+       enum mmio_type mmio;
+       int size, extend_size;
+       u8 extend_val = 0;
+
+       /* Only in-kernel MMIO is supported */
+       if (WARN_ON_ONCE(user_mode(regs)))
+               return false;
+
+       if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
+               return false;
+
+       if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
+               return false;
+
+       mmio = insn_decode_mmio(&insn, &size);
+       if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
+               return false;
+
+       if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
+               reg = insn_get_modrm_reg_ptr(&insn, regs);
+               if (!reg)
+                       return false;
+       }
+
+       ve->instr_len = insn.length;
+
+       /* Handle writes first */
+       switch (mmio) {
+       case MMIO_WRITE:
+               memcpy(&val, reg, size);
+               return mmio_write(size, ve->gpa, val);
+       case MMIO_WRITE_IMM:
+               val = insn.immediate.value;
+               return mmio_write(size, ve->gpa, val);
+       case MMIO_READ:
+       case MMIO_READ_ZERO_EXTEND:
+       case MMIO_READ_SIGN_EXTEND:
+               /* Reads are handled below */
+               break;
+       case MMIO_MOVS:
+       case MMIO_DECODE_FAILED:
+               /*
+                * MMIO was accessed with an instruction that could not be
+                * decoded or handled properly. It was likely not using io.h
+                * helpers or accessed MMIO accidentally.
+                */
+               return false;
+       default:
+               WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
+               return false;
+       }
+
+       /* Handle reads */
+       if (!mmio_read(size, ve->gpa, &val))
+               return false;
+
+       switch (mmio) {
+       case MMIO_READ:
+               /* Zero-extend for 32-bit operation */
+               extend_size = size == 4 ? sizeof(*reg) : 0;
+               break;
+       case MMIO_READ_ZERO_EXTEND:
+               /* Zero extend based on operand size */
+               extend_size = insn.opnd_bytes;
+               break;
+       case MMIO_READ_SIGN_EXTEND:
+               /* Sign extend based on operand size */
+               extend_size = insn.opnd_bytes;
+               if (size == 1 && val & BIT(7))
+                       extend_val = 0xFF;
+               else if (size > 1 && val & BIT(15))
+                       extend_val = 0xFF;
+               break;
+       default:
+               /* All other cases have to be covered by the first switch() */
+               WARN_ON_ONCE(1);
+               return false;
+       }
+
+       if (extend_size)
+               memset(reg, extend_val, extend_size);
+       memcpy(reg, &val, size);
+       return true;
+}
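
The read path above widens a value by memset()ing the whole destination register with 0x00 or 0xFF and then copying the low size bytes over it. A stand-alone sketch of that widening; it generalizes the sign-bit test to any access size, whereas the kernel code only ever sees 1- and 2-byte sign-extending loads (hence its BIT(7)/BIT(15) checks):

    #include <stdint.h>
    #include <string.h>

    static uint64_t widen_mmio_read(uint64_t raw, int size, int sign_extend)
    {
            uint64_t reg = 0;
            uint8_t fill = 0;

            /* Choose the fill byte from the sign bit of the loaded value */
            if (sign_extend && (raw & (1ULL << (size * 8 - 1))))
                    fill = 0xFF;

            memset(&reg, fill, sizeof(reg));
            memcpy(&reg, &raw, size);       /* little-endian, as on x86 */
            return reg;
    }
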
+
+static bool handle_in(struct pt_regs *regs, int size, int port)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION),
+               .r12 = size,
+               .r13 = PORT_READ,
+               .r14 = port,
+       };
+       u64 mask = GENMASK(BITS_PER_BYTE * size, 0);
+       bool success;
+
+       /*
+        * Emulate the I/O read via hypercall. More info about ABI can be found
+        * in TDX Guest-Host-Communication Interface (GHCI) section titled
+        * "TDG.VP.VMCALL<Instruction.IO>".
+        */
+       success = !__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+       /* Update part of the register affected by the emulated instruction */
+       regs->ax &= ~mask;
+       if (success)
+               regs->ax |= args.r11 & mask;
+
+       return success;
+}
+
+static bool handle_out(struct pt_regs *regs, int size, int port)
+{
+       u64 mask = GENMASK(BITS_PER_BYTE * size, 0);
+
+       /*
+        * Emulate the I/O write via hypercall. More info about ABI can be found
+        * in TDX Guest-Host-Communication Interface (GHCI) section titled
+        * "TDG.VP.VMCALL<Instruction.IO>".
+        */
+       return !_tdx_hypercall(hcall_func(EXIT_REASON_IO_INSTRUCTION), size,
+                              PORT_WRITE, port, regs->ax & mask);
+}
+
+/*
+ * Emulate I/O using hypercall.
+ *
+ * Assumes the IO instruction was using ax, which is enforced
+ * by the standard io.h macros.
+ *
+ * Return True on success or False on failure.
+ */
+static bool handle_io(struct pt_regs *regs, u32 exit_qual)
+{
+       int size, port;
+       bool in;
+
+       if (VE_IS_IO_STRING(exit_qual))
+               return false;
+
+       in   = VE_IS_IO_IN(exit_qual);
+       size = VE_GET_IO_SIZE(exit_qual);
+       port = VE_GET_PORT_NUM(exit_qual);
+
+       if (in)
+               return handle_in(regs, size, port);
+       else
+               return handle_out(regs, size, port);
+}
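
The VE_* accessors used above unpack the VMX exit qualification for I/O instructions. A hedged sketch of that decoding, assuming the layout documented in the Intel SDM (bits 2:0 hold the access size minus one, bit 3 the direction, bit 4 the string flag, bits 31:16 the port number); these helpers are illustrative stand-ins, not the kernel macros:

    #include <stdbool.h>
    #include <stdint.h>

    static int  io_size(uint32_t qual)      { return (qual & 0x7) + 1; }
    static bool io_is_in(uint32_t qual)     { return qual & (1u << 3); }
    static bool io_is_string(uint32_t qual) { return qual & (1u << 4); }
    static int  io_port(uint32_t qual)      { return (qual >> 16) & 0xffff; }
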
+
+/*
+ * Early #VE exception handler. Only handles a subset of port I/O.
+ * Intended only for earlyprintk. Returns false on failure.
+ */
+__init bool tdx_early_handle_ve(struct pt_regs *regs)
+{
+       struct ve_info ve;
+
+       tdx_get_ve_info(&ve);
+
+       if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
+               return false;
+
+       return handle_io(regs, ve.exit_qual);
+}
+
+void tdx_get_ve_info(struct ve_info *ve)
+{
+       struct tdx_module_output out;
+
+       /*
+        * Called during #VE handling to retrieve the #VE info from the
+        * TDX module.
+        *
+        * This has to be called early in #VE handling.  A "nested" #VE which
+        * occurs before this will raise a #DF and is not recoverable.
+        *
+        * The call retrieves the #VE info from the TDX module, which also
+        * clears the "#VE valid" flag. This must be done before anything else
+        * because any #VE that occurs while the valid flag is set will lead to
+        * #DF.
+        *
+        * Note, the TDX module treats virtual NMIs as inhibited if the #VE
+        * valid flag is set. It means that NMI=>#VE will not result in a #DF.
+        */
+       tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);
+
+       /* Transfer the output parameters */
+       ve->exit_reason = out.rcx;
+       ve->exit_qual   = out.rdx;
+       ve->gla         = out.r8;
+       ve->gpa         = out.r9;
+       ve->instr_len   = lower_32_bits(out.r10);
+       ve->instr_info  = upper_32_bits(out.r10);
+}
+
+/* Handle the user initiated #VE */
+static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
+{
+       switch (ve->exit_reason) {
+       case EXIT_REASON_CPUID:
+               return handle_cpuid(regs);
+       default:
+               pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+               return false;
+       }
+}
+
+/* Handle the kernel #VE */
+static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
+{
+       switch (ve->exit_reason) {
+       case EXIT_REASON_HLT:
+               return handle_halt();
+       case EXIT_REASON_MSR_READ:
+               return read_msr(regs);
+       case EXIT_REASON_MSR_WRITE:
+               return write_msr(regs);
+       case EXIT_REASON_CPUID:
+               return handle_cpuid(regs);
+       case EXIT_REASON_EPT_VIOLATION:
+               return handle_mmio(regs, ve);
+       case EXIT_REASON_IO_INSTRUCTION:
+               return handle_io(regs, ve->exit_qual);
+       default:
+               pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+               return false;
+       }
+}
+
+bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
+{
+       bool ret;
+
+       if (user_mode(regs))
+               ret = virt_exception_user(regs, ve);
+       else
+               ret = virt_exception_kernel(regs, ve);
+
+       /* After successful #VE handling, move the IP */
+       if (ret)
+               regs->ip += ve->instr_len;
+
+       return ret;
+}
+
+static bool tdx_tlb_flush_required(bool private)
+{
+       /*
+        * TDX guest is responsible for flushing TLB on private->shared
+        * transition. VMM is responsible for flushing on shared->private.
+        *
+        * The VMM _can't_ flush private addresses as it can't generate PAs
+        * with the guest's HKID.  Shared memory isn't subject to integrity
+        * checking, i.e. the VMM doesn't need to flush for its own protection.
+        *
+        * There's no need to flush when converting from shared to private,
+        * as flushing is the VMM's responsibility in this case, e.g. it must
+        * flush to avoid integrity failures in the face of a buggy or
+        * malicious guest.
+        */
+       return !private;
+}
+
+static bool tdx_cache_flush_required(void)
+{
+       /*
+        * AMD SME/SEV can avoid cache flushing if HW enforces cache coherence.
+        * TDX doesn't have such capability.
+        *
+        * Flush cache unconditionally.
+        */
+       return true;
+}
+
+static bool try_accept_one(phys_addr_t *start, unsigned long len,
+                         enum pg_level pg_level)
+{
+       unsigned long accept_size = page_level_size(pg_level);
+       u64 tdcall_rcx;
+       u8 page_size;
+
+       if (!IS_ALIGNED(*start, accept_size))
+               return false;
+
+       if (len < accept_size)
+               return false;
+
+       /*
+        * Pass the page physical address to the TDX module to accept the
+        * pending, private page.
+        *
+        * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
+        */
+       switch (pg_level) {
+       case PG_LEVEL_4K:
+               page_size = 0;
+               break;
+       case PG_LEVEL_2M:
+               page_size = 1;
+               break;
+       case PG_LEVEL_1G:
+               page_size = 2;
+               break;
+       default:
+               return false;
+       }
+
+       tdcall_rcx = *start | page_size;
+       if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
+               return false;
+
+       *start += accept_size;
+       return true;
+}
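
Because the GPA handed to TDX_ACCEPT_PAGE must be aligned to the page size being accepted, its low bits are guaranteed zero and can carry the size code. A minimal sketch of the RCX encoding built above (hypothetical helper name):

    #include <stdint.h>

    /* size_code: 0 = 4K, 1 = 2M, 2 = 1G, matching the comment above */
    static uint64_t accept_page_rcx(uint64_t gpa, unsigned int size_code)
    {
            return gpa | size_code;         /* bits 2:0 encode the page size */
    }
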
+
+/*
+ * Inform the VMM of the guest's intent for this physical page: shared with
+ * the VMM or private to the guest.  The VMM is expected to change its mapping
+ * of the page in response.
+ */
+static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+{
+       phys_addr_t start = __pa(vaddr);
+       phys_addr_t end   = __pa(vaddr + numpages * PAGE_SIZE);
+
+       if (!enc) {
+               /* Set the shared (decrypted) bits: */
+               start |= cc_mkdec(0);
+               end   |= cc_mkdec(0);
+       }
+
+       /*
+        * Notify the VMM about page mapping conversion. More info about ABI
+        * can be found in TDX Guest-Host-Communication Interface (GHCI),
+        * section "TDG.VP.VMCALL<MapGPA>"
+        */
+       if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
+               return false;
+
+       /* private->shared conversion requires only the MapGPA call */
+       if (!enc)
+               return true;
+
+       /*
+        * For shared->private conversion, accept the page using
+        * TDX_ACCEPT_PAGE TDX module call.
+        */
+       while (start < end) {
+               unsigned long len = end - start;
+
+               /*
+                * Try larger accepts first. It gives the VMM a chance to keep
+                * 1G/2M SEPT entries where possible and speeds up the process
+                * by cutting the number of hypercalls (if successful).
+                */
+
+               if (try_accept_one(&start, len, PG_LEVEL_1G))
+                       continue;
+
+               if (try_accept_one(&start, len, PG_LEVEL_2M))
+                       continue;
+
+               if (!try_accept_one(&start, len, PG_LEVEL_4K))
+                       return false;
+       }
+
+       return true;
+}
+
+void __init tdx_early_init(void)
+{
+       u64 cc_mask;
+       u32 eax, sig[3];
+
+       cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);
+
+       if (memcmp(TDX_IDENT, sig, sizeof(sig)))
+               return;
+
+       setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
+
+       cc_set_vendor(CC_VENDOR_INTEL);
+       cc_mask = get_cc_mask();
+       cc_set_mask(cc_mask);
+
+       /*
+        * All bits above the GPA width are reserved, and the kernel treats
+        * the shared bit as a flag, not as part of the physical address.
+        *
+        * Adjust physical mask to only cover valid GPA bits.
+        */
+       physical_mask &= cc_mask - 1;
+
+       x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
+       x86_platform.guest.enc_tlb_flush_required   = tdx_tlb_flush_required;
+       x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
+
+       pr_info("Guest detected\n");
+}
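
A worked example of the physical_mask adjustment above, assuming a 52-bit GPA width with the shared bit at position 51 (illustrative numbers, not taken from the patch): since cc_mask has exactly the shared bit set, cc_mask - 1 sets every bit below it, so the AND clears the shared bit and all bits above it.

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t cc_mask = 1ULL << 51;      /* assumed shared-bit position */
            uint64_t physical_mask = ~0ULL;     /* pretend all bits were valid */

            physical_mask &= cc_mask - 1;       /* keep bits 50:0 only */
            assert(physical_mask == (1ULL << 51) - 1);
            return 0;
    }
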
index a4c061fb7c6ea0c3a15201ef369e61014315907c..29b36e9e4e741e2e36c69219a25934d0bfa2f4b4 100644 (file)
@@ -63,7 +63,7 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0
+.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
        .if \save_ret
        pushq   %rsi            /* pt_regs->si */
        movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
@@ -73,7 +73,7 @@ For 32-bit we have the following conventions - kernel is built with
        pushq   %rsi            /* pt_regs->si */
        .endif
        pushq   \rdx            /* pt_regs->dx */
-       pushq   %rcx            /* pt_regs->cx */
+       pushq   \rcx            /* pt_regs->cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
        pushq   %r9             /* pt_regs->r9 */
@@ -99,6 +99,7 @@ For 32-bit we have the following conventions - kernel is built with
         * well before they could be put to use in a speculative execution
         * gadget.
         */
+       xorl    %esi,  %esi     /* nospec si  */
        xorl    %edx,  %edx     /* nospec dx  */
        xorl    %ecx,  %ecx     /* nospec cx  */
        xorl    %r8d,  %r8d     /* nospec r8  */
@@ -114,32 +115,24 @@ For 32-bit we have the following conventions - kernel is built with
 
 .endm
 
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
-       PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
+       PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
        CLEAR_REGS
 .endm
 
-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
+.macro POP_REGS pop_rdi=1
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
-       .if \skip_r11rcx
-       popq %rsi
-       .else
        popq %r11
-       .endif
        popq %r10
        popq %r9
        popq %r8
        popq %rax
-       .if \skip_r11rcx
-       popq %rsi
-       .else
        popq %rcx
-       .endif
        popq %rdx
        popq %rsi
        .if \pop_rdi
index 73d958522b6a47e285b9cf05c8562fdd58894141..4300ba49b5eeace08b31c83247e0e5a6a37f8d35 100644 (file)
@@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
         * perf profiles. Nothing jumps here.
         */
 syscall_return_via_sysret:
-       /* rcx and r11 are already restored (see code above) */
-       POP_REGS pop_rdi=0 skip_r11rcx=1
+       POP_REGS pop_rdi=0
 
        /*
         * Now all regs are restored except RSP and RDI.
@@ -215,8 +214,13 @@ syscall_return_via_sysret:
 
        popq    %rdi
        popq    %rsp
+SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
        swapgs
        sysretq
+SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
+       int3
 SYM_CODE_END(entry_SYSCALL_64)
 
 /*
@@ -318,6 +322,14 @@ SYM_CODE_END(ret_from_fork)
 #endif
 .endm
 
+/* Save all registers in pt_regs */
+SYM_CODE_START_LOCAL(push_and_clear_regs)
+       UNWIND_HINT_FUNC
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
+       RET
+SYM_CODE_END(push_and_clear_regs)
+
 /**
  * idtentry_body - Macro to emit code calling the C function
  * @cfunc:             C function to be called
@@ -325,7 +337,21 @@ SYM_CODE_END(ret_from_fork)
  */
 .macro idtentry_body cfunc has_error_code:req
 
-       call    error_entry
+       call push_and_clear_regs
+       UNWIND_HINT_REGS
+
+       /*
+        * Call error_entry() and switch to the task stack if from userspace.
+        *
+        * When in XENPV, it is already in the task stack, and it can't fault
+        * for native_iret() nor native_load_gs_index() since XENPV uses its
+        * own pvops for IRET and load_gs_index().  And it doesn't need to
+        * switch the CR3.  So it can skip invoking error_entry().
+        */
+       ALTERNATIVE "call error_entry; movq %rax, %rsp", \
+               "", X86_FEATURE_XENPV
+
+       ENCODE_FRAME_POINTER
        UNWIND_HINT_REGS
 
        movq    %rsp, %rdi                      /* pt_regs pointer into 1st argument*/
@@ -358,6 +384,7 @@ SYM_CODE_START(\asmsym)
        UNWIND_HINT_IRET_REGS offset=\has_error_code*8
        ENDBR
        ASM_CLAC
+       cld
 
        .if \has_error_code == 0
                pushq   $-1                     /* ORIG_RAX: no syscall to restart */
@@ -426,6 +453,7 @@ SYM_CODE_START(\asmsym)
        UNWIND_HINT_IRET_REGS
        ENDBR
        ASM_CLAC
+       cld
 
        pushq   $-1                     /* ORIG_RAX: no syscall to restart */
 
@@ -482,6 +510,7 @@ SYM_CODE_START(\asmsym)
        UNWIND_HINT_IRET_REGS
        ENDBR
        ASM_CLAC
+       cld
 
        /*
         * If the entry is from userspace, switch stacks and treat it as
@@ -508,6 +537,7 @@ SYM_CODE_START(\asmsym)
        call    vc_switch_off_ist
        movq    %rax, %rsp              /* Switch to new stack */
 
+       ENCODE_FRAME_POINTER
        UNWIND_HINT_REGS
 
        /* Update pt_regs */
@@ -544,6 +574,7 @@ SYM_CODE_START(\asmsym)
        UNWIND_HINT_IRET_REGS offset=8
        ENDBR
        ASM_CLAC
+       cld
 
        /* paranoid_entry returns GS information for paranoid_exit in EBX. */
        call    paranoid_entry
@@ -869,7 +900,6 @@ SYM_CODE_END(xen_failsafe_callback)
  */
 SYM_CODE_START_LOCAL(paranoid_entry)
        UNWIND_HINT_FUNC
-       cld
        PUSH_AND_CLEAR_REGS save_ret=1
        ENCODE_FRAME_POINTER 8
 
@@ -983,13 +1013,10 @@ SYM_CODE_START_LOCAL(paranoid_exit)
 SYM_CODE_END(paranoid_exit)
 
 /*
- * Save all registers in pt_regs, and switch GS if needed.
+ * Switch GS and CR3 if needed.
  */
 SYM_CODE_START_LOCAL(error_entry)
        UNWIND_HINT_FUNC
-       cld
-       PUSH_AND_CLEAR_REGS save_ret=1
-       ENCODE_FRAME_POINTER 8
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
 
@@ -997,19 +1024,15 @@ SYM_CODE_START_LOCAL(error_entry)
         * We entered from user mode or we're pretending to have entered
         * from user mode due to an IRET fault.
         */
-       SWAPGS
+       swapgs
        FENCE_SWAPGS_USER_ENTRY
        /* We have user CR3.  Change to kernel CR3. */
        SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
+       leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
 .Lerror_entry_from_usermode_after_swapgs:
        /* Put us onto the real thread stack. */
-       popq    %r12                            /* save return addr in %12 */
-       movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
        call    sync_regs
-       movq    %rax, %rsp                      /* switch stack */
-       ENCODE_FRAME_POINTER
-       pushq   %r12
        RET
 
        /*
@@ -1033,7 +1056,7 @@ SYM_CODE_START_LOCAL(error_entry)
         * gsbase and proceed.  We'll fix up the exception and land in
         * .Lgs_change's error handler with kernel gsbase.
         */
-       SWAPGS
+       swapgs
 
        /*
         * Issue an LFENCE to prevent GS speculation, regardless of whether it is a
@@ -1041,6 +1064,7 @@ SYM_CODE_START_LOCAL(error_entry)
         */
 .Lerror_entry_done_lfence:
        FENCE_SWAPGS_KERNEL_ENTRY
+       leaq    8(%rsp), %rax                   /* return pt_regs pointer */
        RET
 
 .Lbstep_iret:
@@ -1053,7 +1077,7 @@ SYM_CODE_START_LOCAL(error_entry)
         * We came from an IRET to user mode, so we have user
         * gsbase and CR3.  Switch to kernel gsbase and CR3:
         */
-       SWAPGS
+       swapgs
        FENCE_SWAPGS_USER_ENTRY
        SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
@@ -1061,9 +1085,9 @@ SYM_CODE_START_LOCAL(error_entry)
         * Pretend that the exception came from user mode: set up pt_regs
         * as if we faulted immediately after IRET.
         */
-       mov     %rsp, %rdi
+       leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
        call    fixup_bad_iret
-       mov     %rax, %rsp
+       mov     %rax, %rdi
        jmp     .Lerror_entry_from_usermode_after_swapgs
 SYM_CODE_END(error_entry)
 
@@ -1126,6 +1150,7 @@ SYM_CODE_START(asm_exc_nmi)
         */
 
        ASM_CLAC
+       cld
 
        /* Use %rdx as our temp variable throughout */
        pushq   %rdx
@@ -1145,7 +1170,6 @@ SYM_CODE_START(asm_exc_nmi)
         */
 
        swapgs
-       cld
        FENCE_SWAPGS_USER_ENTRY
        SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
        movq    %rsp, %rdx
index 4fdb007cddbd12d6c802915e3dad27f73aa1c392..d1052742ad0cd51c3628978ec53d1a50b12445be 100644 (file)
@@ -50,7 +50,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
        UNWIND_HINT_EMPTY
        ENDBR
        /* Interrupts are off on entry. */
-       SWAPGS
+       swapgs
 
        pushq   %rax
        SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
@@ -83,32 +83,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
        movl    %eax, %eax
 
        pushq   %rax                    /* pt_regs->orig_ax */
-       pushq   %rdi                    /* pt_regs->di */
-       pushq   %rsi                    /* pt_regs->si */
-       pushq   %rdx                    /* pt_regs->dx */
-       pushq   %rcx                    /* pt_regs->cx */
-       pushq   $-ENOSYS                /* pt_regs->ax */
-       pushq   $0                      /* pt_regs->r8  = 0 */
-       xorl    %r8d, %r8d              /* nospec   r8 */
-       pushq   $0                      /* pt_regs->r9  = 0 */
-       xorl    %r9d, %r9d              /* nospec   r9 */
-       pushq   $0                      /* pt_regs->r10 = 0 */
-       xorl    %r10d, %r10d            /* nospec   r10 */
-       pushq   $0                      /* pt_regs->r11 = 0 */
-       xorl    %r11d, %r11d            /* nospec   r11 */
-       pushq   %rbx                    /* pt_regs->rbx */
-       xorl    %ebx, %ebx              /* nospec   rbx */
-       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-       xorl    %ebp, %ebp              /* nospec   rbp */
-       pushq   $0                      /* pt_regs->r12 = 0 */
-       xorl    %r12d, %r12d            /* nospec   r12 */
-       pushq   $0                      /* pt_regs->r13 = 0 */
-       xorl    %r13d, %r13d            /* nospec   r13 */
-       pushq   $0                      /* pt_regs->r14 = 0 */
-       xorl    %r14d, %r14d            /* nospec   r14 */
-       pushq   $0                      /* pt_regs->r15 = 0 */
-       xorl    %r15d, %r15d            /* nospec   r15 */
-
+       PUSH_AND_CLEAR_REGS rax=$-ENOSYS
        UNWIND_HINT_REGS
 
        cld
@@ -225,35 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
 SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
        movl    %eax, %eax              /* discard orig_ax high bits */
        pushq   %rax                    /* pt_regs->orig_ax */
-       pushq   %rdi                    /* pt_regs->di */
-       pushq   %rsi                    /* pt_regs->si */
-       xorl    %esi, %esi              /* nospec   si */
-       pushq   %rdx                    /* pt_regs->dx */
-       xorl    %edx, %edx              /* nospec   dx */
-       pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
-       xorl    %ecx, %ecx              /* nospec   cx */
-       pushq   $-ENOSYS                /* pt_regs->ax */
-       pushq   $0                      /* pt_regs->r8  = 0 */
-       xorl    %r8d, %r8d              /* nospec   r8 */
-       pushq   $0                      /* pt_regs->r9  = 0 */
-       xorl    %r9d, %r9d              /* nospec   r9 */
-       pushq   $0                      /* pt_regs->r10 = 0 */
-       xorl    %r10d, %r10d            /* nospec   r10 */
-       pushq   $0                      /* pt_regs->r11 = 0 */
-       xorl    %r11d, %r11d            /* nospec   r11 */
-       pushq   %rbx                    /* pt_regs->rbx */
-       xorl    %ebx, %ebx              /* nospec   rbx */
-       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-       xorl    %ebp, %ebp              /* nospec   rbp */
-       pushq   $0                      /* pt_regs->r12 = 0 */
-       xorl    %r12d, %r12d            /* nospec   r12 */
-       pushq   $0                      /* pt_regs->r13 = 0 */
-       xorl    %r13d, %r13d            /* nospec   r13 */
-       pushq   $0                      /* pt_regs->r14 = 0 */
-       xorl    %r14d, %r14d            /* nospec   r14 */
-       pushq   $0                      /* pt_regs->r15 = 0 */
-       xorl    %r15d, %r15d            /* nospec   r15 */
-
+       PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
        UNWIND_HINT_REGS
 
        movq    %rsp, %rdi
@@ -297,6 +244,8 @@ sysret32_from_system_call:
         * code.  We zero R8-R10 to avoid info leaks.
          */
        movq    RSP-ORIG_RAX(%rsp), %rsp
+SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
 
        /*
         * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
@@ -314,6 +263,9 @@ sysret32_from_system_call:
        xorl    %r10d, %r10d
        swapgs
        sysretl
+SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
+       int3
 SYM_CODE_END(entry_SYSCALL_compat)
 
 /*
@@ -362,54 +314,25 @@ SYM_CODE_START(entry_INT80_compat)
 
        /* switch to thread stack expects orig_ax and rdi to be pushed */
        pushq   %rax                    /* pt_regs->orig_ax */
-       pushq   %rdi                    /* pt_regs->di */
 
        /* Need to switch before accessing the thread stack. */
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
        /* In the Xen PV case we already run on the thread stack. */
        ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
 
-       movq    %rsp, %rdi
+       movq    %rsp, %rax
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-       pushq   6*8(%rdi)               /* regs->ss */
-       pushq   5*8(%rdi)               /* regs->rsp */
-       pushq   4*8(%rdi)               /* regs->eflags */
-       pushq   3*8(%rdi)               /* regs->cs */
-       pushq   2*8(%rdi)               /* regs->ip */
-       pushq   1*8(%rdi)               /* regs->orig_ax */
-       pushq   (%rdi)                  /* pt_regs->di */
+       pushq   5*8(%rax)               /* regs->ss */
+       pushq   4*8(%rax)               /* regs->rsp */
+       pushq   3*8(%rax)               /* regs->eflags */
+       pushq   2*8(%rax)               /* regs->cs */
+       pushq   1*8(%rax)               /* regs->ip */
+       pushq   0*8(%rax)               /* regs->orig_ax */
 .Lint80_keep_stack:
 
-       pushq   %rsi                    /* pt_regs->si */
-       xorl    %esi, %esi              /* nospec   si */
-       pushq   %rdx                    /* pt_regs->dx */
-       xorl    %edx, %edx              /* nospec   dx */
-       pushq   %rcx                    /* pt_regs->cx */
-       xorl    %ecx, %ecx              /* nospec   cx */
-       pushq   $-ENOSYS                /* pt_regs->ax */
-       pushq   %r8                     /* pt_regs->r8 */
-       xorl    %r8d, %r8d              /* nospec   r8 */
-       pushq   %r9                     /* pt_regs->r9 */
-       xorl    %r9d, %r9d              /* nospec   r9 */
-       pushq   %r10                    /* pt_regs->r10*/
-       xorl    %r10d, %r10d            /* nospec   r10 */
-       pushq   %r11                    /* pt_regs->r11 */
-       xorl    %r11d, %r11d            /* nospec   r11 */
-       pushq   %rbx                    /* pt_regs->rbx */
-       xorl    %ebx, %ebx              /* nospec   rbx */
-       pushq   %rbp                    /* pt_regs->rbp */
-       xorl    %ebp, %ebp              /* nospec   rbp */
-       pushq   %r12                    /* pt_regs->r12 */
-       xorl    %r12d, %r12d            /* nospec   r12 */
-       pushq   %r13                    /* pt_regs->r13 */
-       xorl    %r13d, %r13d            /* nospec   r13 */
-       pushq   %r14                    /* pt_regs->r14 */
-       xorl    %r14d, %r14d            /* nospec   r14 */
-       pushq   %r15                    /* pt_regs->r15 */
-       xorl    %r15d, %r15d            /* nospec   r15 */
-
+       PUSH_AND_CLEAR_REGS rax=$-ENOSYS
        UNWIND_HINT_REGS
 
        cld
index 693f8b9031fb85eb2fa147e3b5370fe2744075b6..c2a8b76ae0bce2d1b77d420fa588b65d6bbdf0a4 100644 (file)
@@ -91,7 +91,7 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),)
 endif
 endif
 
-$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
 
 #
 # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -148,6 +148,7 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32))
index 235a5794296acbef611d65d2de8cbec7de9f4ed3..1000d457c3321e2caf3c9428e7961b0e1c572458 100644 (file)
@@ -438,7 +438,7 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
 static __init int vdso_setup(char *s)
 {
        vdso64_enabled = simple_strtoul(s, NULL, 0);
-       return 0;
+       return 1;
 }
 __setup("vdso=", vdso_setup);
 
index fd2ee9408e914a20a4b50a6cdb1249e297e913c2..4af81df133ee8dc781c7c787c693d7701ae8793d 100644 (file)
@@ -48,7 +48,7 @@ static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init =
 #elif defined(CONFIG_LEGACY_VSYSCALL_XONLY)
        XONLY;
 #else
-       EMULATE;
+       #error VSYSCALL config is broken
 #endif
 
 static int __init vsyscall_setup(char *str)
index d6cdfe631674896010f79075303fc741b9e3963b..09c56965750ae0127423b2a505d203d0fec7675d 100644 (file)
@@ -44,4 +44,12 @@ config PERF_EVENTS_AMD_UNCORE
 
          To compile this driver as a module, choose M here: the
          module will be called 'amd-uncore'.
+
+config PERF_EVENTS_AMD_BRS
+       depends on PERF_EVENTS && CPU_SUP_AMD
+       bool "AMD Zen3 Branch Sampling support"
+       help
+         Enable AMD Zen3 branch sampling support (BRS) which samples up to
+         16 consecutive taken branches in registers.
+
 endmenu
index 6cbe38d5fd9d1b169eb4e9917696606b0d2f57bd..b9f5d4610256d16f2816126efb6a5f83da849628 100644 (file)
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_CPU_SUP_AMD)              += core.o
+obj-$(CONFIG_PERF_EVENTS_AMD_BRS)      += brs.o
 obj-$(CONFIG_PERF_EVENTS_AMD_POWER)    += power.o
 obj-$(CONFIG_X86_LOCAL_APIC)           += ibs.o
 obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE)   += amd-uncore.o
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
new file mode 100644 (file)
index 0000000..bee8765
--- /dev/null
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implement support for AMD Fam19h Branch Sampling feature
+ * Based on specifications published in AMD PPR Fam19 Model 01
+ *
+ * Copyright 2021 Google LLC
+ * Contributed by Stephane Eranian <eranian@google.com>
+ */
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/msr.h>
+#include <asm/cpufeature.h>
+
+#include "../perf_event.h"
+
+#define BRS_POISON     0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */
+
+/* Debug Extension Configuration register layout */
+union amd_debug_extn_cfg {
+       __u64 val;
+       struct {
+               __u64   rsvd0:2,  /* reserved */
+                       brsmen:1, /* branch sample enable */
+                       rsvd4_3:2, /* reserved - must be 0x3 */
+                       vb:1,     /* valid branches recorded */
+                       rsvd2:10, /* reserved */
+                       msroff:4, /* index of next entry to write */
+                       rsvd3:4,  /* reserved */
+                       pmc:3,    /* #PMC holding the sampling event */
+                       rsvd4:37; /* reserved */
+       };
+};
+
+static inline unsigned int brs_from(int idx)
+{
+       return MSR_AMD_SAMP_BR_FROM + 2 * idx;
+}
+
+static inline unsigned int brs_to(int idx)
+{
+       return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
+}
+
+static inline void set_debug_extn_cfg(u64 val)
+{
+       /* bits[4:3] must always be set to 11b */
+       wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
+}
+
+static inline u64 get_debug_extn_cfg(void)
+{
+       u64 val;
+
+       rdmsrl(MSR_AMD_DBG_EXTN_CFG, val);
+       return val;
+}
+
+static bool __init amd_brs_detect(void)
+{
+       if (!cpu_feature_enabled(X86_FEATURE_BRS))
+               return false;
+
+       switch (boot_cpu_data.x86) {
+       case 0x19: /* AMD Fam19h (Zen3) */
+               x86_pmu.lbr_nr = 16;
+
+               /* No hardware filtering supported */
+               x86_pmu.lbr_sel_map = NULL;
+               x86_pmu.lbr_sel_mask = 0;
+               break;
+       default:
+               return false;
+       }
+
+       return true;
+}
+
+/*
+ * The current BRS implementation does not support branch type or privilege
+ * level filtering, so this function simply enforces these limitations. No
+ * need for a br_sel_map. Software filtering is not supported because it
+ * would not correlate well with a sampling period.
+ */
+int amd_brs_setup_filter(struct perf_event *event)
+{
+       u64 type = event->attr.branch_sample_type;
+
+       /* No BRS support */
+       if (!x86_pmu.lbr_nr)
+               return -EOPNOTSUPP;
+
+       /* Can only capture all branches, i.e., no filtering */
+       if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
+               return -EINVAL;
+
+       return 0;
+}
+
+/* tos = top of stack, i.e., last valid entry written */
+static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
+{
+       /*
+        * msroff is the index of the next entry to write, so top-of-stack
+        * is one behind it. If BRS is full, msroff wraps back to 0.
+        */
+       return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
+}
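
A worked example, assuming the 16-deep Fam19h BRS set up in amd_brs_detect(): msroff = 5 means the most recent write went to slot 4, while msroff = 0 means the buffer wrapped and the most recent write went to slot 15 (stand-alone restatement, illustrative):

    static unsigned int brs_tos(unsigned int msroff, unsigned int depth)
    {
            return (msroff ? msroff : depth) - 1;   /* (5,16) -> 4, (0,16) -> 15 */
    }
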
+
+/*
+ * Make sure we have a sane BRS offset to begin with,
+ * especially after kexec.
+ */
+void amd_brs_reset(void)
+{
+       if (!cpu_feature_enabled(X86_FEATURE_BRS))
+               return;
+
+       /*
+        * Reset config
+        */
+       set_debug_extn_cfg(0);
+
+       /*
+        * Mark first entry as poisoned
+        */
+       wrmsrl(brs_to(0), BRS_POISON);
+}
+
+int __init amd_brs_init(void)
+{
+       if (!amd_brs_detect())
+               return -EOPNOTSUPP;
+
+       pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);
+
+       return 0;
+}
+
+void amd_brs_enable(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       union amd_debug_extn_cfg cfg;
+
+       /* Activate only on first user */
+       if (++cpuc->brs_active > 1)
+               return;
+
+       cfg.val    = 0; /* reset all fields */
+       cfg.brsmen = 1; /* enable branch sampling */
+
+       /* Set enable bit */
+       set_debug_extn_cfg(cfg.val);
+}
+
+void amd_brs_enable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->lbr_users)
+               amd_brs_enable();
+}
+
+void amd_brs_disable(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       union amd_debug_extn_cfg cfg;
+
+       /* Check if active (could be disabled via x86_pmu_disable_all()) */
+       if (!cpuc->brs_active)
+               return;
+
+       /* Only disable for last user */
+       if (--cpuc->brs_active)
+               return;
+
+       /*
+        * Clear the brsmen bit but preserve the others as they contain
+        * useful state such as vb and msroff
+        */
+       cfg.val = get_debug_extn_cfg();
+
+       /*
+        * When coming in on interrupt and BRS is full, then hw will have
+        * already stopped BRS, no need to issue wrmsr again
+        */
+       if (cfg.brsmen) {
+               cfg.brsmen = 0;
+               set_debug_extn_cfg(cfg.val);
+       }
+}
+
+void amd_brs_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->lbr_users)
+               amd_brs_disable();
+}
+
+static bool amd_brs_match_plm(struct perf_event *event, u64 to)
+{
+       int type = event->attr.branch_sample_type;
+       int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
+       int plm_u = PERF_SAMPLE_BRANCH_USER;
+
+       if (!(type & plm_k) && kernel_ip(to))
+               return 0;
+
+       if (!(type & plm_u) && !kernel_ip(to))
+               return 0;
+
+       return 1;
+}
+
+/*
+ * Caller must ensure amd_brs_inuse() is true before calling.
+ */
+void amd_brs_drain(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct perf_event *event = cpuc->events[0];
+       struct perf_branch_entry *br = cpuc->lbr_entries;
+       union amd_debug_extn_cfg cfg;
+       u32 i, nr = 0, num, tos, start;
+       u32 shift = 64 - boot_cpu_data.x86_virt_bits;
+
+       /*
+        * The BRS event is forced onto PMC0, so check whether an event
+        * is there. It is possible to have lbr_users > 0 but the event
+        * not yet scheduled due to a long-latency PMU irq.
+        */
+       if (!event)
+               goto empty;
+
+       cfg.val = get_debug_extn_cfg();
+
+       /* Sanity check [0-x86_pmu.lbr_nr] */
+       if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
+               goto empty;
+
+       /* No valid branch */
+       if (cfg.vb == 0)
+               goto empty;
+
+       /*
+        * msr.off points to the next entry to be written.
+        * tos = most recent entry index = msr.off - 1
+        * The BRS register buffer saturates, so we know we have
+        * start <= tos and that we have to read from start to tos.
+        */
+       start = 0;
+       tos = amd_brs_get_tos(&cfg);
+
+       num = tos - start + 1;
+
+       /*
+        * BRS makes only one pass (saturation) from MSROFF to depth-1;
+        * MSROFF wraps to zero when the buffer is full.
+        */
+       for (i = 0; i < num; i++) {
+               u32 brs_idx = tos - i;
+               u64 from, to;
+
+               rdmsrl(brs_to(brs_idx), to);
+
+               /* Entry does not belong to us (as marked by kernel) */
+               if (to == BRS_POISON)
+                       break;
+
+               /*
+                * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
+                * Necessary to generate proper virtual addresses suitable for
+                * symbolization
+                */
+               to = (u64)(((s64)to << shift) >> shift);
+
+               if (!amd_brs_match_plm(event, to))
+                       continue;
+
+               rdmsrl(brs_from(brs_idx), from);
+
+               perf_clear_branch_entry_bitfields(br+nr);
+
+               br[nr].from = from;
+               br[nr].to   = to;
+
+               nr++;
+       }
+empty:
+       /* Record number of sampled branches */
+       cpuc->lbr_stack.nr = nr;
+}
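
The shift pair in amd_brs_drain() canonicalizes SAMP_BR_TO by replicating the top implemented virtual-address bit into the reserved high bits. A stand-alone sketch, assuming 48 implemented virtual bits (a common x86_virt_bits value; helper name hypothetical):

    #include <stdint.h>

    static uint64_t canonicalize_va(uint64_t to, unsigned int virt_bits)
    {
            unsigned int shift = 64 - virt_bits;    /* 16 for 48-bit VAs */

            /* The arithmetic right shift replicates the top VA bit */
            return (uint64_t)(((int64_t)to << shift) >> shift);
    }
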
+
+/*
+ * Poison the most recent entry to prevent reuse by the next task;
+ * required because BRS entries are not tagged by PID.
+ */
+static void amd_brs_poison_buffer(void)
+{
+       union amd_debug_extn_cfg cfg;
+       unsigned int idx;
+
+       /* Get current state */
+       cfg.val = get_debug_extn_cfg();
+
+       /* idx is most recently written entry */
+       idx = amd_brs_get_tos(&cfg);
+
+       /* Poison target of entry */
+       wrmsrl(brs_to(idx), BRS_POISON);
+}
+
+/*
+ * On context switch in, we need to make sure no samples from the previous
+ * user are left in the BRS.
+ *
+ * On ctxswin, sched_in = true, called after the PMU has started
+ * On ctxswout, sched_in = false, called before the PMU is stopped
+ */
+void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       /* no active users */
+       if (!cpuc->lbr_users)
+               return;
+
+       /*
+        * On context switch in, we need to ensure we do not use entries
+        * from previous BRS user on that CPU, so we poison the buffer as
+        * a faster way compared to resetting all entries.
+        */
+       if (sched_in)
+               amd_brs_poison_buffer();
+}
+
+/*
+ * Called from ACPI processor_idle.c or acpi_pad.c,
+ * with interrupts disabled.
+ */
+void perf_amd_brs_lopwr_cb(bool lopwr_in)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       union amd_debug_extn_cfg cfg;
+
+       /*
+        * On mwait in, we may end up in a non-C0 state.
+        * We must disable branch sampling to avoid holding the NMI
+        * for too long. We disable it in hardware but
+        * keep the state in cpuc, so we can re-enable later.
+        *
+        * The hardware will deliver the NMI if needed once brsmen is cleared.
+        */
+       if (cpuc->brs_active) {
+               cfg.val = get_debug_extn_cfg();
+               cfg.brsmen = !lopwr_in;
+               set_debug_extn_cfg(cfg.val);
+       }
+}
+
+DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);
+
+void __init amd_brs_lopwr_init(void)
+{
+       static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+}
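
For context, a low-power entry path would reach the callback through the static call declared above, roughly as in the sketch below; the call sites are assumed, not shown in this hunk:

    /* Hypothetical caller in an idle path (sketch only, relies on the
     * kernel's static-call machinery): */
    static_call(perf_lopwr_cb)(true);   /* entering mwait, leaving C0 */
    /* ... low-power residency ... */
    static_call(perf_lopwr_cb)(false);  /* back in C0, brsmen restored */
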
index 9687a8aef01c5d48533d020aba872fe58f6b41f8..9ac3718410ce4bf4173ee566f6bce208938ea08e 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/perf_event.h>
+#include <linux/jump_label.h>
 #include <linux/export.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -7,6 +8,7 @@
 #include <linux/delay.h>
 #include <linux/jiffies.h>
 #include <asm/apicdef.h>
+#include <asm/apic.h>
 #include <asm/nmi.h>
 
 #include "../perf_event.h"
@@ -18,6 +20,9 @@ static unsigned long perf_nmi_window;
 #define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
 #define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
 
+/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */
+static u64 amd_pmu_global_cntr_mask __read_mostly;
+
 static __initconst const u64 amd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -325,8 +330,16 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
        }
 }
 
+#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
+static inline int amd_is_brs_event(struct perf_event *e)
+{
+       return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
+}
+
 static int amd_core_hw_config(struct perf_event *event)
 {
+       int ret = 0;
+
        if (event->attr.exclude_host && event->attr.exclude_guest)
                /*
                 * When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -343,7 +356,66 @@ static int amd_core_hw_config(struct perf_event *event)
        if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
                event->hw.flags |= PERF_X86_EVENT_PAIR;
 
-       return 0;
+       /* if branch stack is requested */
+       if (has_branch_stack(event)) {
+               /*
+                * Due to interrupt holding, BRS is not recommended in
+                * counting mode.
+                */
+               if (!is_sampling_event(event))
+                       return -EINVAL;
+
+               /*
+                * Due to the way BRS operates by holding the interrupt until
+                * lbr_nr entries have been captured, it does not make sense
+                * to allow sampling on BRS with an event that does not match
+                * what BRS is capturing, i.e., retired taken branches.
+                * Otherwise the correlation with the event's period is even
+                * looser:
+                *
+                * With retired taken branch:
+                *   Effective P = P + 16 + X
+                * With any other event:
+                *   Effective P = P + Y + X
+                *
+                * Where X is the number of taken branches due to interrupt
+                * skid. Skid is large.
+                *
+                * Where Y is the occurrences of the event while BRS is
+                * capturing the lbr_nr entries.
+                *
+                * By using retired taken branches, we limit the impact on the
+                * Y variable. We know it cannot be more than the depth of
+                * BRS.
+                */
+               if (!amd_is_brs_event(event))
+                       return -EINVAL;
+
+               /*
+                * BRS implementation does not work with frequency mode
+                * reprogramming of the period.
+                */
+               if (event->attr.freq)
+                       return -EINVAL;
+               /*
+                * The kernel subtracts BRS depth from period, so it must
+                * be big enough.
+                */
+               if (event->attr.sample_period <= x86_pmu.lbr_nr)
+                       return -EINVAL;
+
+               /*
+                * Check if we can allow PERF_SAMPLE_BRANCH_STACK
+                */
+               ret = amd_brs_setup_filter(event);
+
+               /* only set in case of success */
+               if (!ret)
+                       event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
+       }
+       return ret;
 }
 
 static inline int amd_is_nb_event(struct hw_perf_event *hwc)
@@ -366,7 +438,7 @@ static int amd_pmu_hw_config(struct perf_event *event)
        if (event->attr.precise_ip && get_ibs_caps())
                return -ENOENT;
 
-       if (has_branch_stack(event))
+       if (has_branch_stack(event) && !x86_pmu.lbr_nr)
                return -EOPNOTSUPP;
 
        ret = x86_pmu_hw_config(event);
@@ -510,6 +582,18 @@ static struct amd_nb *amd_alloc_nb(int cpu)
        return nb;
 }
 
+static void amd_pmu_cpu_reset(int cpu)
+{
+       if (x86_pmu.version < 2)
+               return;
+
+       /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
+       wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+
+       /* Clear overflow bits i.e. PerfCntrGlobalStatus.PerfCntrOvfl */
+       wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask);
+}
+
 static int amd_pmu_cpu_prepare(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@@ -555,6 +639,9 @@ static void amd_pmu_cpu_starting(int cpu)
 
        cpuc->amd_nb->nb_id = nb_id;
        cpuc->amd_nb->refcnt++;
+
+       amd_brs_reset();
+       amd_pmu_cpu_reset(cpu);
 }
 
 static void amd_pmu_cpu_dead(int cpu)
@@ -574,8 +661,54 @@ static void amd_pmu_cpu_dead(int cpu)
 
                cpuhw->amd_nb = NULL;
        }
+
+       amd_pmu_cpu_reset(cpu);
+}
+
+static inline void amd_pmu_set_global_ctl(u64 ctl)
+{
+       wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
 }
 
+static inline u64 amd_pmu_get_global_status(void)
+{
+       u64 status;
+
+       /* PerfCntrGlobalStatus is read-only */
+       rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
+
+       return status & amd_pmu_global_cntr_mask;
+}
+
+static inline void amd_pmu_ack_global_status(u64 status)
+{
+       /*
+        * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
+        * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
+        * clears the same bit in PerfCntrGlobalStatus
+        */
+
+       /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
+       status &= amd_pmu_global_cntr_mask;
+       wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
+}
+
+static bool amd_pmu_test_overflow_topbit(int idx)
+{
+       u64 counter;
+
+       rdmsrl(x86_pmu_event_addr(idx), counter);
+
+       return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
+}
+
+static bool amd_pmu_test_overflow_status(int idx)
+{
+       return amd_pmu_get_global_status() & BIT_ULL(idx);
+}
+
+DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit);
+
 /*
  * When a PMC counter overflows, an NMI is used to process the event and
  * reset the counter. NMI latency can result in the counter being updated
@@ -588,7 +721,6 @@ static void amd_pmu_cpu_dead(int cpu)
 static void amd_pmu_wait_on_overflow(int idx)
 {
        unsigned int i;
-       u64 counter;
 
        /*
         * Wait for the counter to be reset if it has overflowed. This loop
@@ -596,8 +728,7 @@ static void amd_pmu_wait_on_overflow(int idx)
         * forever...
         */
        for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
-               rdmsrl(x86_pmu_event_addr(idx), counter);
-               if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+               if (!static_call(amd_pmu_test_overflow)(idx))
                        break;
 
                /* Might be in IRQ context, so can't sleep */
@@ -605,13 +736,11 @@ static void amd_pmu_wait_on_overflow(int idx)
        }
 }
 
-static void amd_pmu_disable_all(void)
+static void amd_pmu_check_overflow(void)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int idx;
 
-       x86_pmu_disable_all();
-
        /*
         * This shouldn't be called from NMI context, but add a safeguard here
         * to return, since if we're in NMI context we can't wait for an NMI
@@ -634,6 +763,47 @@ static void amd_pmu_disable_all(void)
        }
 }
 
+static void amd_pmu_enable_event(struct perf_event *event)
+{
+       x86_pmu_enable_event(event);
+}
+
+static void amd_pmu_enable_all(int added)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       amd_brs_enable_all();
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               /* only activate events which are marked as active */
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+
+               amd_pmu_enable_event(cpuc->events[idx]);
+       }
+}
+
+static void amd_pmu_v2_enable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       /*
+        * Testing cpu_hw_events.enabled should be skipped in this case unlike
+        * in x86_pmu_enable_event().
+        *
+        * Since cpu_hw_events.enabled is set only after returning from
+        * x86_pmu_start(), the PMCs must be programmed and kept ready.
+        * Counting starts only after x86_pmu_enable_all() is called.
+        */
+       __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+static void amd_pmu_v2_enable_all(int added)
+{
+       amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
+}
+
 static void amd_pmu_disable_event(struct perf_event *event)
 {
        x86_pmu_disable_event(event);
@@ -651,6 +821,32 @@ static void amd_pmu_disable_event(struct perf_event *event)
        amd_pmu_wait_on_overflow(event->hw.idx);
 }
 
+static void amd_pmu_disable_all(void)
+{
+       amd_brs_disable_all();
+       x86_pmu_disable_all();
+       amd_pmu_check_overflow();
+}
+
+static void amd_pmu_v2_disable_all(void)
+{
+       /* Disable all PMCs */
+       amd_pmu_set_global_ctl(0);
+       amd_pmu_check_overflow();
+}
+
+static void amd_pmu_add_event(struct perf_event *event)
+{
+       if (needs_branch_stack(event))
+               amd_pmu_brs_add(event);
+}
+
+static void amd_pmu_del_event(struct perf_event *event)
+{
+       if (needs_branch_stack(event))
+               amd_pmu_brs_del(event);
+}
+
 /*
  * Because of NMI latency, if multiple PMC counters are active or other sources
  * of NMIs are received, the perf NMI handler can handle one or more overflowed
@@ -669,13 +865,8 @@ static void amd_pmu_disable_event(struct perf_event *event)
  * handled a counter. When an un-handled NMI is received, it will be claimed
  * only if arriving within that window.
  */
-static int amd_pmu_handle_irq(struct pt_regs *regs)
+static inline int amd_pmu_adjust_nmi_window(int handled)
 {
-       int handled;
-
-       /* Process any counter overflows */
-       handled = x86_pmu_handle_irq(regs);
-
        /*
         * If a counter was handled, record a timestamp such that un-handled
         * NMIs will be claimed if arriving within that window.
@@ -692,6 +883,113 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
        return NMI_HANDLED;
 }
 
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int handled;
+       int pmu_enabled;
+
+       /*
+        * Save the PMU state.
+        * It needs to be restored when leaving the handler.
+        */
+       pmu_enabled = cpuc->enabled;
+       cpuc->enabled = 0;
+
+       /* stop everything (includes BRS) */
+       amd_pmu_disable_all();
+
+       /* Drain BRS if in use (could be inactive) */
+       if (cpuc->lbr_users)
+               amd_brs_drain();
+
+       /* Process any counter overflows */
+       handled = x86_pmu_handle_irq(regs);
+
+       cpuc->enabled = pmu_enabled;
+       if (pmu_enabled)
+               amd_pmu_enable_all(0);
+
+       return amd_pmu_adjust_nmi_window(handled);
+}
+
+static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct perf_sample_data data;
+       struct hw_perf_event *hwc;
+       struct perf_event *event;
+       int handled = 0, idx;
+       u64 status, mask;
+       bool pmu_enabled;
+
+       /*
+        * Save the PMU state as it needs to be restored when leaving the
+        * handler
+        */
+       pmu_enabled = cpuc->enabled;
+       cpuc->enabled = 0;
+
+       /* Stop counting */
+       amd_pmu_v2_disable_all();
+
+       status = amd_pmu_get_global_status();
+
+       /* Check if any overflows are pending */
+       if (!status)
+               goto done;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+
+               event = cpuc->events[idx];
+               hwc = &event->hw;
+               x86_perf_event_update(event);
+               mask = BIT_ULL(idx);
+
+               if (!(status & mask))
+                       continue;
+
+               /* Event overflow */
+               handled++;
+               perf_sample_data_init(&data, 0, hwc->last_period);
+
+               if (!x86_perf_event_set_period(event))
+                       continue;
+
+               if (perf_event_overflow(event, &data, regs))
+                       x86_pmu_stop(event, 0);
+
+               status &= ~mask;
+       }
+
+       /*
+        * It should never be the case that some overflows are not handled as
+        * the corresponding PMCs are expected to be inactive according to the
+        * active_mask
+        */
+       WARN_ON(status > 0);
+
+       /* Clear overflow bits */
+       amd_pmu_ack_global_status(~status);
+
+       /*
+        * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
+        * PMI entry is not set by the local APIC when a PMC overflow occurs
+        */
+       inc_irq_stat(apic_perf_irqs);
+
+done:
+       cpuc->enabled = pmu_enabled;
+
+       /* Resume counting only if PMU is active */
+       if (pmu_enabled)
+               amd_pmu_v2_enable_all(0);
+
+       return amd_pmu_adjust_nmi_window(handled);
+}
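amd_pmu_get_global_status() and amd_pmu_ack_global_status() are introduced elsewhere in this series; assuming they are thin wrappers around the PerfMonV2 global-control MSRs, minimal sketches would be:

static inline u64 amd_pmu_get_global_status(void)
{
        u64 status;

        /* PerfCntrGlobalStatus is read-only */
        rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);

        return status;
}

static inline void amd_pmu_ack_global_status(u64 status)
{
        /* Bits set in the mask are cleared in PerfCntrGlobalStatus */
        wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
}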
+
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                          struct perf_event *event)
@@ -897,6 +1195,51 @@ static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
                --cpuc->n_pair;
 }
 
+/*
+ * Because of the way BRS operates, with an inactive and an active phase
+ * tied to a single counter, it is not possible to have two events using
+ * BRS scheduled at the same time. There would be an issue with enforcing
+ * the period of each one and, given that BRS saturates, it would not be
+ * possible to guarantee correlated content for all events. Therefore, in
+ * situations where multiple events want to use BRS, the kernel enforces
+ * mutual exclusion. Exclusion is enforced by choosing only one counter
+ * for events using BRS. The event scheduling logic will then
+ * automatically multiplex the events and ensure that at most one event
+ * is actively using BRS.
+ *
+ * The BRS counter could be any counter, but there is no constraint on
+ * Fam19h, therefore all counters are equal and thus we pick the first
+ * one: PMC0
+ */
+static struct event_constraint amd_fam19h_brs_cntr0_constraint =
+       EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK);
+
+static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =
+       __EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR);
+
+static struct event_constraint *
+amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       bool has_brs = has_amd_brs(hwc);
+
+       /*
+        * In case BRS is used with an event requiring a counter pair,
+        * the kernel allows it, but only on counters 0 & 1, to enforce
+        * the multiplexing required to protect BRS when there are
+        * multiple BRS users
+        */
+       if (amd_is_pair_event_code(hwc)) {
+               return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint
+                              : &pair_constraint;
+       }
+
+       if (has_brs)
+               return &amd_fam19h_brs_cntr0_constraint;
+
+       return &unconstrained;
+}
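A worked example of the exclusion, following the constraint masks above (the scenario itself is hypothetical):

/*
 * Two simultaneously scheduled BRS events both receive
 * amd_fam19h_brs_cntr0_constraint, whose idxmsk is 0x1, i.e. BIT(0).
 * The scheduler can therefore place only one of them at a time and
 * rotates the two, which is the mutual exclusion described above,
 * realized as ordinary counter multiplexing on PMC0.
 */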
+
 static ssize_t amd_event_sysfs_show(char *page, u64 config)
 {
        u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
@@ -905,12 +1248,31 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
        return x86_event_sysfs_show(page, config, event);
 }
 
+static void amd_pmu_sched_task(struct perf_event_context *ctx,
+                                bool sched_in)
+{
+       if (sched_in && x86_pmu.lbr_nr)
+               amd_pmu_brs_sched_task(ctx, sched_in);
+}
+
+static u64 amd_pmu_limit_period(struct perf_event *event, u64 left)
+{
+       /*
+        * Decrease period by the depth of the BRS feature to get the last N
+        * taken branches and approximate the desired period
+        */
+       if (has_branch_stack(event) && left > x86_pmu.lbr_nr)
+               left -= x86_pmu.lbr_nr;
+
+       return left;
+}
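A worked example of the adjustment, assuming the Fam19h BRS depth x86_pmu.lbr_nr == 16:

/*
 * A requested period of 100000 becomes 100000 - 16 = 99984, so the
 * counter overflows 16 taken branches early; the saturating BRS buffer
 * then holds the final 16 branches leading up to the desired period.
 */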
+
 static __initconst const struct x86_pmu amd_pmu = {
        .name                   = "AMD",
        .handle_irq             = amd_pmu_handle_irq,
        .disable_all            = amd_pmu_disable_all,
-       .enable_all             = x86_pmu_enable_all,
-       .enable                 = x86_pmu_enable_event,
+       .enable_all             = amd_pmu_enable_all,
+       .enable                 = amd_pmu_enable_event,
        .disable                = amd_pmu_disable_event,
        .hw_config              = amd_pmu_hw_config,
        .schedule_events        = x86_schedule_events,
@@ -920,6 +1282,8 @@ static __initconst const struct x86_pmu amd_pmu = {
        .event_map              = amd_pmu_event_map,
        .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
        .num_counters           = AMD64_NUM_COUNTERS,
+       .add                    = amd_pmu_add_event,
+       .del                    = amd_pmu_del_event,
        .cntval_bits            = 48,
        .cntval_mask            = (1ULL << 48) - 1,
        .apic                   = 1,
@@ -938,8 +1302,55 @@ static __initconst const struct x86_pmu amd_pmu = {
        .amd_nb_constraints     = 1,
 };
 
+static ssize_t branches_show(struct device *cdev,
+                             struct device_attribute *attr,
+                             char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
+}
+
+static DEVICE_ATTR_RO(branches);
+
+static struct attribute *amd_pmu_brs_attrs[] = {
+       &dev_attr_branches.attr,
+       NULL,
+};
+
+static umode_t
+amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+       return x86_pmu.lbr_nr ? attr->mode : 0;
+}
+
+static struct attribute_group group_caps_amd_brs = {
+       .name  = "caps",
+       .attrs = amd_pmu_brs_attrs,
+       .is_visible = amd_brs_is_visible,
+};
+
+EVENT_ATTR_STR(branch-brs, amd_branch_brs,
+              "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");
+
+static struct attribute *amd_brs_events_attrs[] = {
+       EVENT_PTR(amd_branch_brs),
+       NULL,
+};
+
+static struct attribute_group group_events_amd_brs = {
+       .name       = "events",
+       .attrs      = amd_brs_events_attrs,
+       .is_visible = amd_brs_is_visible,
+};
+
+static const struct attribute_group *amd_attr_update[] = {
+       &group_caps_amd_brs,
+       &group_events_amd_brs,
+       NULL,
+};
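With these groups hooked up via x86_pmu.attr_update (see below) and x86_pmu.lbr_nr non-zero, the new files should surface as follows; the paths assume the standard event_source sysfs layout, and the event value depends on how AMD_FAM19H_BRS_EVENT is defined:

        /sys/bus/event_source/devices/cpu/caps/branches      <- prints x86_pmu.lbr_nr
        /sys/bus/event_source/devices/cpu/events/branch-brs  <- prints "event=<AMD_FAM19H_BRS_EVENT>"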
+
 static int __init amd_core_pmu_init(void)
 {
+       union cpuid_0x80000022_ebx ebx;
        u64 even_ctr_mask = 0ULL;
        int i;
 
@@ -957,6 +1368,27 @@ static int __init amd_core_pmu_init(void)
        x86_pmu.eventsel        = MSR_F15H_PERF_CTL;
        x86_pmu.perfctr         = MSR_F15H_PERF_CTR;
        x86_pmu.num_counters    = AMD64_NUM_COUNTERS_CORE;
+
+       /* Check for Performance Monitoring v2 support */
+       if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
+               ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+
+               /* Update PMU version for later usage */
+               x86_pmu.version = 2;
+
+               /* Find the number of available Core PMCs */
+               x86_pmu.num_counters = ebx.split.num_core_pmc;
+
+               amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+
+               /* Update PMC handling functions */
+               x86_pmu.enable_all = amd_pmu_v2_enable_all;
+               x86_pmu.disable_all = amd_pmu_v2_disable_all;
+               x86_pmu.enable = amd_pmu_v2_enable_event;
+               x86_pmu.handle_irq = amd_pmu_v2_handle_irq;
+               static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status);
+       }
+
        /*
         * AMD Core perfctr has separate MSRs for the NB events, see
         * the amd/uncore.c driver.
@@ -989,6 +1421,23 @@ static int __init amd_core_pmu_init(void)
                x86_pmu.flags |= PMU_FL_PAIR;
        }
 
+       /*
+        * BRS requires special event constraints and flushing on ctxsw.
+        */
+       if (boot_cpu_data.x86 >= 0x19 && !amd_brs_init()) {
+               x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
+               x86_pmu.sched_task = amd_pmu_sched_task;
+               x86_pmu.limit_period = amd_pmu_limit_period;
+               /*
+                * put_event_constraints callback same as Fam17h, set above
+                */
+
+               /* branch sampling must be stopped when entering low power */
+               amd_brs_lopwr_init();
+       }
+
+       x86_pmu.attr_update = amd_attr_update;
+
        pr_cont("core perfctr, ");
        return 0;
 }
@@ -1023,6 +1472,24 @@ __init int amd_pmu_init(void)
        return 0;
 }
 
+static inline void amd_pmu_reload_virt(void)
+{
+       if (x86_pmu.version >= 2) {
+               /*
+                * Clear global enable bits, reprogram the PERF_CTL
+                * registers with updated perf_ctr_virt_mask and then
+                * set global enable bits once again
+                */
+               amd_pmu_v2_disable_all();
+               amd_pmu_enable_all(0);
+               amd_pmu_v2_enable_all(0);
+               return;
+       }
+
+       amd_pmu_disable_all();
+       amd_pmu_enable_all(0);
+}
+
 void amd_pmu_enable_virt(void)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1030,8 +1497,7 @@ void amd_pmu_enable_virt(void)
        cpuc->perf_ctr_virt_mask = 0;
 
        /* Reload all events */
-       amd_pmu_disable_all();
-       x86_pmu_enable_all(0);
+       amd_pmu_reload_virt();
 }
 EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
 
@@ -1048,7 +1514,6 @@ void amd_pmu_disable_virt(void)
        cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 
        /* Reload all events */
-       amd_pmu_disable_all();
-       x86_pmu_enable_all(0);
+       amd_pmu_reload_virt();
 }
 EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
index 9739019d4b67afd63c167e6c5df170338708b3ff..c251bc44c088d8382b9bfac688e1901f78642bf1 100644 (file)
@@ -94,10 +94,6 @@ struct perf_ibs {
        unsigned int                    fetch_ignore_if_zero_rip : 1;
        struct cpu_perf_ibs __percpu    *pcpu;
 
-       struct attribute                **format_attrs;
-       struct attribute_group          format_group;
-       const struct attribute_group    *attr_groups[2];
-
        u64                             (*get_count)(u64 config);
 };
 
@@ -304,6 +300,16 @@ static int perf_ibs_init(struct perf_event *event)
        hwc->config_base = perf_ibs->msr;
        hwc->config = config;
 
+       /*
+        * The rip recorded by IbsOpRip will not be consistent with the rsp
+        * and rbp recorded as part of the interrupt regs, so we need to use
+        * the rip from the interrupt regs while unwinding the call stack.
+        * Setting the _EARLY flag makes sure we unwind the call stack before
+        * the perf sample rip is set to IbsOpRip.
+        */
+       if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+               event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
+
        return 0;
 }
 
@@ -518,16 +524,118 @@ static void perf_ibs_del(struct perf_event *event, int flags)
 
 static void perf_ibs_read(struct perf_event *event) { }
 
+/*
+ * We need to initialize with an empty group if all attributes in the
+ * group are dynamic.
+ */
+static struct attribute *attrs_empty[] = {
+       NULL,
+};
+
+static struct attribute_group empty_format_group = {
+       .name = "format",
+       .attrs = attrs_empty,
+};
+
+static struct attribute_group empty_caps_group = {
+       .name = "caps",
+       .attrs = attrs_empty,
+};
+
+static const struct attribute_group *empty_attr_groups[] = {
+       &empty_format_group,
+       &empty_caps_group,
+       NULL,
+};
+
 PMU_FORMAT_ATTR(rand_en,       "config:57");
 PMU_FORMAT_ATTR(cnt_ctl,       "config:19");
+PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
+PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
+PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");
 
-static struct attribute *ibs_fetch_format_attrs[] = {
+static umode_t
+zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+       return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
+}
+
+static struct attribute *rand_en_attrs[] = {
        &format_attr_rand_en.attr,
        NULL,
 };
 
-static struct attribute *ibs_op_format_attrs[] = {
-       NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+static struct attribute *fetch_l3missonly_attrs[] = {
+       &fetch_l3missonly.attr.attr,
+       NULL,
+};
+
+static struct attribute *zen4_ibs_extensions_attrs[] = {
+       &zen4_ibs_extensions.attr.attr,
+       NULL,
+};
+
+static struct attribute_group group_rand_en = {
+       .name = "format",
+       .attrs = rand_en_attrs,
+};
+
+static struct attribute_group group_fetch_l3missonly = {
+       .name = "format",
+       .attrs = fetch_l3missonly_attrs,
+       .is_visible = zen4_ibs_extensions_is_visible,
+};
+
+static struct attribute_group group_zen4_ibs_extensions = {
+       .name = "caps",
+       .attrs = zen4_ibs_extensions_attrs,
+       .is_visible = zen4_ibs_extensions_is_visible,
+};
+
+static const struct attribute_group *fetch_attr_groups[] = {
+       &group_rand_en,
+       &empty_caps_group,
+       NULL,
+};
+
+static const struct attribute_group *fetch_attr_update[] = {
+       &group_fetch_l3missonly,
+       &group_zen4_ibs_extensions,
+       NULL,
+};
+
+static umode_t
+cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+       return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
+}
+
+static struct attribute *cnt_ctl_attrs[] = {
+       &format_attr_cnt_ctl.attr,
+       NULL,
+};
+
+static struct attribute *op_l3missonly_attrs[] = {
+       &op_l3missonly.attr.attr,
+       NULL,
+};
+
+static struct attribute_group group_cnt_ctl = {
+       .name = "format",
+       .attrs = cnt_ctl_attrs,
+       .is_visible = cnt_ctl_is_visible,
+};
+
+static struct attribute_group group_op_l3missonly = {
+       .name = "format",
+       .attrs = op_l3missonly_attrs,
+       .is_visible = zen4_ibs_extensions_is_visible,
+};
+
+static const struct attribute_group *op_attr_update[] = {
+       &group_cnt_ctl,
+       &group_op_l3missonly,
+       &group_zen4_ibs_extensions,
        NULL,
 };
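Assuming the standard event_source sysfs layout, the net effect of the static groups plus the attr_update groups is:

        /sys/bus/event_source/devices/ibs_fetch/format/rand_en
        /sys/bus/event_source/devices/ibs_fetch/format/l3missonly         (Zen4 only)
        /sys/bus/event_source/devices/ibs_fetch/caps/zen4_ibs_extensions  (Zen4 only)
        /sys/bus/event_source/devices/ibs_op/format/cnt_ctl               (IBS_CAPS_OPCNT only)
        /sys/bus/event_source/devices/ibs_op/format/l3missonly            (Zen4 only)
        /sys/bus/event_source/devices/ibs_op/caps/zen4_ibs_extensions     (Zen4 only)

The is_visible hooks keep the conditional files hidden on parts without the corresponding capability bits.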
 
@@ -551,7 +659,6 @@ static struct perf_ibs perf_ibs_fetch = {
        .max_period             = IBS_FETCH_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSFETCH_REG_MASK },
        .offset_max             = MSR_AMD64_IBSFETCH_REG_COUNT,
-       .format_attrs           = ibs_fetch_format_attrs,
 
        .get_count              = get_ibs_fetch_count,
 };
@@ -577,7 +684,6 @@ static struct perf_ibs perf_ibs_op = {
        .max_period             = IBS_OP_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSOP_REG_MASK },
        .offset_max             = MSR_AMD64_IBSOP_REG_COUNT,
-       .format_attrs           = ibs_op_format_attrs,
 
        .get_count              = get_ibs_op_count,
 };
@@ -687,6 +793,14 @@ fail:
                data.raw = &raw;
        }
 
+       /*
+        * The rip recorded by IbsOpRip will not be consistent with the rsp
+        * and rbp recorded as part of the interrupt regs, so we need to use
+        * the rip from the interrupt regs while unwinding the call stack.
+        */
+       if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+               data.callchain = perf_callchain(event, iregs);
+
        throttle = perf_event_overflow(event, &data, &regs);
 out:
        if (throttle) {
@@ -739,17 +853,6 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 
        perf_ibs->pcpu = pcpu;
 
-       /* register attributes */
-       if (perf_ibs->format_attrs[0]) {
-               memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
-               perf_ibs->format_group.name     = "format";
-               perf_ibs->format_group.attrs    = perf_ibs->format_attrs;
-
-               memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
-               perf_ibs->attr_groups[0]        = &perf_ibs->format_group;
-               perf_ibs->pmu.attr_groups       = perf_ibs->attr_groups;
-       }
-
        ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
        if (ret) {
                perf_ibs->pcpu = NULL;
@@ -759,10 +862,8 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
        return ret;
 }
 
-static __init void perf_event_ibs_init(void)
+static __init int perf_ibs_fetch_init(void)
 {
-       struct attribute **attr = ibs_op_format_attrs;
-
        /*
         * Some chips fail to reset the fetch count when it is written; instead
         * they need a 0-1 transition of IbsFetchEn.
@@ -773,12 +874,19 @@ static __init void perf_event_ibs_init(void)
        if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
                perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;
 
-       perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+       if (ibs_caps & IBS_CAPS_ZEN4)
+               perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;
+
+       perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
+       perf_ibs_fetch.pmu.attr_update = fetch_attr_update;
 
-       if (ibs_caps & IBS_CAPS_OPCNT) {
+       return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+}
+
+static __init int perf_ibs_op_init(void)
+{
+       if (ibs_caps & IBS_CAPS_OPCNT)
                perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
-               *attr++ = &format_attr_cnt_ctl.attr;
-       }
 
        if (ibs_caps & IBS_CAPS_OPCNTEXT) {
                perf_ibs_op.max_period  |= IBS_OP_MAX_CNT_EXT_MASK;
@@ -786,15 +894,52 @@ static __init void perf_event_ibs_init(void)
                perf_ibs_op.cnt_mask    |= IBS_OP_MAX_CNT_EXT_MASK;
        }
 
-       perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+       if (ibs_caps & IBS_CAPS_ZEN4)
+               perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
+
+       perf_ibs_op.pmu.attr_groups = empty_attr_groups;
+       perf_ibs_op.pmu.attr_update = op_attr_update;
+
+       return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+}
+
+static __init int perf_event_ibs_init(void)
+{
+       int ret;
+
+       ret = perf_ibs_fetch_init();
+       if (ret)
+               return ret;
+
+       ret = perf_ibs_op_init();
+       if (ret)
+               goto err_op;
+
+       ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
+       if (ret)
+               goto err_nmi;
 
-       register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
        pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+       return 0;
+
+err_nmi:
+       perf_pmu_unregister(&perf_ibs_op.pmu);
+       free_percpu(perf_ibs_op.pcpu);
+       perf_ibs_op.pcpu = NULL;
+err_op:
+       perf_pmu_unregister(&perf_ibs_fetch.pmu);
+       free_percpu(perf_ibs_fetch.pcpu);
+       perf_ibs_fetch.pcpu = NULL;
+
+       return ret;
 }
 
 #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
 
-static __init void perf_event_ibs_init(void) { }
+static __init int perf_event_ibs_init(void)
+{
+       return 0;
+}
 
 #endif
 
@@ -1064,9 +1209,7 @@ static __init int amd_ibs_init(void)
                          x86_pmu_amd_ibs_starting_cpu,
                          x86_pmu_amd_ibs_dying_cpu);
 
-       perf_event_ibs_init();
-
-       return 0;
+       return perf_event_ibs_init();
 }
 
 /* Since we need the pci subsystem to init ibs we can't do this earlier: */
index eef816fc216d344198f170e634e7944735920d12..30788894124f0e265ddda6fd4b76b8ea1c82b3fd 100644 (file)
@@ -1338,6 +1338,10 @@ static void x86_pmu_enable(struct pmu *pmu)
                        if (hwc->state & PERF_HES_ARCH)
                                continue;
 
+                       /*
+                        * If cpuc->enabled == 0, x86_pmu_enable_event()
+                        * skips the wrmsr, so nothing is reprogrammed here
+                        */
                        x86_pmu_start(event, PERF_EF_RELOAD);
                }
                cpuc->n_added = 0;
@@ -1704,11 +1708,15 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
                 * event overflow
                 */
                handled++;
-               perf_sample_data_init(&data, 0, event->hw.last_period);
 
                if (!x86_perf_event_set_period(event))
                        continue;
 
+               perf_sample_data_init(&data, 0, event->hw.last_period);
+
+               if (has_branch_stack(event))
+                       data.br_stack = &cpuc->lbr_stack;
+
                if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
        }
@@ -1837,7 +1845,7 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha
 
        /* string trumps id */
        if (pmu_attr->event_str)
-               return sprintf(page, "%s", pmu_attr->event_str);
+               return sprintf(page, "%s\n", pmu_attr->event_str);
 
        return x86_pmu.events_sysfs_show(page, config);
 }
index fc7f458eb3de6351ee25bee5e8b4ae5df8564245..955ae91c56dca5757222bc45fe8c202457921ca0 100644 (file)
@@ -6216,7 +6216,9 @@ __init int intel_pmu_init(void)
 
        case INTEL_FAM6_ALDERLAKE:
        case INTEL_FAM6_ALDERLAKE_L:
+       case INTEL_FAM6_ALDERLAKE_N:
        case INTEL_FAM6_RAPTORLAKE:
+       case INTEL_FAM6_RAPTORLAKE_P:
                /*
                 * Alder Lake has 2 types of CPU, core and atom.
                 *
index 48e5db21142c2257a29233e9037fb881583ad85d..8ec23f47fee9a7a4dd10632c76528d4c61549a7c 100644 (file)
@@ -682,7 +682,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &icl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_cstates),
+       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         &adl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &adl_cstates),
+       X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,        &adl_cstates),
        { },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
index fe1742c4ca4986cb3da2f46f6af101bf20d0b5c4..13179f31fe10facf139f4c7c00fd8627363a5594 100644 (file)
@@ -769,6 +769,7 @@ void intel_pmu_lbr_disable_all(void)
 void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 {
        unsigned long mask = x86_pmu.lbr_nr - 1;
+       struct perf_branch_entry *br = cpuc->lbr_entries;
        u64 tos = intel_pmu_lbr_tos();
        int i;
 
@@ -784,15 +785,11 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 
                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
 
-               cpuc->lbr_entries[i].from       = msr_lastbranch.from;
-               cpuc->lbr_entries[i].to         = msr_lastbranch.to;
-               cpuc->lbr_entries[i].mispred    = 0;
-               cpuc->lbr_entries[i].predicted  = 0;
-               cpuc->lbr_entries[i].in_tx      = 0;
-               cpuc->lbr_entries[i].abort      = 0;
-               cpuc->lbr_entries[i].cycles     = 0;
-               cpuc->lbr_entries[i].type       = 0;
-               cpuc->lbr_entries[i].reserved   = 0;
+               perf_clear_branch_entry_bitfields(br);
+
+               br->from        = msr_lastbranch.from;
+               br->to          = msr_lastbranch.to;
+               br++;
        }
        cpuc->lbr_stack.nr = i;
        cpuc->lbr_stack.hw_idx = tos;
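perf_clear_branch_entry_bitfields() is added elsewhere in this series (include/linux/perf_event.h); assuming it simply zeroes the bitfields that the removed open-coded assignments used to clear, it would look roughly like:

static inline void
perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
{
        br->mispred   = 0;
        br->predicted = 0;
        br->in_tx     = 0;
        br->abort     = 0;
        br->cycles    = 0;
        br->type      = 0;
        br->reserved  = 0;
}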
@@ -807,6 +804,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
        bool need_info = false, call_stack = false;
        unsigned long mask = x86_pmu.lbr_nr - 1;
+       struct perf_branch_entry *br = cpuc->lbr_entries;
        u64 tos = intel_pmu_lbr_tos();
        int i;
        int out = 0;
@@ -878,15 +876,14 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
                if (abort && x86_pmu.lbr_double_abort && out > 0)
                        out--;
 
-               cpuc->lbr_entries[out].from      = from;
-               cpuc->lbr_entries[out].to        = to;
-               cpuc->lbr_entries[out].mispred   = mis;
-               cpuc->lbr_entries[out].predicted = pred;
-               cpuc->lbr_entries[out].in_tx     = in_tx;
-               cpuc->lbr_entries[out].abort     = abort;
-               cpuc->lbr_entries[out].cycles    = cycles;
-               cpuc->lbr_entries[out].type      = 0;
-               cpuc->lbr_entries[out].reserved  = 0;
+               perf_clear_branch_entry_bitfields(br+out);
+               br[out].from     = from;
+               br[out].to       = to;
+               br[out].mispred  = mis;
+               br[out].predicted = pred;
+               br[out].in_tx    = in_tx;
+               br[out].abort    = abort;
+               br[out].cycles   = cycles;
                out++;
        }
        cpuc->lbr_stack.nr = out;
@@ -951,6 +948,8 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
                to = rdlbr_to(i, lbr);
                info = rdlbr_info(i, lbr);
 
+               perf_clear_branch_entry_bitfields(e);
+
                e->from         = from;
                e->to           = to;
                e->mispred      = get_lbr_mispred(info);
@@ -959,7 +958,6 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
                e->abort        = !!(info & LBR_INFO_ABORT);
                e->cycles       = get_lbr_cycles(info);
                e->type         = get_lbr_br_type(info);
-               e->reserved     = 0;
        }
 
        cpuc->lbr_stack.nr = i;
index 7695dcae280e7067db8658827398489efbc04e54..db6c31bca80927cb14bd244da9daab6e6cf9a550 100644 (file)
@@ -1828,7 +1828,9 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rkl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_uncore_init),
+       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         &adl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &adl_uncore_init),
+       X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,        &adl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &spr_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
        {},
index 4262351f52b60b5fd0a0f1bbbf2828c61f33c76d..ce440011cc4e411892ae6cd4009b4acba86df1f5 100644 (file)
 #define PCI_DEVICE_ID_INTEL_ADL_14_IMC         0x4650
 #define PCI_DEVICE_ID_INTEL_ADL_15_IMC         0x4668
 #define PCI_DEVICE_ID_INTEL_ADL_16_IMC         0x4670
+#define PCI_DEVICE_ID_INTEL_ADL_17_IMC         0x4614
+#define PCI_DEVICE_ID_INTEL_ADL_18_IMC         0x4617
+#define PCI_DEVICE_ID_INTEL_ADL_19_IMC         0x4618
+#define PCI_DEVICE_ID_INTEL_ADL_20_IMC         0x461B
+#define PCI_DEVICE_ID_INTEL_ADL_21_IMC         0x461C
 #define PCI_DEVICE_ID_INTEL_RPL_1_IMC          0xA700
 #define PCI_DEVICE_ID_INTEL_RPL_2_IMC          0xA702
 #define PCI_DEVICE_ID_INTEL_RPL_3_IMC          0xA706
 #define PCI_DEVICE_ID_INTEL_RPL_4_IMC          0xA709
+#define PCI_DEVICE_ID_INTEL_RPL_5_IMC          0xA701
+#define PCI_DEVICE_ID_INTEL_RPL_6_IMC          0xA703
+#define PCI_DEVICE_ID_INTEL_RPL_7_IMC          0xA704
+#define PCI_DEVICE_ID_INTEL_RPL_8_IMC          0xA705
+#define PCI_DEVICE_ID_INTEL_RPL_9_IMC          0xA706
+#define PCI_DEVICE_ID_INTEL_RPL_10_IMC         0xA707
+#define PCI_DEVICE_ID_INTEL_RPL_11_IMC         0xA708
+#define PCI_DEVICE_ID_INTEL_RPL_12_IMC         0xA709
+#define PCI_DEVICE_ID_INTEL_RPL_13_IMC         0xA70a
+#define PCI_DEVICE_ID_INTEL_RPL_14_IMC         0xA70b
+#define PCI_DEVICE_ID_INTEL_RPL_15_IMC         0xA715
+#define PCI_DEVICE_ID_INTEL_RPL_16_IMC         0xA716
+#define PCI_DEVICE_ID_INTEL_RPL_17_IMC         0xA717
+#define PCI_DEVICE_ID_INTEL_RPL_18_IMC         0xA718
+#define PCI_DEVICE_ID_INTEL_RPL_19_IMC         0xA719
+#define PCI_DEVICE_ID_INTEL_RPL_20_IMC         0xA71A
+#define PCI_DEVICE_ID_INTEL_RPL_21_IMC         0xA71B
+#define PCI_DEVICE_ID_INTEL_RPL_22_IMC         0xA71C
+#define PCI_DEVICE_ID_INTEL_RPL_23_IMC         0xA728
+#define PCI_DEVICE_ID_INTEL_RPL_24_IMC         0xA729
+
+
+#define IMC_UNCORE_DEV(a)                                              \
+{                                                                      \
+       PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_##a##_IMC), \
+       .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),      \
+}
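For reference, IMC_UNCORE_DEV(SNB) expands to exactly the open-coded table entry it replaces further down:

{
        PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
        .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},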
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
@@ -849,242 +882,80 @@ static struct intel_uncore_type *snb_pci_uncores[] = {
 };
 
 static const struct pci_device_id snb_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
+       IMC_UNCORE_DEV(SNB),
        { /* end: all zeroes */ },
 };
 
 static const struct pci_device_id ivb_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
+       IMC_UNCORE_DEV(IVB),
+       IMC_UNCORE_DEV(IVB_E3),
        { /* end: all zeroes */ },
 };
 
 static const struct pci_device_id hsw_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
+       IMC_UNCORE_DEV(HSW),
+       IMC_UNCORE_DEV(HSW_U),
        { /* end: all zeroes */ },
 };
 
 static const struct pci_device_id bdw_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
+       IMC_UNCORE_DEV(BDW),
        { /* end: all zeroes */ },
 };
 
 static const struct pci_device_id skl_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_E3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H1_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H2_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U1_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U2_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S1_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S2_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S4_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S5_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
+       IMC_UNCORE_DEV(SKL_Y),
+       IMC_UNCORE_DEV(SKL_U),
+       IMC_UNCORE_DEV(SKL_HD),
+       IMC_UNCORE_DEV(SKL_HQ),
+       IMC_UNCORE_DEV(SKL_SD),
+       IMC_UNCORE_DEV(SKL_SQ),
+       IMC_UNCORE_DEV(SKL_E3),
+       IMC_UNCORE_DEV(KBL_Y),
+       IMC_UNCORE_DEV(KBL_U),
+       IMC_UNCORE_DEV(KBL_UQ),
+       IMC_UNCORE_DEV(KBL_SD),
+       IMC_UNCORE_DEV(KBL_SQ),
+       IMC_UNCORE_DEV(KBL_HQ),
+       IMC_UNCORE_DEV(KBL_WQ),
+       IMC_UNCORE_DEV(CFL_2U),
+       IMC_UNCORE_DEV(CFL_4U),
+       IMC_UNCORE_DEV(CFL_4H),
+       IMC_UNCORE_DEV(CFL_6H),
+       IMC_UNCORE_DEV(CFL_2S_D),
+       IMC_UNCORE_DEV(CFL_4S_D),
+       IMC_UNCORE_DEV(CFL_6S_D),
+       IMC_UNCORE_DEV(CFL_8S_D),
+       IMC_UNCORE_DEV(CFL_4S_W),
+       IMC_UNCORE_DEV(CFL_6S_W),
+       IMC_UNCORE_DEV(CFL_8S_W),
+       IMC_UNCORE_DEV(CFL_4S_S),
+       IMC_UNCORE_DEV(CFL_6S_S),
+       IMC_UNCORE_DEV(CFL_8S_S),
+       IMC_UNCORE_DEV(AML_YD),
+       IMC_UNCORE_DEV(AML_YQ),
+       IMC_UNCORE_DEV(WHL_UQ),
+       IMC_UNCORE_DEV(WHL_4_UQ),
+       IMC_UNCORE_DEV(WHL_UD),
+       IMC_UNCORE_DEV(CML_H1),
+       IMC_UNCORE_DEV(CML_H2),
+       IMC_UNCORE_DEV(CML_H3),
+       IMC_UNCORE_DEV(CML_U1),
+       IMC_UNCORE_DEV(CML_U2),
+       IMC_UNCORE_DEV(CML_U3),
+       IMC_UNCORE_DEV(CML_S1),
+       IMC_UNCORE_DEV(CML_S2),
+       IMC_UNCORE_DEV(CML_S3),
+       IMC_UNCORE_DEV(CML_S4),
+       IMC_UNCORE_DEV(CML_S5),
        { /* end: all zeroes */ },
 };
 
 static const struct pci_device_id icl_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U2_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RKL_1_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RKL_2_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
+       IMC_UNCORE_DEV(ICL_U),
+       IMC_UNCORE_DEV(ICL_U2),
+       IMC_UNCORE_DEV(RKL_1),
+       IMC_UNCORE_DEV(RKL_2),
        { /* end: all zeroes */ },
 };
 
@@ -1326,106 +1197,57 @@ void nhm_uncore_cpu_init(void)
 /* Tiger Lake MMIO uncore support */
 
 static const struct pci_device_id tgl_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U1_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U2_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U4_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_H_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_1_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_4_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_5_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_6_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_7_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_8_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_9_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_10_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_11_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_12_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_13_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_14_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_15_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_1_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_2_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_4_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
+       IMC_UNCORE_DEV(TGL_U1),
+       IMC_UNCORE_DEV(TGL_U2),
+       IMC_UNCORE_DEV(TGL_U3),
+       IMC_UNCORE_DEV(TGL_U4),
+       IMC_UNCORE_DEV(TGL_H),
+       IMC_UNCORE_DEV(ADL_1),
+       IMC_UNCORE_DEV(ADL_2),
+       IMC_UNCORE_DEV(ADL_3),
+       IMC_UNCORE_DEV(ADL_4),
+       IMC_UNCORE_DEV(ADL_5),
+       IMC_UNCORE_DEV(ADL_6),
+       IMC_UNCORE_DEV(ADL_7),
+       IMC_UNCORE_DEV(ADL_8),
+       IMC_UNCORE_DEV(ADL_9),
+       IMC_UNCORE_DEV(ADL_10),
+       IMC_UNCORE_DEV(ADL_11),
+       IMC_UNCORE_DEV(ADL_12),
+       IMC_UNCORE_DEV(ADL_13),
+       IMC_UNCORE_DEV(ADL_14),
+       IMC_UNCORE_DEV(ADL_15),
+       IMC_UNCORE_DEV(ADL_16),
+       IMC_UNCORE_DEV(ADL_17),
+       IMC_UNCORE_DEV(ADL_18),
+       IMC_UNCORE_DEV(ADL_19),
+       IMC_UNCORE_DEV(ADL_20),
+       IMC_UNCORE_DEV(ADL_21),
+       IMC_UNCORE_DEV(RPL_1),
+       IMC_UNCORE_DEV(RPL_2),
+       IMC_UNCORE_DEV(RPL_3),
+       IMC_UNCORE_DEV(RPL_4),
+       IMC_UNCORE_DEV(RPL_5),
+       IMC_UNCORE_DEV(RPL_6),
+       IMC_UNCORE_DEV(RPL_7),
+       IMC_UNCORE_DEV(RPL_8),
+       IMC_UNCORE_DEV(RPL_9),
+       IMC_UNCORE_DEV(RPL_10),
+       IMC_UNCORE_DEV(RPL_11),
+       IMC_UNCORE_DEV(RPL_12),
+       IMC_UNCORE_DEV(RPL_13),
+       IMC_UNCORE_DEV(RPL_14),
+       IMC_UNCORE_DEV(RPL_15),
+       IMC_UNCORE_DEV(RPL_16),
+       IMC_UNCORE_DEV(RPL_17),
+       IMC_UNCORE_DEV(RPL_18),
+       IMC_UNCORE_DEV(RPL_19),
+       IMC_UNCORE_DEV(RPL_20),
+       IMC_UNCORE_DEV(RPL_21),
+       IMC_UNCORE_DEV(RPL_22),
+       IMC_UNCORE_DEV(RPL_23),
+       IMC_UNCORE_DEV(RPL_24),
+       IMC_UNCORE_DEV(RPL_25),
        { /* end: all zeroes */ }
 };
 
index 6d759f88315c6ba9bb3bdead7611196a607f43c7..ac542f98c0705e1220f0fa0689097deda823165c 100644 (file)
@@ -103,7 +103,9 @@ static bool test_intel(int idx, void *data)
        case INTEL_FAM6_ROCKETLAKE:
        case INTEL_FAM6_ALDERLAKE:
        case INTEL_FAM6_ALDERLAKE_L:
+       case INTEL_FAM6_ALDERLAKE_N:
        case INTEL_FAM6_RAPTORLAKE:
+       case INTEL_FAM6_RAPTORLAKE_P:
                if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
                        return true;
                break;
index 150261d929b9e945bb4aa7b52f8b664dcbe20daa..21a5482bcf8458c29a62fb6670a4eadaa4aaa683 100644 (file)
@@ -67,22 +67,23 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
 /*
  * struct hw_perf_event.flags flags
  */
-#define PERF_X86_EVENT_PEBS_LDLAT      0x0001 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST         0x0002 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW     0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_PEBS_LD_HSW     0x0008 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW     0x0010 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL            0x0020 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC         0x0040 /* dynamic alloc'd constraint */
-
-#define PERF_X86_EVENT_EXCL_ACCT       0x0100 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD     0x0200 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_LARGE_PEBS      0x0400 /* use large PEBS */
-#define PERF_X86_EVENT_PEBS_VIA_PT     0x0800 /* use PT buffer for PEBS */
-#define PERF_X86_EVENT_PAIR            0x1000 /* Large Increment per Cycle */
-#define PERF_X86_EVENT_LBR_SELECT      0x2000 /* Save/Restore MSR_LBR_SELECT */
-#define PERF_X86_EVENT_TOPDOWN         0x4000 /* Count Topdown slots/metrics events */
-#define PERF_X86_EVENT_PEBS_STLAT      0x8000 /* st+stlat data address sampling */
+#define PERF_X86_EVENT_PEBS_LDLAT      0x00001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST         0x00002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW     0x00004 /* haswell style datala, store */
+#define PERF_X86_EVENT_PEBS_LD_HSW     0x00008 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW     0x00010 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL            0x00020 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC         0x00040 /* dynamic alloc'd constraint */
+
+#define PERF_X86_EVENT_EXCL_ACCT       0x00100 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD     0x00200 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_LARGE_PEBS      0x00400 /* use large PEBS */
+#define PERF_X86_EVENT_PEBS_VIA_PT     0x00800 /* use PT buffer for PEBS */
+#define PERF_X86_EVENT_PAIR            0x01000 /* Large Increment per Cycle */
+#define PERF_X86_EVENT_LBR_SELECT      0x02000 /* Save/Restore MSR_LBR_SELECT */
+#define PERF_X86_EVENT_TOPDOWN         0x04000 /* Count Topdown slots/metrics events */
+#define PERF_X86_EVENT_PEBS_STLAT      0x08000 /* st+stlat data address sampling */
+#define PERF_X86_EVENT_AMD_BRS         0x10000 /* AMD Branch Sampling */
 
 static inline bool is_topdown_count(struct perf_event *event)
 {
@@ -325,6 +326,8 @@ struct cpu_hw_events {
         * AMD specific bits
         */
        struct amd_nb                   *amd_nb;
+       int                             brs_active; /* BRS is enabled */
+
        /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
        u64                             perf_ctr_virt_mask;
        int                             n_pair; /* Large increment events */
@@ -1105,6 +1108,11 @@ int x86_pmu_hw_config(struct perf_event *event);
 
 void x86_pmu_disable_all(void);
 
+static inline bool has_amd_brs(struct hw_perf_event *hwc)
+{
+       return hwc->flags & PERF_X86_EVENT_AMD_BRS;
+}
+
 static inline bool is_counter_pair(struct hw_perf_event *hwc)
 {
        return hwc->flags & PERF_X86_EVENT_PAIR;
@@ -1211,6 +1219,75 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
 
 int amd_pmu_init(void);
 
+#ifdef CONFIG_PERF_EVENTS_AMD_BRS
+int amd_brs_init(void);
+void amd_brs_disable(void);
+void amd_brs_enable(void);
+void amd_brs_enable_all(void);
+void amd_brs_disable_all(void);
+void amd_brs_drain(void);
+void amd_brs_lopwr_init(void);
+int amd_brs_setup_filter(struct perf_event *event);
+void amd_brs_reset(void);
+
+static inline void amd_pmu_brs_add(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       perf_sched_cb_inc(event->ctx->pmu);
+       cpuc->lbr_users++;
+       /*
+        * No need to reset BRS because it is reset
+        * on brs_enable() and it is saturating
+        */
+}
+
+static inline void amd_pmu_brs_del(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       cpuc->lbr_users--;
+       WARN_ON_ONCE(cpuc->lbr_users < 0);
+
+       perf_sched_cb_dec(event->ctx->pmu);
+}
+
+void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
+#else
+static inline int amd_brs_init(void)
+{
+       return 0;
+}
+static inline void amd_brs_disable(void) {}
+static inline void amd_brs_enable(void) {}
+static inline void amd_brs_drain(void) {}
+static inline void amd_brs_lopwr_init(void) {}
+static inline void amd_brs_disable_all(void) {}
+static inline int amd_brs_setup_filter(struct perf_event *event)
+{
+       return 0;
+}
+static inline void amd_brs_reset(void) {}
+
+static inline void amd_pmu_brs_add(struct perf_event *event)
+{
+}
+
+static inline void amd_pmu_brs_del(struct perf_event *event)
+{
+}
+
+static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+}
+
+static inline void amd_brs_enable_all(void)
+{
+}
+
+#endif
+
 #else /* CONFIG_CPU_SUP_AMD */
 
 static inline int amd_pmu_init(void)
@@ -1218,6 +1295,22 @@ static inline int amd_pmu_init(void)
        return 0;
 }
 
+static inline int amd_brs_init(void)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void amd_brs_drain(void)
+{
+}
+
+static inline void amd_brs_enable_all(void)
+{
+}
+
+static inline void amd_brs_disable_all(void)
+{
+}
 #endif /* CONFIG_CPU_SUP_AMD */
 
 static inline int is_pebs_pt(struct perf_event *event)
index 8e4d0391ff6c9bafa9bcb7dde33478bd6f569d09..e481056698de145f82c770ca9730168a46738dab 100644 (file)
@@ -5,7 +5,5 @@
 
 obj-$(CONFIG_IA32_EMULATION) := ia32_signal.o
 
-obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
-
 audit-class-$(CONFIG_AUDIT) := audit.o
 obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
deleted file mode 100644 (file)
index 9bd1524..0000000
+++ /dev/null
@@ -1,325 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  a.out loader for x86-64
- *
- *  Copyright (C) 1991, 1992, 1996  Linus Torvalds
- *  Hacked together by Andi Kleen
- */
-
-#include <linux/module.h>
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/a.out.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/binfmts.h>
-#include <linux/personality.h>
-#include <linux/init.h>
-#include <linux/jiffies.h>
-#include <linux/perf_event.h>
-#include <linux/sched/task_stack.h>
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/user32.h>
-#include <asm/ia32.h>
-
-#undef WARN_OLD
-
-static int load_aout_binary(struct linux_binprm *);
-static int load_aout_library(struct file *);
-
-static struct linux_binfmt aout_format = {
-       .module         = THIS_MODULE,
-       .load_binary    = load_aout_binary,
-       .load_shlib     = load_aout_library,
-};
-
-static int set_brk(unsigned long start, unsigned long end)
-{
-       start = PAGE_ALIGN(start);
-       end = PAGE_ALIGN(end);
-       if (end <= start)
-               return 0;
-       return vm_brk(start, end - start);
-}
-
-
-/*
- * create_aout_tables() parses the env- and arg-strings in new user
- * memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
- */
-static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
-{
-       u32 __user *argv, *envp, *sp;
-       int argc = bprm->argc, envc = bprm->envc;
-
-       sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
-       sp -= envc+1;
-       envp = sp;
-       sp -= argc+1;
-       argv = sp;
-       put_user((unsigned long) envp, --sp);
-       put_user((unsigned long) argv, --sp);
-       put_user(argc, --sp);
-       current->mm->arg_start = (unsigned long) p;
-       while (argc-- > 0) {
-               char c;
-
-               put_user((u32)(unsigned long)p, argv++);
-               do {
-                       get_user(c, p++);
-               } while (c);
-       }
-       put_user(0, argv);
-       current->mm->arg_end = current->mm->env_start = (unsigned long) p;
-       while (envc-- > 0) {
-               char c;
-
-               put_user((u32)(unsigned long)p, envp++);
-               do {
-                       get_user(c, p++);
-               } while (c);
-       }
-       put_user(0, envp);
-       current->mm->env_end = (unsigned long) p;
-       return sp;
-}
-
-/*
- * These are the functions used to load a.out style executables and shared
- * libraries.  There is no binary dependent code anywhere else.
- */
-static int load_aout_binary(struct linux_binprm *bprm)
-{
-       unsigned long error, fd_offset, rlim;
-       struct pt_regs *regs = current_pt_regs();
-       struct exec ex;
-       int retval;
-
-       ex = *((struct exec *) bprm->buf);              /* exec-header */
-       if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
-            N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
-           N_TRSIZE(ex) || N_DRSIZE(ex) ||
-           i_size_read(file_inode(bprm->file)) <
-           ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
-               return -ENOEXEC;
-       }
-
-       fd_offset = N_TXTOFF(ex);
-
-       /* Check initial limits. This avoids letting people circumvent
-        * size limits imposed on them by creating programs with large
-        * arrays in the data or bss.
-        */
-       rlim = rlimit(RLIMIT_DATA);
-       if (rlim >= RLIM_INFINITY)
-               rlim = ~0;
-       if (ex.a_data + ex.a_bss > rlim)
-               return -ENOMEM;
-
-       /* Flush all traces of the currently running executable */
-       retval = begin_new_exec(bprm);
-       if (retval)
-               return retval;
-
-       /* OK, This is the point of no return */
-       set_personality(PER_LINUX);
-       set_personality_ia32(false);
-
-       setup_new_exec(bprm);
-
-       regs->cs = __USER32_CS;
-       regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-               regs->r13 = regs->r14 = regs->r15 = 0;
-
-       current->mm->end_code = ex.a_text +
-               (current->mm->start_code = N_TXTADDR(ex));
-       current->mm->end_data = ex.a_data +
-               (current->mm->start_data = N_DATADDR(ex));
-       current->mm->brk = ex.a_bss +
-               (current->mm->start_brk = N_BSSADDR(ex));
-
-       retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
-       if (retval < 0)
-               return retval;
-
-       if (N_MAGIC(ex) == OMAGIC) {
-               unsigned long text_addr, map_size;
-
-               text_addr = N_TXTADDR(ex);
-               map_size = ex.a_text+ex.a_data;
-
-               error = vm_brk(text_addr & PAGE_MASK, map_size);
-
-               if (error)
-                       return error;
-
-               error = read_code(bprm->file, text_addr, 32,
-                                 ex.a_text + ex.a_data);
-               if ((signed long)error < 0)
-                       return error;
-       } else {
-#ifdef WARN_OLD
-               static unsigned long error_time, error_time2;
-               if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
-                   (N_MAGIC(ex) != NMAGIC) &&
-                               time_after(jiffies, error_time2 + 5*HZ)) {
-                       printk(KERN_NOTICE "executable not page aligned\n");
-                       error_time2 = jiffies;
-               }
-
-               if ((fd_offset & ~PAGE_MASK) != 0 &&
-                           time_after(jiffies, error_time + 5*HZ)) {
-                       printk(KERN_WARNING
-                              "fd_offset is not page aligned. Please convert "
-                              "program: %pD\n",
-                              bprm->file);
-                       error_time = jiffies;
-               }
-#endif
-
-               if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
-                       error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
-                       if (error)
-                               return error;
-
-                       read_code(bprm->file, N_TXTADDR(ex), fd_offset,
-                                       ex.a_text+ex.a_data);
-                       goto beyond_if;
-               }
-
-               error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
-                               PROT_READ | PROT_EXEC,
-                               MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
-                               fd_offset);
-
-               if (error != N_TXTADDR(ex))
-                       return error;
-
-               error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
-                               PROT_READ | PROT_WRITE | PROT_EXEC,
-                               MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
-                               fd_offset + ex.a_text);
-               if (error != N_DATADDR(ex))
-                       return error;
-       }
-
-beyond_if:
-       error = set_brk(current->mm->start_brk, current->mm->brk);
-       if (error)
-               return error;
-
-       set_binfmt(&aout_format);
-
-       current->mm->start_stack =
-               (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
-       /* start thread */
-       loadsegment(fs, 0);
-       loadsegment(ds, __USER32_DS);
-       loadsegment(es, __USER32_DS);
-       load_gs_index(0);
-       (regs)->ip = ex.a_entry;
-       (regs)->sp = current->mm->start_stack;
-       (regs)->flags = 0x200;
-       (regs)->cs = __USER32_CS;
-       (regs)->ss = __USER32_DS;
-       regs->r8 = regs->r9 = regs->r10 = regs->r11 =
-       regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
-       return 0;
-}
-
-static int load_aout_library(struct file *file)
-{
-       unsigned long bss, start_addr, len, error;
-       int retval;
-       struct exec ex;
-       loff_t pos = 0;
-
-       retval = -ENOEXEC;
-       error = kernel_read(file, &ex, sizeof(ex), &pos);
-       if (error != sizeof(ex))
-               goto out;
-
-       /* We come in here for the regular a.out style of shared libraries */
-       if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
-           N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
-           i_size_read(file_inode(file)) <
-           ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
-               goto out;
-       }
-
-       if (N_FLAGS(ex))
-               goto out;
-
-       /* For  QMAGIC, the starting address is 0x20 into the page.  We mask
-          this off to get the starting address for the page */
-
-       start_addr =  ex.a_entry & 0xfffff000;
-
-       if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
-#ifdef WARN_OLD
-               static unsigned long error_time;
-               if (time_after(jiffies, error_time + 5*HZ)) {
-                       printk(KERN_WARNING
-                              "N_TXTOFF is not page aligned. Please convert "
-                              "library: %pD\n",
-                              file);
-                       error_time = jiffies;
-               }
-#endif
-               retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
-               if (retval)
-                       goto out;
-
-               read_code(file, start_addr, N_TXTOFF(ex),
-                         ex.a_text + ex.a_data);
-               retval = 0;
-               goto out;
-       }
-       /* Now use mmap to map the library into memory. */
-       error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
-                       PROT_READ | PROT_WRITE | PROT_EXEC,
-                       MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
-                       N_TXTOFF(ex));
-       retval = error;
-       if (error != start_addr)
-               goto out;
-
-       len = PAGE_ALIGN(ex.a_text + ex.a_data);
-       bss = ex.a_text + ex.a_data + ex.a_bss;
-       if (bss > len) {
-               retval = vm_brk(start_addr + len, bss - len);
-               if (retval)
-                       goto out;
-       }
-       retval = 0;
-out:
-       return retval;
-}
-
-static int __init init_aout_binfmt(void)
-{
-       register_binfmt(&aout_format);
-       return 0;
-}
-
-static void __exit exit_aout_binfmt(void)
-{
-       unregister_binfmt(&aout_format);
-}
-
-module_init(init_aout_binfmt);
-module_exit(exit_aout_binfmt);
-MODULE_LICENSE("GPL");
index 9aff97f0de7fd24074c7ddb17e55bd08746f285c..d937c55e717e655992b32eaefecb3f189d34f045 100644 (file)
 
 /* Asm macros */
 
-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+/*
+ * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states.
+ * It is required to prevent data loss.
+ *
+ * While running inside a virtual machine, the kernel can bypass cache flushing.
+ * Changing the sleep state in a virtual machine doesn't affect the host system
+ * sleep state and cannot lead to data loss.
+ */
+#define ACPI_FLUSH_CPU_CACHE()                                 \
+do {                                                           \
+       if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))       \
+               wbinvd();                                       \
+} while (0)
 
 int __acpi_acquire_global_lock(unsigned int *lock);
 int __acpi_release_global_lock(unsigned int *lock);
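The hypervisor check above keys off X86_FEATURE_HYPERVISOR, which mirrors the CPUID hypervisor-present bit (leaf 1, ECX bit 31). A minimal user-space sketch of the same detection, assuming GCC/Clang's <cpuid.h>; wbinvd itself is privileged, so only the guard is shown:

    #include <stdbool.h>
    #include <stdio.h>
    #include <cpuid.h>

    /* Detect the bit that X86_FEATURE_HYPERVISOR reflects. */
    static bool running_under_hypervisor(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    return false;
            return ecx & (1u << 31);        /* CPUID.1:ECX[31] */
    }

    int main(void)
    {
            /* The kernel would execute wbinvd() only on bare metal. */
            printf("cache flush %s\n",
                   running_under_hypervisor() ? "skipped (VM)" : "required");
            return 0;
    }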
index 46e1df45efc0462774de91891209c50c2acef0b5..aabdbb5ab92008a67e6501563c85f8278e836084 100644 (file)
@@ -49,7 +49,7 @@ union ibs_op_ctl {
        };
 };
 
-/* MSR 0xc0011035: IBS Op Data 2 */
+/* MSR 0xc0011035: IBS Op Data 1 */
 union ibs_op_data {
        __u64 val;
        struct {
index 00d1a400b7a17a272f2cbbf796682b8a18aa3665..ed0eaf65c43721ebfcf4f4ee74f827c6d91ec5d1 100644 (file)
@@ -16,7 +16,6 @@ extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
 
 extern bool early_is_amd_nb(u32 value);
 extern struct resource *amd_get_mmconfig_range(struct resource *res);
-extern int amd_cache_northbridges(void);
 extern void amd_flush_garts(void);
 extern int amd_numa_init(void);
 extern int amd_get_subcaches(int);
index 48067af946785b1569a802e3e2811eb67fbd7386..bd8ae0a7010ae572159e20d1c8011219e78cc743 100644 (file)
@@ -328,6 +328,8 @@ struct apic {
 
        /* wakeup_secondary_cpu */
        int     (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
+       /* wakeup secondary CPU using 64-bit wakeup point */
+       int     (*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip);
 
        void    (*inquire_remote_apic)(int apicid);
 
@@ -488,6 +490,11 @@ static inline unsigned int read_apic_id(void)
        return apic->get_apic_id(reg);
 }
 
+#ifdef CONFIG_X86_64
+typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip);
+extern void acpi_wake_cpu_handler_update(wakeup_cpu_handler handler);
+#endif
+
 extern int default_apic_id_valid(u32 apicid);
 extern int default_acpi_madt_oem_check(char *, char *);
 extern void default_setup_apic_routing(void);
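The new wakeup_secondary_cpu_64 slot and acpi_wake_cpu_handler_update() let a platform install a wakeup routine that starts APs straight at a 64-bit entry point. A kernel-context sketch against the API declared above (the handler name and body are illustrative):

    /* Illustrative 64-bit wakeup handler; real implementations post the
     * start address to a firmware mailbox rather than sending INIT/SIPI. */
    static int my_wakeup_64(int apicid, unsigned long start_eip)
    {
            return 0;       /* 0 on success */
    }

    static void __init install_wakeup(void)
    {
            acpi_wake_cpu_handler_update(my_wakeup_64);
    }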
index 5716f22f81ac4b05abc33882fa34749732c6e52a..92035eb3afeedf7345983c25cbf98e37b4cc9e6b 100644 (file)
 #define        APIC_LVTTHMR    0x330
 #define        APIC_LVTPC      0x340
 #define        APIC_LVT0       0x350
-#define                APIC_LVT_TIMER_BASE_MASK        (0x3 << 18)
-#define                GET_APIC_TIMER_BASE(x)          (((x) >> 18) & 0x3)
-#define                SET_APIC_TIMER_BASE(x)          (((x) << 18))
-#define                APIC_TIMER_BASE_CLKIN           0x0
-#define                APIC_TIMER_BASE_TMBASE          0x1
-#define                APIC_TIMER_BASE_DIV             0x2
 #define                APIC_LVT_TIMER_ONESHOT          (0 << 17)
 #define                APIC_LVT_TIMER_PERIODIC         (1 << 17)
 #define                APIC_LVT_TIMER_TSCDEADLINE      (2 << 17)
index 981fe923a59fe5983d385ee042e339aa6cfd131f..53e9b0620d969339a6f1aa890256fc7cc93b2968 100644 (file)
@@ -74,6 +74,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
                        BOOT_PARAM_PRESERVE(hdr),
                        BOOT_PARAM_PRESERVE(e820_table),
                        BOOT_PARAM_PRESERVE(eddbuf),
+                       BOOT_PARAM_PRESERVE(cc_blob_address),
                };
 
                memset(&scratch, 0, sizeof(scratch));
index aaf0cb0db4aecfebc103498e1f2f8106f5b52733..a3ec87d198ac8398309e2962b5bcc59eeb074692 100644 (file)
@@ -18,7 +18,7 @@
 #ifdef CONFIG_X86_32
 # define __BUG_REL(val)        ".long " __stringify(val)
 #else
-# define __BUG_REL(val)        ".long " __stringify(val) " - 2b"
+# define __BUG_REL(val)        ".long " __stringify(val) " - ."
 #endif
 
 #ifdef CONFIG_DEBUG_BUGVERBOSE
index 0a7fe032161301191eec97422f98712000c2fe08..215f5a65790fd52e0b64ad3c93f3bf51b5a4ff3e 100644 (file)
@@ -42,6 +42,9 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
 #define arch_cmpxchg64_local(ptr, o, n)                                        \
        ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
                                               (unsigned long long)(n)))
+#define arch_try_cmpxchg64(ptr, po, n)                                 \
+       __try_cmpxchg64((ptr), (unsigned long long *)(po), \
+                       (unsigned long long)(n))
 #endif
 
 static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
@@ -70,6 +73,24 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
        return prev;
 }
 
+static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
+{
+       bool success;
+       u64 old = *pold;
+       asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
+                    CC_SET(z)
+                    : CC_OUT(z) (success),
+                      [ptr] "+m" (*ptr),
+                      "+A" (old)
+                    : "b" ((u32)new),
+                      "c" ((u32)(new >> 32))
+                    : "memory");
+
+       if (unlikely(!success))
+               *pold = old;
+       return success;
+}
+
 #ifndef CONFIG_X86_CMPXCHG64
 /*
  * Building a kernel capable of running on 80386 and 80486. It may be necessary
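The new arch_try_cmpxchg64() follows the try_cmpxchg convention: it returns a bool and, on failure, refreshes the caller's expected value, so retry loops need no explicit reload. A user-space analogue of that idiom using C11 atomics (illustrative only; the kernel macro compiles down to lock cmpxchg8b as shown above):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static _Atomic uint64_t counter;

    /* Saturating add: retry until the CAS wins or we would overflow. */
    static void add_sat(uint64_t n)
    {
            uint64_t old = atomic_load(&counter);

            do {
                    if (old > UINT64_MAX - n)
                            return;         /* saturate instead of wrapping */
            } while (!atomic_compare_exchange_weak(&counter, &old, old + n));
    }

    int main(void)
    {
            add_sat(5);
            printf("%llu\n", (unsigned long long)atomic_load(&counter));
            return 0;
    }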
index 072e5459fe2fe33cbcd450fbdc8e3030f075addf..250187ac824842a4dabcb77bf394902447c0609a 100644 (file)
@@ -19,6 +19,12 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
        arch_cmpxchg_local((ptr), (o), (n));                            \
 })
 
+#define arch_try_cmpxchg64(ptr, po, n)                                 \
+({                                                                     \
+       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
+       arch_try_cmpxchg((ptr), (po), (n));                             \
+})
+
 #define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
 
 #endif /* _ASM_X86_CMPXCHG_64_H */
index 86e5e4e26fcbefc68c961d9395e163f6d840a878..8cbf623f0ecfb6aec905ac8e376c7b0fe75933dc 100644 (file)
@@ -36,6 +36,8 @@ extern int _debug_hotplug_cpu(int cpu, int action);
 #endif
 #endif
 
+extern void ap_init_aperfmperf(void);
+
 int mwait_usable(const struct cpuinfo_x86 *);
 
 unsigned int x86_family(unsigned int sig);
@@ -43,14 +45,12 @@ unsigned int x86_model(unsigned int sig);
 unsigned int x86_stepping(unsigned int sig);
 #ifdef CONFIG_CPU_SUP_INTEL
 extern void __init sld_setup(struct cpuinfo_x86 *c);
-extern void switch_to_sld(unsigned long tifn);
 extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
 extern bool handle_guest_split_lock(unsigned long ip);
 extern void handle_bus_lock(struct pt_regs *regs);
 u8 get_this_hybrid_cpu_type(void);
 #else
 static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
-static inline void switch_to_sld(unsigned long tifn) {}
 static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
 {
        return false;
@@ -76,4 +76,22 @@ static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {}
 
 extern __noendbr void cet_disable(void);
 
+struct ucode_cpu_info;
+
+int intel_cpu_collect_info(struct ucode_cpu_info *uci);
+
+static inline bool intel_cpu_signatures_match(unsigned int s1, unsigned int p1,
+                                             unsigned int s2, unsigned int p2)
+{
+       if (s1 != s2)
+               return false;
+
+       /* Processor flags are either both 0 ... */
+       if (!p1 && !p2)
+               return true;
+
+       /* ... or they intersect. */
+       return p1 & p2;
+}
+
 #endif /* _ASM_X86_CPU_H */
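intel_cpu_signatures_match() treats microcode processor flags as matching when both are zero or when their masks share a bit. A stand-alone restatement with a few spot checks (signature values are illustrative, not from any real microcode header):

    #include <assert.h>
    #include <stdbool.h>

    static bool sigs_match(unsigned s1, unsigned p1, unsigned s2, unsigned p2)
    {
            if (s1 != s2)
                    return false;
            if (!p1 && !p2)                 /* flags both absent */
                    return true;
            return p1 & p2;                 /* ... or they intersect */
    }

    int main(void)
    {
            assert(sigs_match(0x906ea, 0x0, 0x906ea, 0x0));
            assert(sigs_match(0x906ea, 0x2, 0x906ea, 0x6));  /* 0x2 & 0x6 != 0 */
            assert(!sigs_match(0x906ea, 0x1, 0x906ea, 0x4)); /* disjoint flags */
            assert(!sigs_match(0x906ea, 0x1, 0x806ea, 0x1)); /* sig differs */
            return 0;
    }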
index dd5ea1bdf04c5fae8c4fc9af3aa592533055bca5..75efc4c6f0766c3a601d023cb1012c81d7ada7c6 100644 (file)
@@ -143,7 +143,7 @@ extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
 
 extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
 
-static inline struct entry_stack *cpu_entry_stack(int cpu)
+static __always_inline struct entry_stack *cpu_entry_stack(int cpu)
 {
        return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
 }
index 1261842d006c73a751354201d35a2afdb513c96a..66d3e3b1d24d8dc4f8689db628668d2859718fb7 100644 (file)
@@ -34,14 +34,17 @@ enum cpuid_leafs
        CPUID_8000_001F_EAX,
 };
 
+#define X86_CAP_FMT_NUM "%d:%d"
+#define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31)
+
 #ifdef CONFIG_X86_FEATURE_NAMES
 extern const char * const x86_cap_flags[NCAPINTS*32];
 extern const char * const x86_power_flags[32];
 #define X86_CAP_FMT "%s"
 #define x86_cap_flag(flag) x86_cap_flags[flag]
 #else
-#define X86_CAP_FMT "%d:%d"
-#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31)
+#define X86_CAP_FMT X86_CAP_FMT_NUM
+#define x86_cap_flag x86_cap_flag_num
 #endif
 
 /*
index 73e643ae94b6f2206d8dac0fdd10477e7c1eb8df..21bb78dfd41d40b2c39e723017de068a0e95cf82 100644 (file)
 #define X86_FEATURE_INVPCID_SINGLE     ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE          ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-/* FREE!                                ( 7*32+10) */
+#define X86_FEATURE_XCOMPACTED         ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
 #define X86_FEATURE_PTI                        ( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_RETPOLINE          ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_LFENCE   ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
 #define X86_FEATURE_SSBD               ( 7*32+17) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_MBA                        ( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW          ( 7*32+19) /* "" Fill RSB on context switches */
-/* FREE!                                ( 7*32+20) */
+#define X86_FEATURE_PERFMON_V2         ( 7*32+20) /* AMD Performance Monitoring Version 2 */
 #define X86_FEATURE_USE_IBPB           ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 #define X86_FEATURE_USE_IBRS_FW                ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE  ( 7*32+23) /* "" Disable Speculative Store Bypass. */
 #define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 #define X86_FEATURE_PVUNLOCK           ( 8*32+20) /* "" PV unlock function */
 #define X86_FEATURE_VCPUPREEMPT                ( 8*32+21) /* "" PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST          ( 8*32+22) /* Intel Trust Domain Extensions Guest */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 #define X86_FEATURE_VIRT_SSBD          (13*32+25) /* Virtualized Speculative Store Bypass Disable */
 #define X86_FEATURE_AMD_SSB_NO         (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
 #define X86_FEATURE_CPPC               (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BRS                        (13*32+31) /* Branch Sampling available */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM             (14*32+ 0) /* Digital Thermal Sensor */
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
new file mode 100644 (file)
index 0000000..70b2db1
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CPUID-related helpers/definitions
+ *
+ * Derived from arch/x86/kvm/cpuid.c
+ */
+
+#ifndef _ASM_X86_CPUID_H
+#define _ASM_X86_CPUID_H
+
+static __always_inline bool cpuid_function_is_indexed(u32 function)
+{
+       switch (function) {
+       case 4:
+       case 7:
+       case 0xb:
+       case 0xd:
+       case 0xf:
+       case 0x10:
+       case 0x12:
+       case 0x14:
+       case 0x17:
+       case 0x18:
+       case 0x1d:
+       case 0x1e:
+       case 0x1f:
+       case 0x8000001d:
+               return true;
+       }
+
+       return false;
+}
+
+#endif /* _ASM_X86_CPUID_H */
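cpuid_function_is_indexed() tells callers whether ECX selects a sub-leaf for a given CPUID function. A user-space sketch of why that matters, walking the indexed cache-parameters leaf 4 with GCC's __get_cpuid_count() (Intel leaf-4 semantics assumed; on other vendors the loop simply prints nothing):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx, sub;

            for (sub = 0; ; sub++) {
                    if (!__get_cpuid_count(4, sub, &eax, &ebx, &ecx, &edx))
                            break;
                    if (!(eax & 0x1f))      /* cache type 0: no more caches */
                            break;
                    /* EAX[4:0] = cache type, EAX[7:5] = cache level */
                    printf("L%u cache, type %u\n",
                           (eax >> 5) & 0x7, eax & 0x1f);
            }
            return 0;
    }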
index 1231d63f836d81386f122c1022a7f5a25d503ef1..36369e76cc631ecbb488c175d3c330cebee88737 100644 (file)
  * cpu_feature_enabled().
  */
 
-#ifdef CONFIG_X86_SMAP
-# define DISABLE_SMAP  0
-#else
-# define DISABLE_SMAP  (1<<(X86_FEATURE_SMAP & 31))
-#endif
-
 #ifdef CONFIG_X86_UMIP
 # define DISABLE_UMIP  0
 #else
 # define DISABLE_SGX   (1 << (X86_FEATURE_SGX & 31))
 #endif
 
+#ifdef CONFIG_INTEL_TDX_GUEST
+# define DISABLE_TDX_GUEST     0
+#else
+# define DISABLE_TDX_GUEST     (1 << (X86_FEATURE_TDX_GUEST & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -79,8 +79,8 @@
 #define DISABLED_MASK5 0
 #define DISABLED_MASK6 0
 #define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
+#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
+#define DISABLED_MASK9 (DISABLE_SGX)
 #define DISABLED_MASK10        0
 #define DISABLED_MASK11        0
 #define DISABLED_MASK12        0
index 98938a68251cc49fc755328edea827fbe52d11df..bed74a0f2932dfdba626b239baddfbd07d06494f 100644 (file)
@@ -357,6 +357,11 @@ static inline u32 efi64_convert_status(efi_status_t status)
                                                   runtime),            \
                                    func, __VA_ARGS__))
 
+#define efi_dxe_call(func, ...)                                                \
+       (efi_is_native()                                                \
+               ? efi_dxe_table->func(__VA_ARGS__)                      \
+               : __efi64_thunk_map(efi_dxe_table, func, __VA_ARGS__))
+
 #else /* CONFIG_EFI_MIXED */
 
 static inline bool efi_is_64bit(void)
index 29fea180a6658e84bd0a094d4e767558cc91b4e1..cb0ff1055ab1632f0c22b1f470ea8d0e57a23ffd 100644 (file)
@@ -116,7 +116,7 @@ extern unsigned int vdso32_enabled;
  * now struct_user_regs, they are different)
  */
 
-#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs)        \
+#define ELF_CORE_COPY_REGS(pr_reg, regs)       \
 do {                                           \
        pr_reg[0] = regs->bx;                   \
        pr_reg[1] = regs->cx;                   \
@@ -128,6 +128,7 @@ do {                                                \
        pr_reg[7] = regs->ds;                   \
        pr_reg[8] = regs->es;                   \
        pr_reg[9] = regs->fs;                   \
+       savesegment(gs, pr_reg[10]);            \
        pr_reg[11] = regs->orig_ax;             \
        pr_reg[12] = regs->ip;                  \
        pr_reg[13] = regs->cs;                  \
@@ -136,18 +137,6 @@ do {                                               \
        pr_reg[16] = regs->ss;                  \
 } while (0);
 
-#define ELF_CORE_COPY_REGS(pr_reg, regs)       \
-do {                                           \
-       ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
-       pr_reg[10] = get_user_gs(regs);         \
-} while (0);
-
-#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs)        \
-do {                                           \
-       ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
-       savesegment(gs, pr_reg[10]);            \
-} while (0);
-
 #define ELF_PLATFORM   (utsname()->machine)
 #define set_personality_64bit()        do { } while (0)
 
index 43184640b579a45565919801e8a23a2301e69017..674ed46d3cedaf88fbcb58e7e64e62cb1e5eba86 100644 (file)
@@ -10,7 +10,7 @@
 #include <asm/fpu/api.h>
 
 /* Check that the stack and regs on entry from user mode are sane. */
-static __always_inline void arch_check_user_regs(struct pt_regs *regs)
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 {
        if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
                /*
@@ -42,7 +42,7 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs)
                WARN_ON_ONCE(regs != task_pt_regs(current));
        }
 }
-#define arch_check_user_regs arch_check_user_regs
+#define arch_enter_from_user_mode arch_enter_from_user_mode
 
 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
                                                  unsigned long ti_work)
index c83b3020350ac264ebd65217bc50e3e9c957376b..6b0f31fb53f7e27e6d21a2076914a808b24c0685 100644 (file)
@@ -162,7 +162,6 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
 }
 
 /* prctl */
-struct task_struct;
-extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2);
+extern long fpu_xstate_prctl(int option, unsigned long arg2);
 
 #endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
deleted file mode 100644 (file)
index e69de29..0000000
index 032e020853aa6571345cb512a06cb44bcf735049..731ee7cc40a57176dddc8663cc2899f6eeaacda6 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/tlbflush.h>
 #include <asm/paravirt.h>
 #include <asm/fixmap.h>
+#include <asm/pgtable_areas.h>
 
 /* declarations for highmem.c */
 extern unsigned long highstart_pfn, highend_pfn;
index 7924f27f5c8b14c52b7d1a742eb1459cf48e14ee..72184b0b2219e88502bad42ee77fec36bede52c3 100644 (file)
@@ -632,6 +632,10 @@ DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER,     exc_xen_hypervisor_callback);
 DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER,   exc_xen_unknown_trap);
 #endif
 
+#ifdef CONFIG_INTEL_TDX_GUEST
+DECLARE_IDTENTRY(X86_TRAP_VE,          exc_virtualization_exception);
+#endif
+
 /* Device interrupts common/spurious */
 DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER,   common_interrupt);
 #ifdef CONFIG_X86_LOCAL_APIC
index e9736af126b29ee2083b6900e8354f820d5b02e8..1870b99c3356078e495ad4bcd12ac7e4cab3d4bb 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/page.h>
 #include <asm/early_ioremap.h>
 #include <asm/pgtable_types.h>
+#include <asm/shared/io.h>
 
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
@@ -256,37 +257,23 @@ static inline void slow_down_io(void)
 #endif
 
 #define BUILDIO(bwl, bw, type)                                         \
-static inline void out##bwl(unsigned type value, int port)             \
-{                                                                      \
-       asm volatile("out" #bwl " %" #bw "0, %w1"                       \
-                    : : "a"(value), "Nd"(port));                       \
-}                                                                      \
-                                                                       \
-static inline unsigned type in##bwl(int port)                          \
-{                                                                      \
-       unsigned type value;                                            \
-       asm volatile("in" #bwl " %w1, %" #bw "0"                        \
-                    : "=a"(value) : "Nd"(port));                       \
-       return value;                                                   \
-}                                                                      \
-                                                                       \
-static inline void out##bwl##_p(unsigned type value, int port)         \
+static inline void out##bwl##_p(type value, u16 port)                  \
 {                                                                      \
        out##bwl(value, port);                                          \
        slow_down_io();                                                 \
 }                                                                      \
                                                                        \
-static inline unsigned type in##bwl##_p(int port)                      \
+static inline type in##bwl##_p(u16 port)                               \
 {                                                                      \
-       unsigned type value = in##bwl(port);                            \
+       type value = in##bwl(port);                                     \
        slow_down_io();                                                 \
        return value;                                                   \
 }                                                                      \
                                                                        \
-static inline void outs##bwl(int port, const void *addr, unsigned long count) \
+static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \
 {                                                                      \
        if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) {          \
-               unsigned type *value = (unsigned type *)addr;           \
+               type *value = (type *)addr;                             \
                while (count) {                                         \
                        out##bwl(*value, port);                         \
                        value++;                                        \
@@ -299,10 +286,10 @@ static inline void outs##bwl(int port, const void *addr, unsigned long count) \
        }                                                               \
 }                                                                      \
                                                                        \
-static inline void ins##bwl(int port, void *addr, unsigned long count) \
+static inline void ins##bwl(u16 port, void *addr, unsigned long count) \
 {                                                                      \
        if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) {          \
-               unsigned type *value = (unsigned type *)addr;           \
+               type *value = (type *)addr;                             \
                while (count) {                                         \
                        *value = in##bwl(port);                         \
                        value++;                                        \
@@ -315,13 +302,11 @@ static inline void ins##bwl(int port, void *addr, unsigned long count)    \
        }                                                               \
 }
 
-BUILDIO(b, b, char)
-BUILDIO(w, w, short)
-BUILDIO(l, , int)
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l,  , u32)
+#undef BUILDIO
 
-#define inb inb
-#define inw inw
-#define inl inl
 #define inb_p inb_p
 #define inw_p inw_p
 #define inl_p inl_p
@@ -329,9 +314,6 @@ BUILDIO(l, , int)
 #define insw insw
 #define insl insl
 
-#define outb outb
-#define outw outw
-#define outl outl
 #define outb_p outb_p
 #define outw_p outw_p
 #define outl_p outl_p
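After this rework the plain in/out primitives come from the shared header added later in this series, and BUILDIO only generates the paused and string variants, with fixed-width types throughout. A sketch of what BUILDIO(b, b, u8) now expands to for the paused pair (kernel context assumed; not stand-alone code):

    static inline void outb_p(u8 value, u16 port)
    {
            outb(value, port);              /* primitive from asm/shared/io.h */
            slow_down_io();
    }

    static inline u8 inb_p(u16 port)
    {
            u8 value = inb(port);
            slow_down_io();
            return value;
    }

Note the port parameter is now u16, matching the width the in/out instructions actually accept.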
index 111104d1c2cd1c2650f5e7786fbfe08d7e69ac80..7793e52d6237a42279c716e2b1b94d3fa03e48a6 100644 (file)
@@ -137,14 +137,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags)
        if (!arch_irqs_disabled_flags(flags))
                arch_local_irq_enable();
 }
-#else
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_XEN_PV
-#define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
-#else
-#define SWAPGS swapgs
-#endif
-#endif
 #endif /* !__ASSEMBLY__ */
 
 #endif
index 0449b125d27f3d49cfcc021ed79b213db5381a82..071572e23d3a06783e3a1f63e11bb47e99af9daa 100644 (file)
@@ -20,7 +20,7 @@
        _ASM_PTR "%c0 + %c1 - .\n\t"                    \
        ".popsection \n\t"
 
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_HAVE_JUMP_LABEL_HACK
 
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
@@ -34,7 +34,7 @@ l_yes:
        return true;
 }
 
-#else
+#else /* !CONFIG_HAVE_JUMP_LABEL_HACK */
 
 static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
 {
@@ -48,7 +48,7 @@ l_yes:
        return true;
 }
 
-#endif /* STACK_VALIDATION */
+#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
 
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
index 56935ebb1dfe1a418f3b2cb66f2b2cd33de4aef1..57bc74e112f20936d6ee2601443892ecc083b533 100644 (file)
@@ -7,6 +7,8 @@
 #include <linux/interrupt.h>
 #include <uapi/asm/kvm_para.h>
 
+#include <asm/tdx.h>
+
 #ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
@@ -32,6 +34,10 @@ static inline bool kvm_check_and_clear_guest_paused(void)
 static inline long kvm_hypercall0(unsigned int nr)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, 0, 0, 0, 0);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr)
@@ -42,6 +48,10 @@ static inline long kvm_hypercall0(unsigned int nr)
 static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, p1, 0, 0, 0);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr), "b"(p1)
@@ -53,6 +63,10 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
                                  unsigned long p2)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, p1, p2, 0, 0);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr), "b"(p1), "c"(p2)
@@ -64,6 +78,10 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
                                  unsigned long p2, unsigned long p3)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, p1, p2, p3, 0);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
@@ -76,6 +94,10 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
                                  unsigned long p4)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, p1, p2, p3, p4);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
index e2c6f433ed100b0b131b8cb8008fa659fb9b97d4..88ceaf3648b32dbee9fc3fba8bfb61548d302328 100644 (file)
@@ -49,9 +49,6 @@ void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
 
 void __init mem_encrypt_free_decrypted_mem(void);
 
-/* Architecture __weak replacement functions */
-void __init mem_encrypt_init(void);
-
 void __init sev_es_init_vc_handling(void);
 
 #define __bss_decrypted __section(".bss..decrypted")
@@ -89,6 +86,9 @@ static inline void mem_encrypt_free_decrypted_mem(void) { }
 
 #endif /* CONFIG_AMD_MEM_ENCRYPT */
 
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void);
+
 /*
  * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
  * writing to or comparing values from the cr3 register.  Having the
index 27516046117a389a80b962579b8c42e6f17e67b9..b8d40ddeab00f9b3989962d095c8d62f73a2d3dc 100644 (file)
@@ -141,7 +141,7 @@ do {                                                \
 #ifdef CONFIG_X86_32
 #define deactivate_mm(tsk, mm)                 \
 do {                                           \
-       lazy_load_gs(0);                        \
+       loadsegment(gs, 0);                     \
 } while (0)
 #else
 #define deactivate_mm(tsk, mm)                 \
diff --git a/arch/x86/include/asm/mmx.h b/arch/x86/include/asm/mmx.h
deleted file mode 100644 (file)
index e69de29..0000000
index ee15311b6be1d99e2bea11bd4c03a8a36fd8c706..403e83b4adc88ea3d61337279920de39f75df6ae 100644 (file)
@@ -76,6 +76,8 @@
 
 /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
 #define MSR_IA32_CORE_CAPS                       0x000000cf
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT    2
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS        BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT)
 #define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT  5
 #define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT     BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
 
 #define MSR_IA32_POWER_CTL             0x000001fc
 #define MSR_IA32_POWER_CTL_BIT_EE      19
 
+/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
+#define MSR_INTEGRITY_CAPS                     0x000002d9
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT   4
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST       BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+
 #define MSR_LBR_NHM_FROM               0x00000680
 #define MSR_LBR_NHM_TO                 0x000006c0
 #define MSR_LBR_CORE_FROM              0x00000040
 
 /* Run Time Average Power Limiting (RAPL) Interface */
 
+#define MSR_VR_CURRENT_CONFIG  0x00000601
 #define MSR_RAPL_POWER_UNIT            0x00000606
 
 #define MSR_PKG_POWER_LIMIT            0x00000610
 #define MSR_AMD64_SEV                  0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT      0
 #define MSR_AMD64_SEV_ES_ENABLED_BIT   1
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT  2
 #define MSR_AMD64_SEV_ENABLED          BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
 #define MSR_AMD64_SEV_ES_ENABLED       BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED      BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
 
 #define MSR_AMD64_VIRT_SPEC_CTRL       0xc001011f
 
 #define AMD_CPPC_DES_PERF(x)           (((x) & 0xff) << 16)
 #define AMD_CPPC_ENERGY_PERF_PREF(x)   (((x) & 0xff) << 24)
 
+/* AMD Performance Counter Global Status and Control MSRs */
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS      0xc0000300
+#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL         0xc0000301
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR  0xc0000302
+
 /* Fam 17h MSRs */
 #define MSR_F17H_IRPERF                        0xc00000e9
 
 #define MSR_IA32_PERF_CTL              0x00000199
 #define INTEL_PERF_CTL_MASK            0xffff
 
+/* AMD Branch Sampling configuration */
+#define MSR_AMD_DBG_EXTN_CFG           0xc000010f
+#define MSR_AMD_SAMP_BR_FROM           0xc0010300
+
 #define MSR_IA32_MPERF                 0x000000e7
 #define MSR_IA32_APERF                 0x000000e8
 
index d42e6c6b47b1e4791e5da4fac371e41027f77a51..65ec1965cd2810323ab71a8d5cb79851845237c4 100644 (file)
 #include <asm/errno.h>
 #include <asm/cpumask.h>
 #include <uapi/asm/msr.h>
-
-struct msr {
-       union {
-               struct {
-                       u32 l;
-                       u32 h;
-               };
-               u64 q;
-       };
-};
+#include <asm/shared/msr.h>
 
 struct msr_info {
        u32 msr_no;
index 1cb9c17a4cb4b1fba49646749ee2a6400ab6fb93..5c5f1e56c4048db1a725b450e6f700b484d150d2 100644 (file)
@@ -47,6 +47,7 @@ struct nmiaction {
 #define register_nmi_handler(t, fn, fg, n, init...)    \
 ({                                                     \
        static struct nmiaction init fn##_na = {        \
+               .list = LIST_HEAD_INIT(fn##_na.list),   \
                .handler = (fn),                        \
                .name = (n),                            \
                .flags = (fg),                          \
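Pre-initializing .list with LIST_HEAD_INIT leaves the static nmiaction a valid (self-linked, empty) list node even before registration, which appears intended to make early or failed unregistration harmless. A runnable miniature of the self-linking convention (types re-declared here; this is not the kernel's list.h):

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };
    #define LIST_HEAD_INIT(name) { &(name), &(name) }

    /* A node whose links point at itself is the canonical empty state,
     * so unlinking it is a no-op rather than a NULL dereference. */
    static struct list_head node = LIST_HEAD_INIT(node);

    int main(void)
    {
            printf("empty: %d\n", node.next == &node);  /* prints 1 */
            return 0;
    }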
index e9c86299b835144c9df4d8e72c502ac8ac0101ef..baa70451b8df5d20ef0731b82f7409f9f9ff05d5 100644 (file)
@@ -16,7 +16,7 @@ extern unsigned long page_offset_base;
 extern unsigned long vmalloc_base;
 extern unsigned long vmemmap_base;
 
-static inline unsigned long __phys_addr_nodebug(unsigned long x)
+static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
 {
        unsigned long y = x - __START_KERNEL_map;
 
index a0627dfae5412a0b01fb57804fa2e6db454a7134..1307cd689d2a26c893b92c20038f4e24788d2cc4 100644 (file)
@@ -93,6 +93,15 @@ struct irq_routing_table {
        struct irq_info slots[];
 } __attribute__((packed));
 
+struct irt_routing_table {
+       u32 signature;                  /* IRT_SIGNATURE should be here */
+       u8 size;                        /* Number of entries provided */
+       u8 used;                        /* Number of entries actually used */
+       u16 exclusive_irqs;             /* IRQs devoted exclusively to
+                                          PCI usage */
+       struct irq_info slots[];
+} __attribute__((packed));
+
 extern unsigned int pcibios_irq_mask;
 
 extern raw_spinlock_t pci_config_lock;
index b06e4c573adddad0d734aeb72cff9afc1744285b..409725e86f42c4453306c6d446ff04207e1b594d 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_PERF_EVENT_H
 #define _ASM_X86_PERF_EVENT_H
 
+#include <linux/static_call.h>
+
 /*
  * Performance event hw details:
  */
@@ -184,6 +186,18 @@ union cpuid28_ecx {
        unsigned int            full;
 };
 
+/*
+ * AMD "Extended Performance Monitoring and Debug" CPUID
+ * detection/enumeration details:
+ */
+union cpuid_0x80000022_ebx {
+       struct {
+               /* Number of Core Performance Counters */
+               unsigned int    num_core_pmc:4;
+       } split;
+       unsigned int            full;
+};
+
 struct x86_pmu_capability {
        int             version;
        int             num_counters_gp;
@@ -370,6 +384,11 @@ struct pebs_xmm {
        u64 xmm[16*2];  /* two entries for each register */
 };
 
+/*
+ * AMD Extended Performance Monitoring and Debug cpuid feature detection
+ */
+#define EXT_PERFMON_DEBUG_FEATURES             0x80000022
+
 /*
  * IBS cpuid feature detection
  */
@@ -391,6 +410,7 @@ struct pebs_xmm {
 #define IBS_CAPS_OPBRNFUSE             (1U<<8)
 #define IBS_CAPS_FETCHCTLEXTD          (1U<<9)
 #define IBS_CAPS_OPDATA4               (1U<<10)
+#define IBS_CAPS_ZEN4                  (1U<<11)
 
 #define IBS_CAPS_DEFAULT               (IBS_CAPS_AVAIL         \
                                         | IBS_CAPS_FETCHSAM    \
@@ -404,6 +424,7 @@ struct pebs_xmm {
 #define IBSCTL_LVT_OFFSET_MASK         0x0F
 
 /* IBS fetch bits/masks */
+#define IBS_FETCH_L3MISSONLY   (1ULL<<59)
 #define IBS_FETCH_RAND_EN      (1ULL<<57)
 #define IBS_FETCH_VAL          (1ULL<<49)
 #define IBS_FETCH_ENABLE       (1ULL<<48)
@@ -420,6 +441,7 @@ struct pebs_xmm {
 #define IBS_OP_CNT_CTL         (1ULL<<19)
 #define IBS_OP_VAL             (1ULL<<18)
 #define IBS_OP_ENABLE          (1ULL<<17)
+#define IBS_OP_L3MISSONLY      (1ULL<<16)
 #define IBS_OP_MAX_CNT         0x0000FFFFULL
 #define IBS_OP_MAX_CNT_EXT     0x007FFFFFULL   /* not a register bit mask */
 #define IBS_OP_MAX_CNT_EXT_MASK        (0x7FULL<<20)   /* separate upper 7 bits */
@@ -518,6 +540,27 @@ static inline void intel_pt_handle_vmx(int on)
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
  extern void amd_pmu_enable_virt(void);
  extern void amd_pmu_disable_virt(void);
+
+#if defined(CONFIG_PERF_EVENTS_AMD_BRS)
+
+#define PERF_NEEDS_LOPWR_CB 1
+
+/*
+ * The architectural low-power callback impacts
+ * drivers/acpi/processor_idle.c
+ * drivers/acpi/acpi_pad.c
+ */
+extern void perf_amd_brs_lopwr_cb(bool lopwr_in);
+
+DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+
+static inline void perf_lopwr_cb(bool lopwr_in)
+{
+       static_call_mod(perf_lopwr_cb)(lopwr_in);
+}
+
+#endif /* PERF_NEEDS_LOPWR_CB */
+
 #else
  static inline void amd_pmu_enable_virt(void) { }
  static inline void amd_pmu_disable_virt(void) { }
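perf_lopwr_cb() is wired through a static call so idle-path consumers pay only a patched direct call when branch sampling is in use. A kernel-context miniature of the same wiring (names here are illustrative, not from the tree):

    #include <linux/static_call.h>
    #include <linux/types.h>

    static void my_lopwr_cb(bool lopwr_in)
    {
            /* drain or pause sampling state around low-power entry */
    }

    DEFINE_STATIC_CALL(my_lopwr, my_lopwr_cb);

    static void idle_enter(void)
    {
            static_call(my_lopwr)(true);    /* direct call once patched */
    }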
index 1d5f14aff5f6fd975143874cf1010b32c999ee11..2e6c04d8a45b487868b87c1e75c047cf7b509020 100644 (file)
@@ -41,9 +41,6 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
        return __arch_override_mprotect_pkey(vma, prot, pkey);
 }
 
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-               unsigned long init_val);
-
 #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
 
 #define mm_pkey_allocation_map(mm)     (mm->context.pkey_allocation_map)
@@ -118,11 +115,6 @@ int mm_pkey_free(struct mm_struct *mm, int pkey)
        return 0;
 }
 
-extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-               unsigned long init_val);
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-               unsigned long init_val);
-
 static inline int vma_pkey(struct vm_area_struct *vma)
 {
        unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
index feed36d44d0440f1739f1f63d4c7826331940132..12ef86b19910d3b80050aa3553f1ae1ee2e19672 100644 (file)
@@ -13,6 +13,8 @@ void syscall_init(void);
 #ifdef CONFIG_X86_64
 void entry_SYSCALL_64(void);
 void entry_SYSCALL_64_safe_stack(void);
+void entry_SYSRETQ_unsafe_stack(void);
+void entry_SYSRETQ_end(void);
 long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
 #endif
 
@@ -28,6 +30,8 @@ void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
 void entry_SYSCALL_compat_safe_stack(void);
+void entry_SYSRETL_compat_unsafe_stack(void);
+void entry_SYSRETL_compat_end(void);
 void entry_INT80_compat(void);
 #ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
@@ -35,11 +39,9 @@ void xen_entry_INT80_compat(void);
 #endif
 
 void x86_configure_nx(void);
-void x86_report_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl_common(struct task_struct *task, int option,
-                         unsigned long arg2);
+long do_arch_prctl_common(int option, unsigned long arg2);
 
 #endif /* _ASM_X86_PROTO_H */
index 4357e0f2cd5f702aacc7ec0a21ed2047bb5123cb..f4db78b09c8f0be1e0a904394d48f0a45b246cc8 100644 (file)
@@ -186,9 +186,13 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
        bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
                    regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack);
 
+       ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack &&
+                     regs->ip <  (unsigned long)entry_SYSRETQ_end);
 #ifdef CONFIG_IA32_EMULATION
        ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
                      regs->ip <  (unsigned long)entry_SYSCALL_compat_safe_stack);
+       ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack &&
+                     regs->ip <  (unsigned long)entry_SYSRETL_compat_end);
 #endif
 
        return ret;
index 331474b150f16964e1ba8788e6a623d0214aee62..fd6f6e5b755a7823c50ef4f6e2fe4daaee09caf5 100644 (file)
@@ -25,6 +25,7 @@ struct real_mode_header {
        u32     sev_es_trampoline_start;
 #endif
 #ifdef CONFIG_X86_64
+       u32     trampoline_start64;
        u32     trampoline_pgd;
 #endif
        /* ACPI S3 wakeup */
index 656ed6531d035d20d9aed5aaaddc79a32b065619..2e7890dd58a47714fbba4bbddb68e05605835ee5 100644 (file)
@@ -350,18 +350,6 @@ static inline void __loadsegment_fs(unsigned short value)
 #define savesegment(seg, value)                                \
        asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
 
-/*
- * x86-32 user GS accessors.  This is ugly and could do with some cleaning up.
- */
-#ifdef CONFIG_X86_32
-# define get_user_gs(regs)             (u16)({ unsigned long v; savesegment(gs, v); v; })
-# define set_user_gs(regs, v)          loadsegment(gs, (unsigned long)(v))
-# define task_user_gs(tsk)             ((tsk)->thread.gs)
-# define lazy_save_gs(v)               savesegment(gs, (v))
-# define lazy_load_gs(v)               loadsegment(gs, (v))
-# define load_gs_index(v)              loadsegment(gs, (v))
-#endif /* X86_32 */
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
index 896e48d45828cba79d15d5d9a98ea55fc62f9bbe..7590ac2570b964a80ff0e78f8044a1046626cec7 100644 (file)
@@ -50,7 +50,6 @@ extern unsigned long saved_video_mode;
 extern void reserve_standard_io_resources(void);
 extern void i386_reserve_resources(void);
 extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
-extern unsigned long __startup_secondary_64(void);
 extern void startup_64_setup_env(unsigned long physbase);
 extern void early_setup_idt(void);
 extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
@@ -109,27 +108,19 @@ extern unsigned long _brk_end;
 void *extend_brk(size_t size, size_t align);
 
 /*
- * Reserve space in the brk section.  The name must be unique within
- * the file, and somewhat descriptive.  The size is in bytes.  Must be
- * used at file scope.
+ * Reserve space in the brk section.  The name must be unique within the file,
+ * and somewhat descriptive.  The size is in bytes.
  *
- * (This uses a temp function to wrap the asm so we can pass it the
- * size parameter; otherwise we wouldn't be able to.  We can't use a
- * "section" attribute on a normal variable because it always ends up
- * being @progbits, which ends up allocating space in the vmlinux
- * executable.)
+ * The allocation is done using inline asm (rather than using a section
+ * attribute on a normal variable) in order to allow the use of @nobits, so
+ * that it doesn't take up any space in the vmlinux file.
  */
-#define RESERVE_BRK(name,sz)                                           \
-       static void __section(".discard.text") __noendbr __used notrace \
-       __brk_reservation_fn_##name##__(void) {                         \
-               asm volatile (                                          \
-                       ".pushsection .brk_reservation,\"aw\",@nobits;" \
-                       ".brk." #name ":"                               \
-                       " 1:.skip %c0;"                                 \
-                       " .size .brk." #name ", . - 1b;"                \
-                       " .popsection"                                  \
-                       : : "i" (sz));                                  \
-       }
+#define RESERVE_BRK(name, size)                                                \
+       asm(".pushsection .brk_reservation,\"aw\",@nobits\n\t"          \
+           ".brk." #name ":\n\t"                                       \
+           ".skip " __stringify(size) "\n\t"                           \
+           ".size .brk." #name ", " __stringify(size) "\n\t"           \
+           ".popsection\n\t")
 
 extern void probe_roms(void);
 #ifdef __i386__
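With the wrapper function gone, RESERVE_BRK() is a plain file-scope declaration. A usage sketch matching how arch/x86 consumes it (extend_brk() is declared above; the name and 64KB size are just examples):

    /* Reserve brk space at file scope ... */
    RESERVE_BRK(early_scratch, 65536);

    /* ... and carve it up during early boot. */
    static void * __init grab_early_buffer(void)
    {
            return extend_brk(4096, sizeof(long));
    }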
index 1b2fd32b42fe4906d19b912486f5f7ba32e84ff4..b8357d6ecd47ef6766a0fe9fe5161f6447228c16 100644 (file)
 #define GHCB_MSR_AP_RESET_HOLD_REQ     0x006
 #define GHCB_MSR_AP_RESET_HOLD_RESP    0x007
 
+/* GHCB GPA Register */
+#define GHCB_MSR_REG_GPA_REQ           0x012
+#define GHCB_MSR_REG_GPA_REQ_VAL(v)                    \
+       /* GHCBData[63:12] */                           \
+       (((u64)((v) & GENMASK_ULL(51, 0)) << 12) |      \
+       /* GHCBData[11:0] */                            \
+       GHCB_MSR_REG_GPA_REQ)
+
+#define GHCB_MSR_REG_GPA_RESP          0x013
+#define GHCB_MSR_REG_GPA_RESP_VAL(v)                   \
+       /* GHCBData[63:12] */                           \
+       (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+/*
+ * SNP Page State Change Operation
+ *
+ * GHCBData[55:52] - Page operation:
+ *   0x0001    Page assignment, Private
+ *   0x0002    Page assignment, Shared
+ */
+enum psc_op {
+       SNP_PAGE_STATE_PRIVATE = 1,
+       SNP_PAGE_STATE_SHARED,
+};
+
+#define GHCB_MSR_PSC_REQ               0x014
+#define GHCB_MSR_PSC_REQ_GFN(gfn, op)                  \
+       /* GHCBData[55:52] */                           \
+       (((u64)((op) & 0xf) << 52) |                    \
+       /* GHCBData[51:12] */                           \
+       ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) |     \
+       /* GHCBData[11:0] */                            \
+       GHCB_MSR_PSC_REQ)
+
+#define GHCB_MSR_PSC_RESP              0x015
+#define GHCB_MSR_PSC_RESP_VAL(val)                     \
+       /* GHCBData[63:32] */                           \
+       (((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
+
 /* GHCB Hypervisor Feature Request/Response */
 #define GHCB_MSR_HV_FT_REQ             0x080
 #define GHCB_MSR_HV_FT_RESP            0x081
+#define GHCB_MSR_HV_FT_RESP_VAL(v)                     \
+       /* GHCBData[63:12] */                           \
+       (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+#define GHCB_HV_FT_SNP                 BIT_ULL(0)
+#define GHCB_HV_FT_SNP_AP_CREATION     BIT_ULL(1)
+
+/* SNP Page State Change NAE event */
+#define VMGEXIT_PSC_MAX_ENTRY          253
+
+struct psc_hdr {
+       u16 cur_entry;
+       u16 end_entry;
+       u32 reserved;
+} __packed;
+
+struct psc_entry {
+       u64     cur_page        : 12,
+               gfn             : 40,
+               operation       : 4,
+               pagesize        : 1,
+               reserved        : 7;
+} __packed;
+
+struct snp_psc_desc {
+       struct psc_hdr hdr;
+       struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
+} __packed;
+
+/* Guest message request error code */
+#define SNP_GUEST_REQ_INVALID_LEN      BIT_ULL(32)
 
 #define GHCB_MSR_TERM_REQ              0x100
 #define GHCB_MSR_TERM_REASON_SET_POS   12
         /* GHCBData[23:16] */                          \
        ((((u64)reason_val) & 0xff) << 16))
 
+/* Error codes from reason set 0 */
+#define SEV_TERM_SET_GEN               0
 #define GHCB_SEV_ES_GEN_REQ            0
 #define GHCB_SEV_ES_PROT_UNSUPPORTED   1
+#define GHCB_SNP_UNSUPPORTED           2
+
+/* Linux-specific reason codes (used with reason set 1) */
+#define SEV_TERM_SET_LINUX             1
+#define GHCB_TERM_REGISTER             0       /* GHCB GPA registration failure */
+#define GHCB_TERM_PSC                  1       /* Page State Change failure */
+#define GHCB_TERM_PVALIDATE            2       /* Pvalidate failure */
+#define GHCB_TERM_NOT_VMPL0            3       /* SNP guest is not running at VMPL-0 */
+#define GHCB_TERM_CPUID                        4       /* CPUID-validation failure */
+#define GHCB_TERM_CPUID_HV             5       /* CPUID failure during hypervisor fallback */
 
 #define GHCB_RESP_CODE(v)              ((v) & GHCB_MSR_INFO_MASK)
 
index ec060c43358972671d34d445e3b6206e385da1ff..19514524f0f8bac6ae0add43b0ba4f054126a840 100644 (file)
 #include <linux/types.h>
 #include <asm/insn.h>
 #include <asm/sev-common.h>
+#include <asm/bootparam.h>
 
-#define GHCB_PROTO_OUR         0x0001UL
-#define GHCB_PROTOCOL_MAX      1ULL
+#define GHCB_PROTOCOL_MIN      1ULL
+#define GHCB_PROTOCOL_MAX      2ULL
 #define GHCB_DEFAULT_USAGE     0ULL
 
 #define        VMGEXIT()                       { asm volatile("rep; vmmcall\n\r"); }
@@ -42,6 +43,24 @@ struct es_em_ctxt {
        struct es_fault_info fi;
 };
 
+/*
+ * AMD SEV Confidential computing blob structure. The structure is
+ * defined in OVMF UEFI firmware header:
+ * https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/Guid/ConfidentialComputingSevSnpBlob.h
+ */
+#define CC_BLOB_SEV_HDR_MAGIC  0x45444d41
+struct cc_blob_sev_info {
+       u32 magic;
+       u16 version;
+       u16 reserved;
+       u64 secrets_phys;
+       u32 secrets_len;
+       u32 rsvd1;
+       u64 cpuid_phys;
+       u32 cpuid_len;
+       u32 rsvd2;
+} __packed;
+
 void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
 
 static inline u64 lower_bits(u64 val, unsigned int bits)
@@ -60,6 +79,61 @@ extern void vc_no_ghcb(void);
 extern void vc_boot_ghcb(void);
 extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
 
+/* Software defined (when rFlags.CF = 1) */
+#define PVALIDATE_FAIL_NOUPDATE                255
+
+/* RMP page size */
+#define RMP_PG_SIZE_4K                 0
+
+#define RMPADJUST_VMSA_PAGE_BIT                BIT(16)
+
+/* SNP Guest message request */
+struct snp_req_data {
+       unsigned long req_gpa;
+       unsigned long resp_gpa;
+       unsigned long data_gpa;
+       unsigned int data_npages;
+};
+
+struct sev_guest_platform_data {
+       u64 secrets_gpa;
+};
+
+/*
+ * The secrets page contains 96 bytes of reserved space that can be used by
+ * the guest OS. The guest OS uses the area to save the message sequence
+ * number for each VMPCK.
+ *
+ * See the GHCB spec section on the secrets page layout for the format of this area.
+ */
+struct secrets_os_area {
+       u32 msg_seqno_0;
+       u32 msg_seqno_1;
+       u32 msg_seqno_2;
+       u32 msg_seqno_3;
+       u64 ap_jump_table_pa;
+       u8 rsvd[40];
+       u8 guest_usage[32];
+} __packed;
+
+#define VMPCK_KEY_LEN          32
+
+/* See the SNP spec version 0.9 for secrets page format */
+struct snp_secrets_page_layout {
+       u32 version;
+       u32 imien       : 1,
+           rsvd1       : 31;
+       u32 fms;
+       u32 rsvd2;
+       u8 gosvw[16];
+       u8 vmpck0[VMPCK_KEY_LEN];
+       u8 vmpck1[VMPCK_KEY_LEN];
+       u8 vmpck2[VMPCK_KEY_LEN];
+       u8 vmpck3[VMPCK_KEY_LEN];
+       struct secrets_os_area os_area;
+       u8 rsvd3[3840];
+} __packed;
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 extern struct static_key_false sev_es_enable_key;
 extern void __sev_es_ist_enter(struct pt_regs *regs);
@@ -87,12 +161,71 @@ extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
                                          struct es_em_ctxt *ctxt,
                                          u64 exit_code, u64 exit_info_1,
                                          u64 exit_info_2);
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
+{
+       int rc;
+
+       /* "rmpadjust" mnemonic support in binutils 2.36 and newer */
+       asm volatile(".byte 0xF3,0x0F,0x01,0xFE\n\t"
+                    : "=a"(rc)
+                    : "a"(vaddr), "c"(rmp_psize), "d"(attrs)
+                    : "memory", "cc");
+
+       return rc;
+}
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
+{
+       bool no_rmpupdate;
+       int rc;
+
+       /* "pvalidate" mnemonic support in binutils 2.36 and newer */
+       asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t"
+                    CC_SET(c)
+                    : CC_OUT(c) (no_rmpupdate), "=a"(rc)
+                    : "a"(vaddr), "c"(rmp_psize), "d"(validate)
+                    : "memory", "cc");
+
+       if (no_rmpupdate)
+               return PVALIDATE_FAIL_NOUPDATE;
+
+       return rc;
+}
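
A brief usage sketch, assuming vaddr is a page-aligned virtual address backed
by a 4K guest page (how PVALIDATE_FAIL_NOUPDATE should be treated depends on
the caller):

    /* Illustrative: validate a single 4K page in the RMP. */
    static int snp_validate_4k_page(unsigned long vaddr)
    {
            int rc = pvalidate(vaddr, RMP_PG_SIZE_4K, true);

            /* The page was already in the requested validated state. */
            if (rc == PVALIDATE_FAIL_NOUPDATE)
                    return 0;

            return rc;      /* 0 on success, hardware error code otherwise */
    }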
+void setup_ghcb(void);
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+                                        unsigned int npages);
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+                                       unsigned int npages);
+void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
+void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
+void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
+void snp_set_wakeup_secondary_cpu(void);
+bool snp_init(struct boot_params *bp);
+void snp_abort(void);
+int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
 static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
 static inline void sev_es_nmi_complete(void) { }
 static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
+static inline void setup_ghcb(void) { }
+static inline void __init
+early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+static inline void __init
+early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
+static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_wakeup_secondary_cpu(void) { }
+static inline bool snp_init(struct boot_params *bp) { return false; }
+static inline void snp_abort(void) { }
+static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input,
+                                         unsigned long *fw_err)
+{
+       return -ENOTTY;
+}
 #endif
 
 #endif
diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h
new file mode 100644 (file)
index 0000000..c0ef921
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_IO_H
+#define _ASM_X86_SHARED_IO_H
+
+#include <linux/types.h>
+
+#define BUILDIO(bwl, bw, type)                                         \
+static inline void __out##bwl(type value, u16 port)                    \
+{                                                                      \
+       asm volatile("out" #bwl " %" #bw "0, %w1"                       \
+                    : : "a"(value), "Nd"(port));                       \
+}                                                                      \
+                                                                       \
+static inline type __in##bwl(u16 port)                                 \
+{                                                                      \
+       type value;                                                     \
+       asm volatile("in" #bwl " %w1, %" #bw "0"                        \
+                    : "=a"(value) : "Nd"(port));                       \
+       return value;                                                   \
+}
+
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l,  , u32)
+#undef BUILDIO
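
For reference, BUILDIO(b, b, u8) expands (by hand) to roughly the following
pair, showing how the size suffix and register-width modifier are stitched
into the asm template:

    static inline void __outb(u8 value, u16 port)
    {
            asm volatile("outb %b0, %w1" : : "a"(value), "Nd"(port));
    }

    static inline u8 __inb(u16 port)
    {
            u8 value;

            asm volatile("inb %w1, %b0" : "=a"(value) : "Nd"(port));
            return value;
    }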
+
+#define inb __inb
+#define inw __inw
+#define inl __inl
+#define outb __outb
+#define outw __outw
+#define outl __outl
+
+#endif
diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h
new file mode 100644 (file)
index 0000000..1e6ec10
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_MSR_H
+#define _ASM_X86_SHARED_MSR_H
+
+struct msr {
+       union {
+               struct {
+                       u32 l;
+                       u32 h;
+               };
+               u64 q;
+       };
+};
+
+#endif /* _ASM_X86_SHARED_MSR_H */
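
The anonymous union lets one MSR value be viewed either as a single 64-bit
quantity or as its 32-bit halves, as in this hedged sketch:

    /* Illustrative: split a 64-bit MSR image into EDX:EAX style halves. */
    static inline void msr_split(u64 val, u32 *lo, u32 *hi)
    {
            struct msr m = { .q = val };

            *lo = m.l;      /* low 32 bits (EAX on rdmsr) */
            *hi = m.h;      /* high 32 bits (EDX on rdmsr) */
    }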
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
new file mode 100644 (file)
index 0000000..e53f262
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_TDX_H
+#define _ASM_X86_SHARED_TDX_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#define TDX_HYPERCALL_STANDARD  0
+
+#define TDX_HCALL_HAS_OUTPUT   BIT(0)
+#define TDX_HCALL_ISSUE_STI    BIT(1)
+
+#define TDX_CPUID_LEAF_ID      0x21
+#define TDX_IDENT              "IntelTDX    "
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used in __tdx_hypercall() to pass register values down to the TDCALL
+ * instruction and to get them back when requesting services from the VMM.
+ *
+ * This is a software only structure and not part of the TDX module/VMM ABI.
+ */
+struct tdx_hypercall_args {
+       u64 r10;
+       u64 r11;
+       u64 r12;
+       u64 r13;
+       u64 r14;
+       u64 r15;
+};
+
+/* Used to request services from the VMM */
+u64 __tdx_hypercall(struct tdx_hypercall_args *args, unsigned long flags);
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void);
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_X86_SHARED_TDX_H */
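
A hedged sketch of a caller, assuming the GHCI convention that r10 carries
TDX_HYPERCALL_STANDARD and r11 the hypercall subfunction (the wrapper name is
illustrative):

    /* Illustrative: issue a one-argument TDVMCALL and read back r11. */
    static u64 tdvmcall_1(u64 fn, u64 arg, u64 *out)
    {
            struct tdx_hypercall_args args = {
                    .r10 = TDX_HYPERCALL_STANDARD,
                    .r11 = fn,
                    .r12 = arg,
            };
            u64 ret = __tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);

            /* With TDX_HCALL_HAS_OUTPUT, output registers land back in args */
            if (!ret && out)
                    *out = args.r11;

            return ret;
    }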
index d17b39893b7973073f21814e41d841aed5fec11a..bab490379c65f6b29b4ffa110c627790bd126de3 100644 (file)
 
 #ifdef __ASSEMBLY__
 
-#ifdef CONFIG_X86_SMAP
-
 #define ASM_CLAC \
        ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
 
 #define ASM_STAC \
        ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP
 
-#else /* CONFIG_X86_SMAP */
-
-#define ASM_CLAC
-#define ASM_STAC
-
-#endif /* CONFIG_X86_SMAP */
-
 #else /* __ASSEMBLY__ */
 
-#ifdef CONFIG_X86_SMAP
-
 static __always_inline void clac(void)
 {
        /* Note: a barrier is implicit in alternative() */
@@ -76,19 +65,6 @@ static __always_inline void smap_restore(unsigned long flags)
 #define ASM_STAC \
        ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)
 
-#else /* CONFIG_X86_SMAP */
-
-static inline void clac(void) { }
-static inline void stac(void) { }
-
-static inline unsigned long smap_save(void) { return 0; }
-static inline void smap_restore(unsigned long flags) { }
-
-#define ASM_CLAC
-#define ASM_STAC
-
-#endif /* CONFIG_X86_SMAP */
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_SMAP_H */
index 68c257a3de0d393b9ba904526fb485bb5f12ab38..45b18eb94fa1a854933f78ed6df4371d4855bb4d 100644 (file)
@@ -184,14 +184,15 @@ static inline void wbinvd(void)
        native_wbinvd();
 }
 
-#ifdef CONFIG_X86_64
 
 static inline void load_gs_index(unsigned int selector)
 {
+#ifdef CONFIG_X86_64
        native_load_gs_index(selector);
-}
-
+#else
+       loadsegment(gs, selector);
 #endif
+}
 
 #endif /* CONFIG_PARAVIRT_XXL */
 
index 7b132d0312ebfa914507a2d365e0bc59ce952d2b..a800abb1a99255b7ece0f95f2adc9953303da532 100644 (file)
@@ -19,7 +19,6 @@ struct saved_context {
        u16 gs;
        unsigned long cr0, cr2, cr3, cr4;
        u64 misc_enable;
-       bool misc_enable_saved;
        struct saved_msrs saved_msrs;
        struct desc_ptr gdt_desc;
        struct desc_ptr idt;
@@ -28,6 +27,7 @@ struct saved_context {
        unsigned long tr;
        unsigned long safety;
        unsigned long return_address;
+       bool misc_enable_saved;
 } __attribute__((packed));
 
 /* routines for saving/restoring kernel state */
index 35bb35d28733e52b7fe61886558bde878fbf01d3..54df06687d8348acbce916d589c53ddf73198ea2 100644 (file)
  * Image of the saved processor state, used by the low level ACPI suspend to
  * RAM code and by the low level hibernation code.
  *
- * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
- * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
- * still work as required.
+ * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
+ * and make sure that __save/__restore_processor_state(), defined in
+ * arch/x86/power/cpu.c, still work as required.
+ *
+ * Because the structure is packed, make sure to avoid unaligned members.
+ * This matters for optimisation purposes, but also because tools like
+ * kmemleak only search for pointers at aligned addresses.
  */
 struct saved_context {
        struct pt_regs regs;
@@ -36,7 +40,6 @@ struct saved_context {
 
        unsigned long cr0, cr2, cr3, cr4;
        u64 misc_enable;
-       bool misc_enable_saved;
        struct saved_msrs saved_msrs;
        unsigned long efer;
        u16 gdt_pad; /* Unused */
@@ -48,6 +51,7 @@ struct saved_context {
        unsigned long tr;
        unsigned long safety;
        unsigned long return_address;
+       bool misc_enable_saved;
 } __attribute__((packed));
 
 #define loaddebug(thread,register) \
index f70a5108d46421d7d5887526790eead68b8fa921..1b07fba11704ee63b5abb41db38db7bb519593f9 100644 (file)
@@ -271,6 +271,7 @@ struct vmcb_seg {
        u64 base;
 } __packed;
 
+/* Save area definition for legacy and SEV-MEM guests */
 struct vmcb_save_area {
        struct vmcb_seg es;
        struct vmcb_seg cs;
@@ -282,12 +283,12 @@ struct vmcb_save_area {
        struct vmcb_seg ldtr;
        struct vmcb_seg idtr;
        struct vmcb_seg tr;
-       u8 reserved_1[43];
+       u8 reserved_1[42];
+       u8 vmpl;
        u8 cpl;
        u8 reserved_2[4];
        u64 efer;
-       u8 reserved_3[104];
-       u64 xss;                /* Valid for SEV-ES only */
+       u8 reserved_3[112];
        u64 cr4;
        u64 cr3;
        u64 cr0;
@@ -297,7 +298,9 @@ struct vmcb_save_area {
        u64 rip;
        u8 reserved_4[88];
        u64 rsp;
-       u8 reserved_5[24];
+       u64 s_cet;
+       u64 ssp;
+       u64 isst_addr;
        u64 rax;
        u64 star;
        u64 lstar;
@@ -308,29 +311,145 @@ struct vmcb_save_area {
        u64 sysenter_esp;
        u64 sysenter_eip;
        u64 cr2;
-       u8 reserved_6[32];
+       u8 reserved_5[32];
        u64 g_pat;
        u64 dbgctl;
        u64 br_from;
        u64 br_to;
        u64 last_excp_from;
        u64 last_excp_to;
-
-       /*
-        * The following part of the save area is valid only for
-        * SEV-ES guests when referenced through the GHCB or for
-        * saving to the host save area.
-        */
-       u8 reserved_7[72];
+       u8 reserved_6[72];
        u32 spec_ctrl;          /* Guest version of SPEC_CTRL at 0x2E0 */
-       u8 reserved_7b[4];
+} __packed;
+
+/* Save area definition for SEV-ES and SEV-SNP guests */
+struct sev_es_save_area {
+       struct vmcb_seg es;
+       struct vmcb_seg cs;
+       struct vmcb_seg ss;
+       struct vmcb_seg ds;
+       struct vmcb_seg fs;
+       struct vmcb_seg gs;
+       struct vmcb_seg gdtr;
+       struct vmcb_seg ldtr;
+       struct vmcb_seg idtr;
+       struct vmcb_seg tr;
+       u64 vmpl0_ssp;
+       u64 vmpl1_ssp;
+       u64 vmpl2_ssp;
+       u64 vmpl3_ssp;
+       u64 u_cet;
+       u8 reserved_1[2];
+       u8 vmpl;
+       u8 cpl;
+       u8 reserved_2[4];
+       u64 efer;
+       u8 reserved_3[104];
+       u64 xss;
+       u64 cr4;
+       u64 cr3;
+       u64 cr0;
+       u64 dr7;
+       u64 dr6;
+       u64 rflags;
+       u64 rip;
+       u64 dr0;
+       u64 dr1;
+       u64 dr2;
+       u64 dr3;
+       u64 dr0_addr_mask;
+       u64 dr1_addr_mask;
+       u64 dr2_addr_mask;
+       u64 dr3_addr_mask;
+       u8 reserved_4[24];
+       u64 rsp;
+       u64 s_cet;
+       u64 ssp;
+       u64 isst_addr;
+       u64 rax;
+       u64 star;
+       u64 lstar;
+       u64 cstar;
+       u64 sfmask;
+       u64 kernel_gs_base;
+       u64 sysenter_cs;
+       u64 sysenter_esp;
+       u64 sysenter_eip;
+       u64 cr2;
+       u8 reserved_5[32];
+       u64 g_pat;
+       u64 dbgctl;
+       u64 br_from;
+       u64 br_to;
+       u64 last_excp_from;
+       u64 last_excp_to;
+       u8 reserved_7[80];
        u32 pkru;
-       u8 reserved_7a[20];
-       u64 reserved_8;         /* rax already available at 0x01f8 */
+       u8 reserved_8[20];
+       u64 reserved_9;         /* rax already available at 0x01f8 */
+       u64 rcx;
+       u64 rdx;
+       u64 rbx;
+       u64 reserved_10;        /* rsp already available at 0x01d8 */
+       u64 rbp;
+       u64 rsi;
+       u64 rdi;
+       u64 r8;
+       u64 r9;
+       u64 r10;
+       u64 r11;
+       u64 r12;
+       u64 r13;
+       u64 r14;
+       u64 r15;
+       u8 reserved_11[16];
+       u64 guest_exit_info_1;
+       u64 guest_exit_info_2;
+       u64 guest_exit_int_info;
+       u64 guest_nrip;
+       u64 sev_features;
+       u64 vintr_ctrl;
+       u64 guest_exit_code;
+       u64 virtual_tom;
+       u64 tlb_id;
+       u64 pcpu_id;
+       u64 event_inj;
+       u64 xcr0;
+       u8 reserved_12[16];
+
+       /* Floating point area */
+       u64 x87_dp;
+       u32 mxcsr;
+       u16 x87_ftw;
+       u16 x87_fsw;
+       u16 x87_fcw;
+       u16 x87_fop;
+       u16 x87_ds;
+       u16 x87_cs;
+       u64 x87_rip;
+       u8 fpreg_x87[80];
+       u8 fpreg_xmm[256];
+       u8 fpreg_ymm[256];
+} __packed;
+
+struct ghcb_save_area {
+       u8 reserved_1[203];
+       u8 cpl;
+       u8 reserved_2[116];
+       u64 xss;
+       u8 reserved_3[24];
+       u64 dr7;
+       u8 reserved_4[16];
+       u64 rip;
+       u8 reserved_5[88];
+       u64 rsp;
+       u8 reserved_6[24];
+       u64 rax;
+       u8 reserved_7[264];
        u64 rcx;
        u64 rdx;
        u64 rbx;
-       u64 reserved_9;         /* rsp already available at 0x01d8 */
+       u8 reserved_8[8];
        u64 rbp;
        u64 rsi;
        u64 rdi;
@@ -342,22 +461,24 @@ struct vmcb_save_area {
        u64 r13;
        u64 r14;
        u64 r15;
-       u8 reserved_10[16];
+       u8 reserved_9[16];
        u64 sw_exit_code;
        u64 sw_exit_info_1;
        u64 sw_exit_info_2;
        u64 sw_scratch;
-       u8 reserved_11[56];
+       u8 reserved_10[56];
        u64 xcr0;
        u8 valid_bitmap[16];
        u64 x87_state_gpa;
 } __packed;
 
+#define GHCB_SHARED_BUF_SIZE   2032
+
 struct ghcb {
-       struct vmcb_save_area save;
-       u8 reserved_save[2048 - sizeof(struct vmcb_save_area)];
+       struct ghcb_save_area save;
+       u8 reserved_save[2048 - sizeof(struct ghcb_save_area)];
 
-       u8 shared_buffer[2032];
+       u8 shared_buffer[GHCB_SHARED_BUF_SIZE];
 
        u8 reserved_1[10];
        u16 protocol_version;   /* negotiated SEV-ES/GHCB protocol version */
@@ -365,13 +486,17 @@ struct ghcb {
 } __packed;
 
 
-#define EXPECTED_VMCB_SAVE_AREA_SIZE           1032
+#define EXPECTED_VMCB_SAVE_AREA_SIZE           740
+#define EXPECTED_GHCB_SAVE_AREA_SIZE           1032
+#define EXPECTED_SEV_ES_SAVE_AREA_SIZE         1648
 #define EXPECTED_VMCB_CONTROL_AREA_SIZE                1024
 #define EXPECTED_GHCB_SIZE                     PAGE_SIZE
 
 static inline void __unused_size_checks(void)
 {
        BUILD_BUG_ON(sizeof(struct vmcb_save_area)      != EXPECTED_VMCB_SAVE_AREA_SIZE);
+       BUILD_BUG_ON(sizeof(struct ghcb_save_area)      != EXPECTED_GHCB_SAVE_AREA_SIZE);
+       BUILD_BUG_ON(sizeof(struct sev_es_save_area)    != EXPECTED_SEV_ES_SAVE_AREA_SIZE);
        BUILD_BUG_ON(sizeof(struct vmcb_control_area)   != EXPECTED_VMCB_CONTROL_AREA_SIZE);
        BUILD_BUG_ON(sizeof(struct ghcb)                != EXPECTED_GHCB_SIZE);
 }
@@ -441,26 +566,26 @@ struct vmcb {
 /* GHCB Accessor functions */
 
 #define GHCB_BITMAP_IDX(field)                                                 \
-       (offsetof(struct vmcb_save_area, field) / sizeof(u64))
+       (offsetof(struct ghcb_save_area, field) / sizeof(u64))
 
 #define DEFINE_GHCB_ACCESSORS(field)                                           \
-       static inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb)     \
+       static __always_inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \
        {                                                                       \
                return test_bit(GHCB_BITMAP_IDX(field),                         \
                                (unsigned long *)&ghcb->save.valid_bitmap);     \
        }                                                                       \
                                                                                \
-       static inline u64 ghcb_get_##field(struct ghcb *ghcb)                   \
+       static __always_inline u64 ghcb_get_##field(struct ghcb *ghcb)          \
        {                                                                       \
                return ghcb->save.field;                                        \
        }                                                                       \
                                                                                \
-       static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb)        \
+       static __always_inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \
        {                                                                       \
                return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0;    \
        }                                                                       \
                                                                                \
-       static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value)       \
+       static __always_inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \
        {                                                                       \
                __set_bit(GHCB_BITMAP_IDX(field),                               \
                          (unsigned long *)&ghcb->save.valid_bitmap);           \
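
For orientation, DEFINE_GHCB_ACCESSORS(rax) would generate a setter along
these lines (hand-expanded sketch, abbreviated to the parts shown above):

    static __always_inline void ghcb_set_rax(struct ghcb *ghcb, u64 value)
    {
            /* Mark the field valid before storing it */
            __set_bit(GHCB_BITMAP_IDX(rax),
                      (unsigned long *)&ghcb->save.valid_bitmap);
            ghcb->save.rax = value;
    }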
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
new file mode 100644 (file)
index 0000000..020c81a
--- /dev/null
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2022 Intel Corporation */
+#ifndef _ASM_X86_TDX_H
+#define _ASM_X86_TDX_H
+
+#include <linux/init.h>
+#include <linux/bits.h>
+#include <asm/ptrace.h>
+#include <asm/shared/tdx.h>
+
+/*
+ * SW-defined error codes.
+ *
+ * Bits 47:40 == 0xFF indicate the reserved status code class that is never
+ * used by the TDX module.
+ */
+#define TDX_ERROR                      _BITUL(63)
+#define TDX_SW_ERROR                   (TDX_ERROR | GENMASK_ULL(47, 40))
+#define TDX_SEAMCALL_VMFAILINVALID     (TDX_SW_ERROR | _UL(0xFFFF0000))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used to gather the output register values of the TDCALL and SEAMCALL
+ * instructions when requesting services from the TDX module.
+ *
+ * This is a software only structure and not part of the TDX module/VMM ABI.
+ */
+struct tdx_module_output {
+       u64 rcx;
+       u64 rdx;
+       u64 r8;
+       u64 r9;
+       u64 r10;
+       u64 r11;
+};
+
+/*
+ * Used by the #VE exception handler to gather the #VE exception
+ * info from the TDX module. This is a software only structure
+ * and not part of the TDX module/VMM ABI.
+ */
+struct ve_info {
+       u64 exit_reason;
+       u64 exit_qual;
+       /* Guest Linear (virtual) Address */
+       u64 gla;
+       /* Guest Physical Address */
+       u64 gpa;
+       u32 instr_len;
+       u32 instr_info;
+};
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+
+void __init tdx_early_init(void);
+
+/* Used to communicate with the TDX module */
+u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
+                     struct tdx_module_output *out);
+
+void tdx_get_ve_info(struct ve_info *ve);
+
+bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
+
+void tdx_safe_halt(void);
+
+bool tdx_early_handle_ve(struct pt_regs *regs);
+
+#else
+
+static inline void tdx_early_init(void) { };
+static inline void tdx_safe_halt(void) { };
+
+static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
+
+#endif /* CONFIG_INTEL_TDX_GUEST */
+
+#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_INTEL_TDX_GUEST)
+long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
+                      unsigned long p3, unsigned long p4);
+#else
+static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1,
+                                    unsigned long p2, unsigned long p3,
+                                    unsigned long p4)
+{
+       return -ENODEV;
+}
+#endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_X86_TDX_H */
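
A hedged sketch of how the guest-side pieces above pair up in a #VE handler
(mirroring, not reproducing, the arch trap code):

    /* Illustrative: fetch #VE details, then try to handle the event. */
    static bool try_handle_ve(struct pt_regs *regs)
    {
            struct ve_info ve;

            tdx_get_ve_info(&ve);   /* exit reason, qualification, GLA/GPA */

            return tdx_handle_virt_exception(regs, &ve);
    }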
index ebec69c35e951c721d2640d74610cc29f2e8fbff..f0cb881c1d690859feac72cc92afdd9abe9d0b3a 100644 (file)
@@ -92,7 +92,6 @@ struct thread_info {
 #define TIF_NOCPUID            15      /* CPUID is not accessible in userland */
 #define TIF_NOTSC              16      /* TSC is not accessible in userland */
 #define TIF_NOTIFY_SIGNAL      17      /* signal notifications exist */
-#define TIF_SLD                        18      /* Restore split lock detection on context switch */
 #define TIF_MEMDIE             20      /* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG     21      /* idle is polling for TIF_NEED_RESCHED */
 #define TIF_IO_BITMAP          22      /* uses I/O bitmap */
@@ -116,7 +115,6 @@ struct thread_info {
 #define _TIF_NOCPUID           (1 << TIF_NOCPUID)
 #define _TIF_NOTSC             (1 << TIF_NOTSC)
 #define _TIF_NOTIFY_SIGNAL     (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_SLD               (1 << TIF_SLD)
 #define _TIF_POLLING_NRFLAG    (1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP         (1 << TIF_IO_BITMAP)
 #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
@@ -128,7 +126,7 @@ struct thread_info {
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE                                   \
        (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP |           \
-        _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
+        _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
 
 /*
  * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
index a4a8b1b16c0c100d830124984b313f923531bd84..956e4145311b12744ad78df053e991308cfe03ed 100644 (file)
@@ -5,6 +5,15 @@
 #include <asm/processor.h>
 #include <asm/tsc.h>
 
+static inline unsigned long random_get_entropy(void)
+{
+       if (!IS_ENABLED(CONFIG_X86_TSC) &&
+           !cpu_feature_enabled(X86_FEATURE_TSC))
+               return random_get_entropy_fallback();
+       return rdtsc();
+}
+#define random_get_entropy random_get_entropy
+
 /* Assume we use the PIT time source for the clock tick */
 #define CLOCK_TICK_RATE                PIT_TICK_RATE
 
index 9619385bf7494b947330bb1203e30207300bb029..458c891a82736549469c2c09b38848f18634412a 100644 (file)
@@ -212,30 +212,19 @@ static inline long arch_scale_freq_capacity(int cpu)
 }
 #define arch_scale_freq_capacity arch_scale_freq_capacity
 
-extern void arch_scale_freq_tick(void);
-#define arch_scale_freq_tick arch_scale_freq_tick
-
 extern void arch_set_max_freq_ratio(bool turbo_disabled);
-void init_freq_invariance(bool secondary, bool cppc_ready);
+extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
 #else
-static inline void arch_set_max_freq_ratio(bool turbo_disabled)
-{
-}
-static inline void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-}
+static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
+static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
 #endif
 
+extern void arch_scale_freq_tick(void);
+#define arch_scale_freq_tick arch_scale_freq_tick
+
 #ifdef CONFIG_ACPI_CPPC_LIB
 void init_freq_invariance_cppc(void);
 #define arch_init_invariance_cppc init_freq_invariance_cppc
-
-bool amd_set_max_freq_ratio(u64 *ratio);
-#else
-static inline bool amd_set_max_freq_ratio(u64 *ratio)
-{
-       return false;
-}
 #endif
 
 #endif /* _ASM_X86_TOPOLOGY_H */
index 35317c5c551d98eee0fd664ccfcfcfbb7e8cd92a..47ecfff2c83dade6d993461c1b9c7494a05ffb28 100644 (file)
@@ -13,7 +13,7 @@
 #ifdef CONFIG_X86_64
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
+struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
 void __init trap_init(void);
 asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
 #endif
index 01a300a9700b9bb4ac352fcd9c65593a8b762567..fbdc3d951494358faccee071be1ceb545257dcdb 100644 (file)
@@ -20,13 +20,12 @@ extern void disable_TSC(void);
 
 static inline cycles_t get_cycles(void)
 {
-#ifndef CONFIG_X86_TSC
-       if (!boot_cpu_has(X86_FEATURE_TSC))
+       if (!IS_ENABLED(CONFIG_X86_TSC) &&
+           !cpu_feature_enabled(X86_FEATURE_TSC))
                return 0;
-#endif
-
        return rdtsc();
 }
+#define get_cycles get_cycles
 
 extern struct system_counterval_t convert_art_to_tsc(u64 art);
 extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
index 7ee7ba0d63a352c1c22a36b82bcbc0510cfc9570..769b939444aef0911b3516d63c798fb40b8ee8e7 100644 (file)
@@ -31,9 +31,22 @@ enum hsmp_message_ids {
        HSMP_GET_CCLK_THROTTLE_LIMIT,   /* 10h Get CCLK frequency limit in socket */
        HSMP_GET_C0_PERCENT,            /* 11h Get average C0 residency in socket */
        HSMP_SET_NBIO_DPM_LEVEL,        /* 12h Set max/min LCLK DPM Level for a given NBIO */
-                                       /* 13h Reserved */
-       HSMP_GET_DDR_BANDWIDTH = 0x14,  /* 14h Get theoretical maximum and current DDR Bandwidth */
-       HSMP_GET_TEMP_MONITOR,          /* 15h Get per-DIMM temperature and refresh rates */
+       HSMP_GET_NBIO_DPM_LEVEL,        /* 13h Get LCLK DPM level min and max for a given NBIO */
+       HSMP_GET_DDR_BANDWIDTH,         /* 14h Get theoretical maximum and current DDR Bandwidth */
+       HSMP_GET_TEMP_MONITOR,          /* 15h Get socket temperature */
+       HSMP_GET_DIMM_TEMP_RANGE,       /* 16h Get per-DIMM temperature range and refresh rate */
+       HSMP_GET_DIMM_POWER,            /* 17h Get per-DIMM power consumption */
+       HSMP_GET_DIMM_THERMAL,          /* 18h Get per-DIMM thermal sensors */
+       HSMP_GET_SOCKET_FREQ_LIMIT,     /* 19h Get current active frequency per socket */
+       HSMP_GET_CCLK_CORE_LIMIT,       /* 1Ah Get CCLK frequency limit per core */
+       HSMP_GET_RAILS_SVI,             /* 1Bh Get SVI-based Telemetry for all rails */
+       HSMP_GET_SOCKET_FMAX_FMIN,      /* 1Ch Get Fmax and Fmin per socket */
+       HSMP_GET_IOLINK_BANDWITH,       /* 1Dh Get current bandwidth on IO Link */
+       HSMP_GET_XGMI_BANDWITH,         /* 1Eh Get current bandwidth on xGMI Link */
+       HSMP_SET_GMI3_WIDTH,            /* 1Fh Set max and min GMI3 Link width */
+       HSMP_SET_PCI_RATE,              /* 20h Control link rate on PCIe devices */
+       HSMP_SET_POWER_MODE,            /* 21h Select power efficiency profile policy */
+       HSMP_SET_PSTATE_MAX_MIN,        /* 22h Set the max and min DF P-State  */
        HSMP_MSG_ID_MAX,
 };
 
@@ -175,8 +188,12 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
         */
        {1, 0, HSMP_SET},
 
-       /* RESERVED message */
-       {0, 0, HSMP_RSVD},
+       /*
+        * HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1
+        * input: args[0] = nbioid[23:16]
+        * output: args[0] = max dpm level[15:8] + min dpm level[7:0]
+        */
+       {1, 1, HSMP_GET},
 
        /*
         * HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1
@@ -191,6 +208,93 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
         * [7:5] fractional part
         */
        {0, 1, HSMP_GET},
+
+       /*
+        * HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1
+        * input: args[0] = DIMM address[7:0]
+        * output: args[0] = refresh rate[3] + temperature range[2:0]
+        */
+       {1, 1, HSMP_GET},
+
+       /*
+        * HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1
+        * input: args[0] = DIMM address[7:0]
+        * output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] +
+        * DIMM address[7:0]
+        */
+       {1, 1, HSMP_GET},
+
+       /*
+        * HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1
+        * input: args[0] = DIMM address[7:0]
+        * output: args[0] = temperature in degrees Celsius[31:21] + update rate in ms[16:8] +
+        * DIMM address[7:0]
+        */
+       {1, 1, HSMP_GET},
+
+       /*
+        * HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1
+        * output: args[0] = frequency in MHz[31:16] + frequency source[15:0]
+        */
+       {0, 1, HSMP_GET},
+
+       /*
+        * HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1
+        * input: args[0] = apic id [31:0]
+        * output: args[0] = frequency in MHz[31:0]
+        */
+       {1, 1, HSMP_GET},
+
+       /*
+        * HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1
+        * output: args[0] = power in mW[31:0]
+        */
+       {0, 1, HSMP_GET},
+
+       /*
+        * HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1
+        * output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0]
+        */
+       {0, 1, HSMP_GET},
+
+       /*
+        * HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1
+        * input: args[0] = link id[15:8] + bw type[2:0]
+        * output: args[0] = io bandwidth in Mbps[31:0]
+        */
+       {1, 1, HSMP_GET},
+
+       /*
+        * HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1
+        * input: args[0] = link id[15:8] + bw type[2:0]
+        * output: args[0] = xgmi bandwidth in Mbps[31:0]
+        */
+       {1, 1, HSMP_GET},
+
+       /*
+        * HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0
+        * input: args[0] = min link width[15:8] + max link width[7:0]
+        */
+       {1, 0, HSMP_SET},
+
+       /*
+        * HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1
+        * input: args[0] = link rate control value
+        * output: args[0] = previous link rate control value
+        */
+       {1, 1, HSMP_SET},
+
+       /*
+        * HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0
+        * input: args[0] = power efficiency mode[2:0]
+        */
+       {1, 0, HSMP_SET},
+
+       /*
+        * HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0
+        * input: args[0] = min df pstate[15:8] + max df pstate[7:0]
+        */
+       {1, 0, HSMP_SET},
 };
 
 /* Reset to default packing */
index b25d3f82c2f36a9a210863d0540f2adc5744238a..bea5cdcdf53252bf4fed4199ee2c7b11a48416ac 100644 (file)
@@ -10,6 +10,7 @@
 #define SETUP_EFI                      4
 #define SETUP_APPLE_PROPERTIES         5
 #define SETUP_JAILHOUSE                        6
+#define SETUP_CC_BLOB                  7
 
 #define SETUP_INDIRECT                 (1<<31)
 
@@ -187,7 +188,8 @@ struct boot_params {
        __u32 ext_ramdisk_image;                        /* 0x0c0 */
        __u32 ext_ramdisk_size;                         /* 0x0c4 */
        __u32 ext_cmd_line_ptr;                         /* 0x0c8 */
-       __u8  _pad4[116];                               /* 0x0cc */
+       __u8  _pad4[112];                               /* 0x0cc */
+       __u32 cc_blob_address;                          /* 0x13c */
        struct edid_info edid_info;                     /* 0x140 */
        struct efi_info efi_info;                       /* 0x1c0 */
        __u32 alt_mem_k;                                /* 0x1e0 */
index efa969325ede55d39a3b86b7e72eb2f4b62f78a2..f69c168391aa58cf280ba6fb5aca6630e3593d1a 100644 (file)
 #define SVM_VMGEXIT_AP_JUMP_TABLE              0x80000005
 #define SVM_VMGEXIT_SET_AP_JUMP_TABLE          0
 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE          1
+#define SVM_VMGEXIT_PSC                                0x80000010
+#define SVM_VMGEXIT_GUEST_REQUEST              0x80000011
+#define SVM_VMGEXIT_EXT_GUEST_REQUEST          0x80000012
+#define SVM_VMGEXIT_AP_CREATION                        0x80000013
+#define SVM_VMGEXIT_AP_CREATE_ON_INIT          0
+#define SVM_VMGEXIT_AP_CREATE                  1
+#define SVM_VMGEXIT_AP_DESTROY                 2
+#define SVM_VMGEXIT_HV_FEATURES                        0x8000fffd
 #define SVM_VMGEXIT_UNSUPPORTED_EVENT          0x8000ffff
 
 /* Exit code reserved for hypervisor/software use */
        { SVM_VMGEXIT_NMI_COMPLETE,     "vmgexit_nmi_complete" }, \
        { SVM_VMGEXIT_AP_HLT_LOOP,      "vmgexit_ap_hlt_loop" }, \
        { SVM_VMGEXIT_AP_JUMP_TABLE,    "vmgexit_ap_jump_table" }, \
+       { SVM_VMGEXIT_PSC,              "vmgexit_page_state_change" }, \
+       { SVM_VMGEXIT_GUEST_REQUEST,    "vmgexit_guest_request" }, \
+       { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \
+       { SVM_VMGEXIT_AP_CREATION,      "vmgexit_ap_creation" }, \
+       { SVM_VMGEXIT_HV_FEATURES,      "vmgexit_hypervisor_feature" }, \
        { SVM_EXIT_ERR,         "invalid_guest_state" }
 
 
index c41ef42adbe8a3b13d10442b3d3b590b1ac40836..1a2dc328cb5ee82f465a3e230b9712285d6344e9 100644 (file)
@@ -46,8 +46,6 @@ endif
 # non-deterministic coverage.
 KCOV_INSTRUMENT                := n
 
-CFLAGS_head$(BITS).o   += -fno-stack-protector
-
 CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
 
 obj-y                  := process_$(BITS).o signal.o
index 0d01e7f5078c25a6b9d1c933691cebe3b6b30240..907cc98b1938084409350e5864b84558905b2846 100644 (file)
@@ -65,6 +65,13 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 static bool acpi_support_online_capable;
 #endif
 
+#ifdef CONFIG_X86_64
+/* Physical address of the Multiprocessor Wakeup Structure mailbox */
+static u64 acpi_mp_wake_mailbox_paddr;
+/* Virtual address of the Multiprocessor Wakeup Structure mailbox */
+static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;
+#endif
+
 #ifdef CONFIG_X86_IO_APIC
 /*
  * Locks related to IOAPIC hotplug
@@ -336,7 +343,60 @@ acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long e
        return 0;
 }
 
-#endif                         /*CONFIG_X86_LOCAL_APIC */
+#ifdef CONFIG_X86_64
+static int acpi_wakeup_cpu(int apicid, unsigned long start_ip)
+{
+       /*
+        * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
+        *
+        * Wakeup of secondary CPUs is fully serialized in the core code.
+        * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
+        */
+       if (!acpi_mp_wake_mailbox) {
+               acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
+                                               sizeof(*acpi_mp_wake_mailbox),
+                                               MEMREMAP_WB);
+       }
+
+       /*
+        * Mailbox memory is shared between the firmware and the OS. The
+        * firmware listens on the mailbox command address and, once it
+        * receives the wakeup command, boots the CPU associated with the
+        * given apicid.
+        *
+        * The values of 'apic_id' and 'wakeup_vector' must be visible to the
+        * firmware before the wakeup command is visible.  smp_store_release()
+        * ensures ordering and visibility.
+        */
+       acpi_mp_wake_mailbox->apic_id       = apicid;
+       acpi_mp_wake_mailbox->wakeup_vector = start_ip;
+       smp_store_release(&acpi_mp_wake_mailbox->command,
+                         ACPI_MP_WAKE_COMMAND_WAKEUP);
+
+       /*
+        * Wait for the CPU to wake up.
+        *
+        * The CPU being woken up is essentially in a spin loop waiting to be
+        * woken up. It should not take long for it to wake up and acknowledge
+        * by zeroing out ->command.
+        *
+        * The ACPI specification doesn't provide any guidance on how long the
+        * kernel has to wait for a wake-up acknowledgement. It also doesn't
+        * provide a way to cancel a wake-up request if it takes too long.
+        *
+        * In a TDX environment, the VMM has control over how long it takes to
+        * wake up a secondary CPU. It can postpone scheduling the secondary
+        * vCPU indefinitely. Giving up on the wake-up request and reporting
+        * an error would open a possible attack vector for the VMM: it could
+        * wake up a secondary CPU when the kernel doesn't expect it. So wait
+        * until the wake-up request gets a positive result.
+        */
+       while (READ_ONCE(acpi_mp_wake_mailbox->command))
+               cpu_relax();
+
+       return 0;
+}
+#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_IO_APIC
 #define MP_ISA_BUS             0
@@ -375,7 +435,7 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
        isa_irq_to_gsi[bus_irq] = gsi;
 }
 
-static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+static void mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
                        int polarity)
 {
 #ifdef CONFIG_X86_MPPARSE
@@ -387,9 +447,9 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
        u8 pin;
 
        if (!acpi_ioapic)
-               return 0;
+               return;
        if (!dev || !dev_is_pci(dev))
-               return 0;
+               return;
 
        pdev = to_pci_dev(dev);
        number = pdev->bus->number;
@@ -408,7 +468,6 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
 
        mp_save_irq(&mp_irq);
 #endif
-       return 0;
 }
 
 static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
@@ -1083,6 +1142,29 @@ static int __init acpi_parse_madt_lapic_entries(void)
        }
        return 0;
 }
+
+#ifdef CONFIG_X86_64
+static int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
+                                    const unsigned long end)
+{
+       struct acpi_madt_multiproc_wakeup *mp_wake;
+
+       if (!IS_ENABLED(CONFIG_SMP))
+               return -ENODEV;
+
+       mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
+       if (BAD_MADT_ENTRY(mp_wake, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(&header->common);
+
+       acpi_mp_wake_mailbox_paddr = mp_wake->base_address;
+
+       acpi_wake_cpu_handler_update(acpi_wakeup_cpu);
+
+       return 0;
+}
+#endif                         /* CONFIG_X86_64 */
 #endif                         /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_IO_APIC
@@ -1278,6 +1360,14 @@ static void __init acpi_process_madt(void)
 
                                smp_found_config = 1;
                        }
+
+#ifdef CONFIG_X86_64
+                       /*
+                        * Parse MADT MP Wake entry.
+                        */
+                       acpi_table_parse_madt(ACPI_MADT_TYPE_MULTIPROC_WAKEUP,
+                                             acpi_parse_mp_wake, 1);
+#endif
                }
                if (error == -EINVAL) {
                        /*
@@ -1772,7 +1862,7 @@ int __acpi_release_global_lock(unsigned int *lock)
 
 void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
 {
-       e820__range_add(addr, size, E820_TYPE_ACPI);
+       e820__range_add(addr, size, E820_TYPE_NVS);
        e820__update_table_print();
 }
 
index df1644d9b3b66c635a8bf99241c3fd15be97fcbd..8b8cbf22461a4feff2249a2d731b538a31c30501 100644 (file)
@@ -50,20 +50,17 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
        return err;
 }
 
-bool amd_set_max_freq_ratio(u64 *ratio)
+static void amd_set_max_freq_ratio(void)
 {
        struct cppc_perf_caps perf_caps;
        u64 highest_perf, nominal_perf;
        u64 perf_ratio;
        int rc;
 
-       if (!ratio)
-               return false;
-
        rc = cppc_get_perf_caps(0, &perf_caps);
        if (rc) {
                pr_debug("Could not retrieve perf counters (%d)\n", rc);
-               return false;
+               return;
        }
 
        highest_perf = amd_get_highest_perf();
@@ -71,7 +68,7 @@ bool amd_set_max_freq_ratio(u64 *ratio)
 
        if (!highest_perf || !nominal_perf) {
                pr_debug("Could not retrieve highest or nominal performance\n");
-               return false;
+               return;
        }
 
        perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
@@ -79,25 +76,27 @@ bool amd_set_max_freq_ratio(u64 *ratio)
        perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
        if (!perf_ratio) {
                pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
-               return false;
+               return;
        }
 
-       *ratio = perf_ratio;
-       arch_set_max_freq_ratio(false);
-
-       return true;
+       freq_invariance_set_perf_ratio(perf_ratio, false);
 }
 
 static DEFINE_MUTEX(freq_invariance_lock);
 
 void init_freq_invariance_cppc(void)
 {
-       static bool secondary;
+       static bool init_done;
 
-       mutex_lock(&freq_invariance_lock);
+       if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+               return;
 
-       init_freq_invariance(secondary, true);
-       secondary = true;
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+               return;
 
+       mutex_lock(&freq_invariance_lock);
+       if (!init_done)
+               amd_set_max_freq_ratio();
+       init_done = true;
        mutex_unlock(&freq_invariance_lock);
 }
index d374cb3cf024c2bae7cba1ac42cce15d119dcda3..3c66073e7645ab83784e35f5cd6135cdb2aaac9c 100644 (file)
@@ -338,7 +338,7 @@ next:
        }
 }
 
-#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
+#if defined(CONFIG_RETPOLINE) && defined(CONFIG_OBJTOOL)
 
 /*
  * CALL/JMP *%\reg
@@ -507,11 +507,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
        }
 }
 
-#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */
 
 void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
 
-#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
+#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */
 
 #ifdef CONFIG_X86_KERNEL_IBT
 
index 020c906f79349548d1d1ed5ddf101e1c83293c85..190e0f76337559f27e3b42f246f4b06f12a23ddc 100644 (file)
@@ -188,7 +188,7 @@ int amd_smn_write(u16 node, u32 address, u32 value)
 EXPORT_SYMBOL_GPL(amd_smn_write);
 
 
-int amd_cache_northbridges(void)
+static int amd_cache_northbridges(void)
 {
        const struct pci_device_id *misc_ids = amd_nb_misc_ids;
        const struct pci_device_id *link_ids = amd_nb_link_ids;
@@ -210,14 +210,14 @@ int amd_cache_northbridges(void)
        }
 
        misc = NULL;
-       while ((misc = next_northbridge(misc, misc_ids)) != NULL)
+       while ((misc = next_northbridge(misc, misc_ids)))
                misc_count++;
 
        if (!misc_count)
                return -ENODEV;
 
        root = NULL;
-       while ((root = next_northbridge(root, root_ids)) != NULL)
+       while ((root = next_northbridge(root, root_ids)))
                root_count++;
 
        if (root_count) {
@@ -290,7 +290,6 @@ int amd_cache_northbridges(void)
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(amd_cache_northbridges);
 
 /*
  * Ignores subdevice/subvendor but as far as I can figure out
index b70344bf660083bbeb998bc0b74737a21047d46b..189d3a5e471adc43c44b7d6aa8d2805a0a9a475b 100644 (file)
@@ -170,7 +170,7 @@ static __init int setup_apicpmtimer(char *s)
 {
        apic_calibrate_pmtmr = 1;
        notsc_setup(NULL);
-       return 0;
+       return 1;
 }
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
@@ -320,6 +320,9 @@ int lapic_get_maxlvt(void)
 #define APIC_DIVISOR 16
 #define TSC_DIVISOR  8
 
+/* i82489DX specific */
+#define                I82489DX_BASE_DIVIDER           (((0x2) << 18))
+
 /*
  * This function sets up the local APIC timer, with a timeout of
  * 'clocks' APIC bus clock. During calibration we actually call
@@ -340,8 +343,14 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
        else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
                lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
 
+       /*
+        * The i82489DX APIC uses bits 18 and 19 for the base divider.  This
+        * overlaps with bit 18 on integrated APICs, but is not documented
+        * in the SDM. No problem though: i82489DX-equipped systems do not
+        * have a TSC deadline timer.
+        */
        if (!lapic_is_integrated())
-               lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+               lvtt_value |= I82489DX_BASE_DIVIDER;
 
        if (!irqen)
                lvtt_value |= APIC_LVT_MASKED;
@@ -1419,22 +1428,21 @@ void __init apic_intr_mode_init(void)
                return;
        case APIC_VIRTUAL_WIRE:
                pr_info("APIC: Switch to virtual wire mode setup\n");
-               default_setup_apic_routing();
                break;
        case APIC_VIRTUAL_WIRE_NO_CONFIG:
                pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
                upmode = true;
-               default_setup_apic_routing();
                break;
        case APIC_SYMMETRIC_IO:
                pr_info("APIC: Switch to symmetric I/O mode setup\n");
-               default_setup_apic_routing();
                break;
        case APIC_SYMMETRIC_IO_NO_ROUTING:
                pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
                break;
        }
 
+       default_setup_apic_routing();
+
        if (x86_platform.apic_post_init)
                x86_platform.apic_post_init();
 
@@ -2551,6 +2559,16 @@ u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
 }
 EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);
 
+#ifdef CONFIG_X86_64
+void __init acpi_wake_cpu_handler_update(wakeup_cpu_handler handler)
+{
+       struct apic **drv;
+
+       for (drv = __apicdrivers; drv < __apicdrivers_end; drv++)
+               (*drv)->wakeup_secondary_cpu_64 = handler;
+}
+#endif
+
 /*
  * Override the generic EOI implementation with an optimized version.
  * Only called during early boot when only one CPU is active and with
index c1bb384935b0555597c2fa500cbef9141d895c83..a868b76cd3d4241eea784dc2c1486d5bbba1cd11 100644 (file)
@@ -65,6 +65,7 @@
 #include <asm/irq_remapping.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
+#include <asm/pgtable.h>
 
 #define        for_each_ioapic(idx)            \
        for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
@@ -2677,6 +2678,19 @@ static struct resource * __init ioapic_setup_resources(void)
        return res;
 }
 
+static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
+{
+       pgprot_t flags = FIXMAP_PAGE_NOCACHE;
+
+       /*
+        * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot
+        * bits, just like normal ioremap():
+        */
+       flags = pgprot_decrypted(flags);
+
+       __set_fixmap(idx, phys, flags);
+}
+
 void __init io_apic_init_mappings(void)
 {
        unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
@@ -2709,7 +2723,7 @@ fake_ioapic_page:
                                      __func__, PAGE_SIZE, PAGE_SIZE);
                        ioapic_phys = __pa(ioapic_phys);
                }
-               set_fixmap_nocache(idx, ioapic_phys);
+               io_apic_set_fixmap(idx, ioapic_phys);
                apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
                        __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
                        ioapic_phys);
@@ -2838,7 +2852,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
        ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
        ioapics[idx].mp_config.apicaddr = address;
 
-       set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+       io_apic_set_fixmap(FIX_IO_APIC_BASE_0 + idx, address);
        if (bad_ioapic_register(idx)) {
                clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
                return -ENODEV;
index f5a48e66e4f5462f83c4a76e89554377372964c6..48285522796454b4b368e8ff1c0287e4bef7d574 100644 (file)
@@ -199,7 +199,13 @@ static void __init uv_tsc_check_sync(void)
        int mmr_shift;
        char *state;
 
-       /* Different returns from different UV BIOS versions */
+       /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */
+       if (!is_uv(UV2|UV3|UV4)) {
+               mark_tsc_async_resets("UV5+");
+               return;
+       }
+
+       /* UV2, UV3, UV4: UV BIOS TSC sync state available */
        mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
        mmr_shift =
                is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
@@ -1340,7 +1346,7 @@ static void __init decode_gam_params(unsigned long ptr)
 static void __init decode_gam_rng_tbl(unsigned long ptr)
 {
        struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;
-       unsigned long lgre = 0;
+       unsigned long lgre = 0, gend = 0;
        int index = 0;
        int sock_min = 999999, pnode_min = 99999;
        int sock_max = -1, pnode_max = -1;
@@ -1374,6 +1380,9 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
                        flag, size, suffix[order],
                        gre->type, gre->nasid, gre->sockid, gre->pnode);
 
+               if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
+                       gend = (unsigned long)gre->limit << UV_GAM_RANGE_SHFT;
+
                /* update to next range start */
                lgre = gre->limit;
                if (sock_min > gre->sockid)
@@ -1391,7 +1400,8 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
        _max_pnode      = pnode_max;
        _gr_table_len   = index;
 
-       pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", index, _min_socket, _max_socket, _min_pnode, _max_pnode);
+       pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x), pnodes(min:%x,max:%x), gap_end(%d)\n",
+         index, _min_socket, _max_socket, _min_pnode, _max_pnode, fls64(gend));
 }
 
 /* Walk through UVsystab decoding the fields */
index 9fb0a2f8b62a2da72cd1915eb73ea6a5880984a1..437308004ef2e4f1345947ce318c62bc56a6036f 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/bootparam.h>
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
+#include <asm/tdx.h>
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -65,6 +66,22 @@ static void __used common(void)
        OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
 #endif
 
+       BLANK();
+       OFFSET(TDX_MODULE_rcx, tdx_module_output, rcx);
+       OFFSET(TDX_MODULE_rdx, tdx_module_output, rdx);
+       OFFSET(TDX_MODULE_r8,  tdx_module_output, r8);
+       OFFSET(TDX_MODULE_r9,  tdx_module_output, r9);
+       OFFSET(TDX_MODULE_r10, tdx_module_output, r10);
+       OFFSET(TDX_MODULE_r11, tdx_module_output, r11);
+
+       BLANK();
+       OFFSET(TDX_HYPERCALL_r10, tdx_hypercall_args, r10);
+       OFFSET(TDX_HYPERCALL_r11, tdx_hypercall_args, r11);
+       OFFSET(TDX_HYPERCALL_r12, tdx_hypercall_args, r12);
+       OFFSET(TDX_HYPERCALL_r13, tdx_hypercall_args, r13);
+       OFFSET(TDX_HYPERCALL_r14, tdx_hypercall_args, r14);
+       OFFSET(TDX_HYPERCALL_r15, tdx_hypercall_args, r15);
+
        BLANK();
        OFFSET(BP_scratch, boot_params, scratch);
        OFFSET(BP_secure_boot, boot_params, secure_boot);
index 9ca008f9e9b1ad8449b0053ab4a99e51e7c30bf7..1f60a2b279368e94c79f64ea43f736baf966f39f 100644 (file)
  * Copyright (C) 2017 Intel Corp.
  * Author: Len Brown <len.brown@intel.com>
  */
-
+#include <linux/cpufreq.h>
 #include <linux/delay.h>
 #include <linux/ktime.h>
 #include <linux/math64.h>
 #include <linux/percpu.h>
-#include <linux/cpufreq.h>
-#include <linux/smp.h>
-#include <linux/sched/isolation.h>
 #include <linux/rcupdate.h>
+#include <linux/sched/isolation.h>
+#include <linux/sched/topology.h>
+#include <linux/smp.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/cpu.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 
 #include "cpu.h"
 
-struct aperfmperf_sample {
-       unsigned int    khz;
-       atomic_t        scfpending;
-       ktime_t time;
-       u64     aperf;
-       u64     mperf;
+struct aperfmperf {
+       seqcount_t      seq;
+       unsigned long   last_update;
+       u64             acnt;
+       u64             mcnt;
+       u64             aperf;
+       u64             mperf;
 };
 
-static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = {
+       .seq = SEQCNT_ZERO(cpu_samples.seq)
+};
 
-#define APERFMPERF_CACHE_THRESHOLD_MS  10
-#define APERFMPERF_REFRESH_DELAY_MS    10
-#define APERFMPERF_STALE_THRESHOLD_MS  1000
+static void init_counter_refs(void)
+{
+       u64 aperf, mperf;
+
+       rdmsrl(MSR_IA32_APERF, aperf);
+       rdmsrl(MSR_IA32_MPERF, mperf);
 
+       this_cpu_write(cpu_samples.aperf, aperf);
+       this_cpu_write(cpu_samples.mperf, mperf);
+}
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
 /*
- * aperfmperf_snapshot_khz()
- * On the current CPU, snapshot APERF, MPERF, and jiffies
- * unless we already did it within 10ms
- * calculate kHz, save snapshot
+ * APERF/MPERF frequency ratio computation.
+ *
+ * The scheduler wants to do frequency invariant accounting and needs a <1
+ * ratio to account for the 'current' frequency, corresponding to
+ * freq_curr / freq_max.
+ *
+ * Since the frequency freq_curr on x86 is controlled by a micro-controller and
+ * our P-state setting is little more than a request/hint, we need to observe
+ * the effective frequency 'BusyMHz', i.e. the average frequency over a time
+ * interval after discarding idle time. This is given by:
+ *
+ *   BusyMHz = delta_APERF / delta_MPERF * freq_base
+ *
+ * where freq_base is the max non-turbo P-state.
+ *
+ * The freq_max term has to be set to a somewhat arbitrary value, because we
+ * can't know which turbo states will be available at a given point in time:
+ * it all depends on the thermal headroom of the entire package. We set it to
+ * the turbo level with 4 cores active.
+ *
+ * Benchmarks show that's a good compromise between the 1C turbo ratio
+ * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
+ * which would ignore the entire turbo range (a conspicuous part, making
+ * freq_curr/freq_max always maxed out).
+ *
+ * An exception to the heuristic above is the Atom uarch, where we choose the
+ * highest turbo level for freq_max since Atoms are generally oriented towards
+ * power efficiency.
+ *
+ * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
+ * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
  */
-static void aperfmperf_snapshot_khz(void *dummy)
+
+DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
+
+static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
+static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
+
+void arch_set_max_freq_ratio(bool turbo_disabled)
 {
-       u64 aperf, aperf_delta;
-       u64 mperf, mperf_delta;
-       struct aperfmperf_sample *s = this_cpu_ptr(&samples);
-       unsigned long flags;
+       arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
+                                       arch_turbo_freq_ratio;
+}
+EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
 
-       local_irq_save(flags);
-       rdmsrl(MSR_IA32_APERF, aperf);
-       rdmsrl(MSR_IA32_MPERF, mperf);
-       local_irq_restore(flags);
+static bool __init turbo_disabled(void)
+{
+       u64 misc_en;
+       int err;
+
+       err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
+       if (err)
+               return false;
+
+       return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
+}
+
+static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+       int err;
+
+       err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
+       if (err)
+               return false;
+
+       err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
+       if (err)
+               return false;
+
+       *base_freq = (*base_freq >> 16) & 0x3F;     /* max P state */
+       *turbo_freq = *turbo_freq & 0x3F;           /* 1C turbo    */
+
+       return true;
+}
+
+#define X86_MATCH(model)                                       \
+       X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,            \
+               INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
+
+static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = {
+       X86_MATCH(XEON_PHI_KNL),
+       X86_MATCH(XEON_PHI_KNM),
+       {}
+};
+
+static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = {
+       X86_MATCH(SKYLAKE_X),
+       {}
+};
+
+static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = {
+       X86_MATCH(ATOM_GOLDMONT),
+       X86_MATCH(ATOM_GOLDMONT_D),
+       X86_MATCH(ATOM_GOLDMONT_PLUS),
+       {}
+};
+
+static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
+                                         int num_delta_fratio)
+{
+       int fratio, delta_fratio, found;
+       int err, i;
+       u64 msr;
+
+       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+       if (err)
+               return false;
+
+       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
+
+       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+       if (err)
+               return false;
+
+       fratio = (msr >> 8) & 0xFF;
+       i = 16;
+       found = 0;
+       do {
+               if (found >= num_delta_fratio) {
+                       *turbo_freq = fratio;
+                       return true;
+               }
+
+               delta_fratio = (msr >> (i + 5)) & 0x7;
+
+               if (delta_fratio) {
+                       found += 1;
+                       fratio -= delta_fratio;
+               }
+
+               i += 8;
+       } while (i < 64);
+
+       return true;
+}
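
The delta decoding above is dense, so here is a hedged user-space replay with a made-up MSR image; the bit layout mirrors the loop itself (1C ratio in bits 15:8, a 3-bit ratio delta in the top bits of each following byte-sized group), not an authoritative KNL datasheet:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t msr = 0x402200;	/* invented MSR_TURBO_RATIO_LIMIT image */
		int fratio = (msr >> 8) & 0xFF;	/* 1C ratio: 0x22 == 34 */
		int num_delta_fratio = 1, found = 0;

		for (int i = 16; i < 64; i += 8) {
			if (found >= num_delta_fratio)
				break;

			int delta_fratio = (msr >> (i + 5)) & 0x7;
			if (delta_fratio) {
				found++;
				fratio -= delta_fratio;
			}
		}

		/* Prints: ratio after 1 delta(s): 32 */
		printf("ratio after %d delta(s): %d\n", found, fratio);
		return 0;
	}
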
+
+static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
+{
+       u64 ratios, counts;
+       u32 group_size;
+       int err, i;
+
+       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+       if (err)
+               return false;
+
+       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
+
+       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
+       if (err)
+               return false;
+
+       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
+       if (err)
+               return false;
+
+       for (i = 0; i < 64; i += 8) {
+               group_size = (counts >> i) & 0xFF;
+               if (group_size >= size) {
+                       *turbo_freq = (ratios >> i) & 0xFF;
+                       return true;
+               }
+       }
+
+       return false;
+}
 
-       aperf_delta = aperf - s->aperf;
-       mperf_delta = mperf - s->mperf;
+static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+       u64 msr;
+       int err;
+
+       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+       if (err)
+               return false;
+
+       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+       if (err)
+               return false;
+
+       *base_freq = (*base_freq >> 8) & 0xFF;    /* max P state */
+       *turbo_freq = (msr >> 24) & 0xFF;         /* 4C turbo    */
+
+       /* The CPU may have fewer than 4 cores */
+       if (!*turbo_freq)
+               *turbo_freq = msr & 0xFF;         /* 1C turbo    */
+
+       return true;
+}
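
Several of the helpers above share the same field-extraction idiom; a small sketch with invented register images shows what core_set_max_freq_ratio() computes (bit positions are taken from the code above, the MSR values are not from real hardware):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t platform_info = 0x1500;	/* bits 15:8 = base ratio 0x15 */
		uint64_t turbo_limit = 0x1c1d1e1f;	/* bits 31:24 = 4C, 7:0 = 1C */

		uint64_t base = (platform_info >> 8) & 0xFF;	/* 21 -> 2100 MHz */
		uint64_t turbo = (turbo_limit >> 24) & 0xFF;	/* 28 -> 2800 MHz */

		if (!turbo)				/* fewer than 4 cores: use 1C */
			turbo = turbo_limit & 0xFF;

		/* Prints: base=21 turbo=28 */
		printf("base=%llu turbo=%llu\n",
		       (unsigned long long)base, (unsigned long long)turbo);
		return 0;
	}
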
+
+static bool __init intel_set_max_freq_ratio(void)
+{
+       u64 base_freq, turbo_freq;
+       u64 turbo_ratio;
 
+       if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
+               goto out;
+
+       if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
+           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+               goto out;
+
+       if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
+           knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+               goto out;
+
+       if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
+           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
+               goto out;
+
+       if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
+               goto out;
+
+       return false;
+
+out:
        /*
-        * There is no architectural guarantee that MPERF
-        * increments faster than we can read it.
+        * Some hypervisors advertise X86_FEATURE_APERFMPERF
+        * but then fill all MSRs with zeroes.
+        * Some CPUs have turbo boost but don't declare any turbo ratio
+        * in MSR_TURBO_RATIO_LIMIT.
         */
-       if (mperf_delta == 0)
-               return;
+       if (!base_freq || !turbo_freq) {
+               pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
+               return false;
+       }
 
-       s->time = ktime_get();
-       s->aperf = aperf;
-       s->mperf = mperf;
-       s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
-       atomic_set_release(&s->scfpending, 0);
+       turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
+       if (!turbo_ratio) {
+               pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
+               return false;
+       }
+
+       arch_turbo_freq_ratio = turbo_ratio;
+       arch_set_max_freq_ratio(turbo_disabled());
+
+       return true;
 }
 
-static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
+#ifdef CONFIG_PM_SLEEP
+static struct syscore_ops freq_invariance_syscore_ops = {
+       .resume = init_counter_refs,
+};
+
+static void register_freq_invariance_syscore_ops(void)
 {
-       s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
-       struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
+       register_syscore_ops(&freq_invariance_syscore_ops);
+}
+#else
+static inline void register_freq_invariance_syscore_ops(void) {}
+#endif
 
-       /* Don't bother re-computing within the cache threshold time. */
-       if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
-               return true;
+static void freq_invariance_enable(void)
+{
+       if (static_branch_unlikely(&arch_scale_freq_key)) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+       static_branch_enable(&arch_scale_freq_key);
+       register_freq_invariance_syscore_ops();
+       pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
+}
+
+void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled)
+{
+       arch_turbo_freq_ratio = ratio;
+       arch_set_max_freq_ratio(turbo_disabled);
+       freq_invariance_enable();
+}
+
+static void __init bp_init_freq_invariance(void)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+               return;
 
-       if (!atomic_xchg(&s->scfpending, 1) || wait)
-               smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
+       if (intel_set_max_freq_ratio())
+               freq_invariance_enable();
+}
 
-       /* Return false if the previous iteration was too long ago. */
-       return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
+static void disable_freq_invariance_workfn(struct work_struct *work)
+{
+       static_branch_disable(&arch_scale_freq_key);
 }
 
-unsigned int aperfmperf_get_khz(int cpu)
+static DECLARE_WORK(disable_freq_invariance_work,
+                   disable_freq_invariance_workfn);
+
+DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+
+static void scale_freq_tick(u64 acnt, u64 mcnt)
 {
-       if (!cpu_khz)
-               return 0;
+       u64 freq_scale;
 
-       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-               return 0;
+       if (!arch_scale_freq_invariant())
+               return;
 
-       if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
-               return 0;
+       if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
+               goto error;
 
-       if (rcu_is_idle_cpu(cpu))
-               return 0; /* Idle CPUs are completely uninteresting. */
+       if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
+               goto error;
 
-       aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
-       return per_cpu(samples.khz, cpu);
+       freq_scale = div64_u64(acnt, mcnt);
+       if (!freq_scale)
+               goto error;
+
+       if (freq_scale > SCHED_CAPACITY_SCALE)
+               freq_scale = SCHED_CAPACITY_SCALE;
+
+       this_cpu_write(arch_freq_scale, freq_scale);
+       return;
+
+error:
+       pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
+       schedule_work(&disable_freq_invariance_work);
 }
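
To see the fixed-point arithmetic above in action, here is a worked example with invented deltas (CPU ticking along at its base frequency, arch_max_freq_ratio ~1.33 in 1024ths); the double SCHED_CAPACITY_SHIFT compensates for the ratio itself already carrying one 2^10 factor:

	#include <stdint.h>
	#include <stdio.h>

	#define SCHED_CAPACITY_SHIFT	10
	#define SCHED_CAPACITY_SCALE	(1ULL << SCHED_CAPACITY_SHIFT)

	int main(void)
	{
		uint64_t acnt = 2100, mcnt = 2100;	/* invented APERF/MPERF deltas */
		uint64_t max_ratio = 1365;		/* ~1.33 * 1024 */

		/* acnt is scaled by 2^20: one 2^10 for the result, one to
		 * cancel the 2^10 baked into max_ratio */
		uint64_t scale = (acnt << (2 * SCHED_CAPACITY_SHIFT)) /
				 (mcnt * max_ratio);

		if (scale > SCHED_CAPACITY_SCALE)	/* clip, as the kernel does */
			scale = SCHED_CAPACITY_SCALE;

		/* Prints: freq_scale = 768/1024 (base/turbo-max, ~0.75) */
		printf("freq_scale = %llu/1024\n", (unsigned long long)scale);
		return 0;
	}
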
+#else
+static inline void bp_init_freq_invariance(void) { }
+static inline void scale_freq_tick(u64 acnt, u64 mcnt) { }
+#endif /* CONFIG_X86_64 && CONFIG_SMP */
 
-void arch_freq_prepare_all(void)
+void arch_scale_freq_tick(void)
 {
-       ktime_t now = ktime_get();
-       bool wait = false;
-       int cpu;
+       struct aperfmperf *s = this_cpu_ptr(&cpu_samples);
+       u64 acnt, mcnt, aperf, mperf;
 
-       if (!cpu_khz)
+       if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
                return;
 
-       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-               return;
+       rdmsrl(MSR_IA32_APERF, aperf);
+       rdmsrl(MSR_IA32_MPERF, mperf);
+       acnt = aperf - s->aperf;
+       mcnt = mperf - s->mperf;
 
-       for_each_online_cpu(cpu) {
-               if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
-                       continue;
-               if (rcu_is_idle_cpu(cpu))
-                       continue; /* Idle CPUs are completely uninteresting. */
-               if (!aperfmperf_snapshot_cpu(cpu, now, false))
-                       wait = true;
-       }
+       s->aperf = aperf;
+       s->mperf = mperf;
+
+       raw_write_seqcount_begin(&s->seq);
+       s->last_update = jiffies;
+       s->acnt = acnt;
+       s->mcnt = mcnt;
+       raw_write_seqcount_end(&s->seq);
 
-       if (wait)
-               msleep(APERFMPERF_REFRESH_DELAY_MS);
+       scale_freq_tick(acnt, mcnt);
 }
 
+/*
+ * Discard samples older than the defined maximum sample age of 20ms. There
+ * is no point in sending IPIs in such a case. If the scheduler tick was
+ * not running then the CPU is either idle or isolated.
+ */
+#define MAX_SAMPLE_AGE ((unsigned long)HZ / 50)
+
 unsigned int arch_freq_get_on_cpu(int cpu)
 {
-       struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
+       struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu);
+       unsigned int seq, freq;
+       unsigned long last;
+       u64 acnt, mcnt;
 
-       if (!cpu_khz)
-               return 0;
+       if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+               goto fallback;
 
-       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-               return 0;
+       do {
+               seq = raw_read_seqcount_begin(&s->seq);
+               last = s->last_update;
+               acnt = s->acnt;
+               mcnt = s->mcnt;
+       } while (read_seqcount_retry(&s->seq, seq));
 
-       if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
-               return 0;
+       /*
+        * Bail on invalid count and when the last update was too long ago,
+        * which covers idle and NOHZ full CPUs.
+        */
+       if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE)
+               goto fallback;
+
+       return div64_u64((cpu_khz * acnt), mcnt);
+
+fallback:
+       freq = cpufreq_quick_get(cpu);
+       return freq ? freq : cpu_khz;
+}
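
The writer side in arch_scale_freq_tick() and the retry loop here form a classic seqcount protocol. Below is a single-threaded, user-space sketch of that protocol; the kernel's seqcount_t additionally provides the memory barriers that real concurrent use requires, which are deliberately elided:

	#include <stdint.h>
	#include <stdio.h>

	struct sample {
		unsigned int seq;
		uint64_t acnt, mcnt;
	};

	static void write_sample(struct sample *s, uint64_t a, uint64_t m)
	{
		s->seq++;		/* odd: update in progress */
		s->acnt = a;
		s->mcnt = m;
		s->seq++;		/* even: update complete */
	}

	static void read_sample(const struct sample *s, uint64_t *a, uint64_t *m)
	{
		unsigned int seq;

		do {			/* retry on odd or changed sequence */
			seq = s->seq;
			*a = s->acnt;
			*m = s->mcnt;
		} while ((seq & 1) || seq != s->seq);
	}

	int main(void)
	{
		struct sample s = { 0 };
		uint64_t a, m;

		write_sample(&s, 2800, 2100);
		read_sample(&s, &a, &m);
		printf("acnt=%llu mcnt=%llu\n",
		       (unsigned long long)a, (unsigned long long)m);
		return 0;
	}
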
 
-       if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
-               return per_cpu(samples.khz, cpu);
+static int __init bp_init_aperfmperf(void)
+{
+       if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+               return 0;
 
-       msleep(APERFMPERF_REFRESH_DELAY_MS);
-       atomic_set(&s->scfpending, 1);
-       smp_mb(); /* ->scfpending before smp_call_function_single(). */
-       smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+       init_counter_refs();
+       bp_init_freq_invariance();
+       return 0;
+}
+early_initcall(bp_init_aperfmperf);
 
-       return per_cpu(samples.khz, cpu);
+void ap_init_aperfmperf(void)
+{
+       if (cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+               init_counter_refs();
 }
index 6296e1ebed1dbef3e7a1d147d14bf720dcd0cb5e..d879a6c93609a6f655bd76f1efbb7750b68de5b7 100644 (file)
@@ -446,6 +446,13 @@ void update_srbds_msr(void)
        if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
                return;
 
+       /*
+        * An MDS_NO CPU for which the SRBDS mitigation is not needed due to
+        * TSX being disabled may also not have received the SRBDS MSR
+        * microcode; there is nothing to update in that case.
+        */
+       if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL))
+               return;
+
        rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
 
        switch (srbds_mitigation) {
index e342ae4db3c4de5e88456b72e727cc0def5e6978..2e9142797c99786054a3182f281a53094f4d999c 100644 (file)
@@ -60,6 +60,7 @@
 #include <asm/uv/uv.h>
 #include <asm/sigframe.h>
 #include <asm/traps.h>
+#include <asm/sev.h>
 
 #include "cpu.h"
 
@@ -298,13 +299,6 @@ static int __init cachesize_setup(char *str)
 }
 __setup("cachesize=", cachesize_setup);
 
-static int __init x86_sep_setup(char *s)
-{
-       setup_clear_cpu_cap(X86_FEATURE_SEP);
-       return 1;
-}
-__setup("nosep", x86_sep_setup);
-
 /* Standard macro to see if a specific flag is changeable */
 static inline int flag_is_changeable_p(u32 flag)
 {
@@ -376,26 +370,12 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 }
 #endif
 
-static __init int setup_disable_smep(char *arg)
-{
-       setup_clear_cpu_cap(X86_FEATURE_SMEP);
-       return 1;
-}
-__setup("nosmep", setup_disable_smep);
-
 static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
        if (cpu_has(c, X86_FEATURE_SMEP))
                cr4_set_bits(X86_CR4_SMEP);
 }
 
-static __init int setup_disable_smap(char *arg)
-{
-       setup_clear_cpu_cap(X86_FEATURE_SMAP);
-       return 1;
-}
-__setup("nosmap", setup_disable_smap);
-
 static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 {
        unsigned long eflags = native_save_fl();
@@ -403,14 +383,8 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
        /* This should have been cleared long ago */
        BUG_ON(eflags & X86_EFLAGS_AC);
 
-       if (cpu_has(c, X86_FEATURE_SMAP)) {
-#ifdef CONFIG_X86_SMAP
+       if (cpu_has(c, X86_FEATURE_SMAP))
                cr4_set_bits(X86_CR4_SMAP);
-#else
-               clear_cpu_cap(c, X86_FEATURE_SMAP);
-               cr4_clear_bits(X86_CR4_SMAP);
-#endif
-       }
 }
 
 static __always_inline void setup_umip(struct cpuinfo_x86 *c)
@@ -1368,8 +1342,8 @@ static void detect_nopl(void)
 static void __init cpu_parse_early_param(void)
 {
        char arg[128];
-       char *argptr = arg;
-       int arglen, res, bit;
+       char *argptr = arg, *opt;
+       int arglen, taint = 0;
 
 #ifdef CONFIG_X86_32
        if (cmdline_find_option_bool(boot_command_line, "no387"))
@@ -1397,21 +1371,61 @@ static void __init cpu_parse_early_param(void)
                return;
 
        pr_info("Clearing CPUID bits:");
-       do {
-               res = get_option(&argptr, &bit);
-               if (res == 0 || res == 3)
-                       break;
 
-               /* If the argument was too long, the last bit may be cut off */
-               if (res == 1 && arglen >= sizeof(arg))
-                       break;
+       while (argptr) {
+               bool found __maybe_unused = false;
+               unsigned int bit;
+
+               opt = strsep(&argptr, ",");
+
+               /*
+                * Handle naked numbers first for feature flags which don't
+                * have names.
+                */
+               if (!kstrtouint(opt, 10, &bit)) {
+                       if (bit < NCAPINTS * 32) {
+
+#ifdef CONFIG_X86_FEATURE_NAMES
+                               /* empty-string, i.e., ""-defined feature flags */
+                               if (!x86_cap_flags[bit])
+                                       pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit));
+                               else
+#endif
+                                       pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+
+                               setup_clear_cpu_cap(bit);
+                               taint++;
+                       }
+                       /*
+                        * The assumption is that there are no feature names with only
+                        * numbers in the name, so go to the next argument.
+                        */
+                       continue;
+               }
+
+#ifdef CONFIG_X86_FEATURE_NAMES
+               for (bit = 0; bit < 32 * NCAPINTS; bit++) {
+                       if (!x86_cap_flag(bit))
+                               continue;
 
-               if (bit >= 0 && bit < NCAPINTS * 32) {
-                       pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+                       if (strcmp(x86_cap_flag(bit), opt))
+                               continue;
+
+                       pr_cont(" %s", opt);
                        setup_clear_cpu_cap(bit);
+                       taint++;
+                       found = true;
+                       break;
                }
-       } while (res == 2);
+
+               if (!found)
+                       pr_cont(" (unknown: %s)", opt);
+#endif
+       }
        pr_cont("\n");
+
+       if (taint)
+               add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
 }
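
A user-space sketch of the new parsing strategy (naked numbers first, then a name lookup) may make the flow easier to follow; the feature table is invented and strsep() here is the glibc/BSD function, so this mirrors the structure rather than the kernel's exact helpers:

	#define _DEFAULT_SOURCE		/* for strsep() on glibc */
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	static const char *flags[] = { "sep", "smep", "smap", "clflush" };

	int main(void)
	{
		char arg[] = "smap,42,nosuchflag";	/* invented clearcpuid= value */
		char *argptr = arg, *opt;

		while ((opt = strsep(&argptr, ","))) {
			char *end;
			unsigned long bit = strtoul(opt, &end, 10);

			if (end != opt && !*end) {	/* naked number, like kstrtouint */
				printf("clear bit %lu\n", bit);
				continue;
			}

			int found = 0;
			for (size_t i = 0; i < sizeof(flags) / sizeof(*flags); i++) {
				if (!strcmp(flags[i], opt)) {
					printf("clear %s (bit %zu)\n", opt, i);
					found = 1;
					break;
				}
			}
			if (!found)
				printf("(unknown: %s)\n", opt);
		}
		return 0;
	}
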
 
 /*
@@ -1859,14 +1873,6 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
        tsx_ap_init();
 }
 
-static __init int setup_noclflush(char *arg)
-{
-       setup_clear_cpu_cap(X86_FEATURE_CLFLUSH);
-       setup_clear_cpu_cap(X86_FEATURE_CLFLUSHOPT);
-       return 1;
-}
-__setup("noclflush", setup_noclflush);
-
 void print_cpu_info(struct cpuinfo_x86 *c)
 {
        const char *vendor = NULL;
@@ -2126,6 +2132,9 @@ void cpu_init_exception_handling(void)
 
        load_TR_desc();
 
+       /* GHCB needs to be set up to handle #VC. */
+       setup_ghcb();
+
        /* Finally load the IDT */
        load_current_idt();
 }
index f7a5370a9b3b83d4ed129e44879d774d6a8be6e4..fd5dead8371cc0446f11974f23af7a3ca802ad0f 100644 (file)
@@ -7,10 +7,13 @@
 #include <linux/smp.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/semaphore.h>
 #include <linux/thread_info.h>
 #include <linux/init.h>
 #include <linux/uaccess.h>
+#include <linux/workqueue.h>
 #include <linux/delay.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -91,7 +94,7 @@ static bool ring3mwait_disabled __read_mostly;
 static int __init ring3mwait_disable(char *__unused)
 {
        ring3mwait_disabled = true;
-       return 0;
+       return 1;
 }
 __setup("ring3mwait=disable", ring3mwait_disable);
 
@@ -181,6 +184,38 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
        return false;
 }
 
+int intel_cpu_collect_info(struct ucode_cpu_info *uci)
+{
+       unsigned int val[2];
+       unsigned int family, model;
+       struct cpu_signature csig = { 0 };
+       unsigned int eax, ebx, ecx, edx;
+
+       memset(uci, 0, sizeof(*uci));
+
+       eax = 0x00000001;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+       csig.sig = eax;
+
+       family = x86_family(eax);
+       model  = x86_model(eax);
+
+       if (model >= 5 || family > 6) {
+               /* get processor flags from MSR 0x17 */
+               native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+               csig.pf = 1 << ((val[1] >> 18) & 7);
+       }
+
+       csig.rev = intel_get_microcode_revision();
+
+       uci->cpu_sig = csig;
+       uci->valid = 1;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(intel_cpu_collect_info);
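
For readers unfamiliar with the signature encoding consumed here, this sketch decodes an example CPUID(1).EAX value using the conventional x86 family/model rules, plus the platform-ID bit extraction from the function above; both input values are invented:

	#include <stdio.h>

	int main(void)
	{
		unsigned int eax = 0x000806ec;	/* example CPUID(1).EAX signature */

		unsigned int family = (eax >> 8) & 0xf;
		unsigned int model = (eax >> 4) & 0xf;

		if (family == 0xf)
			family += (eax >> 20) & 0xff;		/* extended family */
		if (family >= 6)
			model += ((eax >> 16) & 0xf) << 4;	/* extended model */

		/* Platform flags: 3-bit field at bits 20:18 of MSR 0x17's high word */
		unsigned int platform_id_hi = 0x001c0000;	/* invented MSR value */
		unsigned int pf = 1u << ((platform_id_hi >> 18) & 7);

		/* Prints: family=0x6 model=0x8e stepping=0xc pf=0x80 */
		printf("family=%#x model=%#x stepping=%#x pf=%#x\n",
		       family, model, eax & 0xf, pf);
		return 0;
	}
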
+
 static void early_init_intel(struct cpuinfo_x86 *c)
 {
        u64 misc_enable;
@@ -999,6 +1034,8 @@ static const struct {
 
 static struct ratelimit_state bld_ratelimit;
 
+static DEFINE_SEMAPHORE(buslock_sem);
+
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
        int len = strlen(opt), ratelimit;
@@ -1109,18 +1146,52 @@ static void split_lock_init(void)
                split_lock_verify_msr(sld_state != sld_off);
 }
 
+static void __split_lock_reenable(struct work_struct *work)
+{
+       sld_update_msr(true);
+       up(&buslock_sem);
+}
+
+/*
+ * If a CPU goes offline with pending delayed work to re-enable split lock
+ * detection then the delayed work will be executed on some other CPU. That
+ * handles releasing the buslock_sem, but because it executes on a
+ * different CPU it probably won't re-enable split lock detection. This is a
+ * problem on HT systems since the sibling CPU on the same core may then be
+ * left running with split lock detection disabled.
+ *
+ * Unconditionally re-enable detection here.
+ */
+static int splitlock_cpu_offline(unsigned int cpu)
+{
+       sld_update_msr(true);
+
+       return 0;
+}
+
+static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
+
 static void split_lock_warn(unsigned long ip)
 {
-       pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
-                           current->comm, current->pid, ip);
+       int cpu;
 
-       /*
-        * Disable the split lock detection for this task so it can make
-        * progress and set TIF_SLD so the detection is re-enabled via
-        * switch_to_sld() when the task is scheduled out.
-        */
+       if (!current->reported_split_lock)
+               pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
+                                   current->comm, current->pid, ip);
+       current->reported_split_lock = 1;
+
+       /* Misery factor #1: sleep 10ms before trying to execute the split lock */
+       if (msleep_interruptible(10) > 0)
+               return;
+       /* Misery factor #2: only one core at a time may have detection disabled */
+       if (down_interruptible(&buslock_sem) == -EINTR)
+               return;
+       cpu = get_cpu();
+       schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
+
+       /* Disable split lock detection on this CPU to make progress */
        sld_update_msr(false);
-       set_tsk_thread_flag(current, TIF_SLD);
+       put_cpu();
 }
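
The two misery factors make more sense seen end to end; a hedged user-space analog follows. In the kernel the semaphore is released by the delayed work that re-enables detection; this sketch collapses both into one function for brevity (build with -pthread):

	#include <semaphore.h>
	#include <stdio.h>
	#include <unistd.h>

	static sem_t buslock_sem;

	static void split_lock_slow_path(int tid)
	{
		usleep(10000);			/* misery #1: 10ms delay first */

		if (sem_wait(&buslock_sem))	/* misery #2: one core at a time */
			return;

		printf("thread %d: detection off, split lock can complete\n", tid);
		usleep(8000);			/* stand-in for the 2-jiffy window */
		printf("thread %d: detection back on\n", tid);

		sem_post(&buslock_sem);		/* done by the delayed work in the kernel */
	}

	int main(void)
	{
		sem_init(&buslock_sem, 0, 1);
		split_lock_slow_path(0);
		sem_destroy(&buslock_sem);
		return 0;
	}
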
 
 bool handle_guest_split_lock(unsigned long ip)
@@ -1193,18 +1264,6 @@ void handle_bus_lock(struct pt_regs *regs)
        }
 }
 
-/*
- * This function is called only when switching between tasks with
- * different split-lock detection modes. It sets the MSR for the
- * mode of the new task. This is right most of the time, but since
- * the MSR is shared by hyperthreads on a physical core there can
- * be glitches when the two threads need different modes.
- */
-void switch_to_sld(unsigned long tifn)
-{
-       sld_update_msr(!(tifn & _TIF_SLD));
-}
-
 /*
  * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
  * only be trusted if it is confirmed that a CPU model implements a
@@ -1230,6 +1289,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    1),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           1),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         1),
+       X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          1),
        {}
 };
 
@@ -1274,10 +1334,14 @@ static void sld_state_show(void)
                pr_info("disabled\n");
                break;
        case sld_warn:
-               if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+               if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
                        pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
-               else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+                       if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+                                             "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
+                               pr_warn("No splitlock CPU offline handler\n");
+               } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
                        pr_info("#DB: warning on user-space bus_locks\n");
+               }
                break;
        case sld_fatal:
                if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
index 1940d305db1c0fc6549dc38569121c85c5d62e14..1c87501e0fa3dd82fd6c290f562d062bb54d10d6 100644 (file)
@@ -1294,10 +1294,23 @@ out_free:
        kfree(bank);
 }
 
+static void __threshold_remove_device(struct threshold_bank **bp)
+{
+       unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
+
+       for (bank = 0; bank < numbanks; bank++) {
+               if (!bp[bank])
+                       continue;
+
+               threshold_remove_bank(bp[bank]);
+               bp[bank] = NULL;
+       }
+       kfree(bp);
+}
+
 int mce_threshold_remove_device(unsigned int cpu)
 {
        struct threshold_bank **bp = this_cpu_read(threshold_banks);
-       unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
 
        if (!bp)
                return 0;
@@ -1308,13 +1321,7 @@ int mce_threshold_remove_device(unsigned int cpu)
         */
        this_cpu_write(threshold_banks, NULL);
 
-       for (bank = 0; bank < numbanks; bank++) {
-               if (bp[bank]) {
-                       threshold_remove_bank(bp[bank]);
-                       bp[bank] = NULL;
-               }
-       }
-       kfree(bp);
+       __threshold_remove_device(bp);
        return 0;
 }
 
@@ -1351,15 +1358,14 @@ int mce_threshold_create_device(unsigned int cpu)
                if (!(this_cpu_read(bank_map) & (1 << bank)))
                        continue;
                err = threshold_create_bank(bp, cpu, bank);
-               if (err)
-                       goto out_err;
+               if (err) {
+                       __threshold_remove_device(bp);
+                       return err;
+               }
        }
        this_cpu_write(threshold_banks, bp);
 
        if (thresholding_irq_en)
                mce_threshold_vector = amd_threshold_interrupt;
        return 0;
-out_err:
-       mce_threshold_remove_device(cpu);
-       return err;
 }
index 0e3ae64d3b76b940a072efb0087938ca60611897..717192915f28a010cbe94504dcde628c279112ac 100644 (file)
@@ -177,16 +177,14 @@ retry:
        /* no more record */
        if (*record_id == APEI_ERST_INVALID_RECORD_ID)
                goto out;
-       rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
+       rc = erst_read_record(*record_id, &rcd.hdr, sizeof(rcd), sizeof(rcd),
+                       &CPER_CREATOR_MCE);
        /* someone else has cleared the record, try next one */
        if (rc == -ENOENT)
                goto retry;
        else if (rc < 0)
                goto out;
-       /* try to skip other type records in storage */
-       else if (rc != sizeof(rcd) ||
-                !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
-               goto retry;
+
        memcpy(m, &rcd.mce, sizeof(*m));
        rc = sizeof(*m);
 out:
index 981496e6bc0e41d44ee8f4802ef70deb10cbb8d8..d775fcd74e98d268d4251402d10c1cbf5a52e66f 100644 (file)
@@ -69,7 +69,9 @@ DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
 
 struct mce_bank {
        u64                     ctl;                    /* subevents to enable */
-       bool                    init;                   /* initialise bank? */
+
+       __u64 init                      : 1,            /* initialise bank? */
+             __reserved_1              : 63;
 };
 static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
 
index 1add869353497d690dc4cf0ca6054b41823f8b5a..00483d1c27e4f4a55acd87129ded713332fe43b5 100644 (file)
@@ -301,85 +301,65 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
        }
 }
 
-static __always_inline int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
+/* See AMD PPR(s) section Machine Check Error Handling. */
+static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
 {
-       u64 mcx_cfg;
+       char *panic_msg = NULL;
+       int ret;
 
        /*
-        * We need to look at the following bits:
-        * - "succor" bit (data poisoning support), and
-        * - TCC bit (Task Context Corrupt)
-        * in MCi_STATUS to determine error severity.
+        * Default return value: Action required, the error must be handled
+        * immediately.
         */
-       if (!mce_flags.succor)
-               return MCE_PANIC_SEVERITY;
-
-       mcx_cfg = mce_rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(m->bank));
-
-       /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
-       if ((mcx_cfg & MCI_CONFIG_MCAX) &&
-           (m->status & MCI_STATUS_TCC) &&
-           (err_ctx == IN_KERNEL))
-               return MCE_PANIC_SEVERITY;
-
-        /* ...otherwise invoke hwpoison handler. */
-       return MCE_AR_SEVERITY;
-}
-
-/*
- * See AMD Error Scope Hierarchy table in a newer BKDG. For example
- * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
- */
-static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
-{
-       enum context ctx = error_context(m, regs);
+       ret = MCE_AR_SEVERITY;
 
        /* Processor Context Corrupt, no need to fumble too much, die! */
-       if (m->status & MCI_STATUS_PCC)
-               return MCE_PANIC_SEVERITY;
-
-       if (m->status & MCI_STATUS_UC) {
-
-               if (ctx == IN_KERNEL)
-                       return MCE_PANIC_SEVERITY;
+       if (m->status & MCI_STATUS_PCC) {
+               panic_msg = "Processor Context Corrupt";
+               ret = MCE_PANIC_SEVERITY;
+               goto out;
+       }
 
-               /*
-                * On older systems where overflow_recov flag is not present, we
-                * should simply panic if an error overflow occurs. If
-                * overflow_recov flag is present and set, then software can try
-                * to at least kill process to prolong system operation.
-                */
-               if (mce_flags.overflow_recov) {
-                       if (mce_flags.smca)
-                               return mce_severity_amd_smca(m, ctx);
-
-                       /* kill current process */
-                       return MCE_AR_SEVERITY;
-               } else {
-                       /* at least one error was not logged */
-                       if (m->status & MCI_STATUS_OVER)
-                               return MCE_PANIC_SEVERITY;
-               }
-
-               /*
-                * For any other case, return MCE_UC_SEVERITY so that we log the
-                * error and exit #MC handler.
-                */
-               return MCE_UC_SEVERITY;
+       if (m->status & MCI_STATUS_DEFERRED) {
+               ret = MCE_DEFERRED_SEVERITY;
+               goto out;
        }
 
        /*
-        * deferred error: poll handler catches these and adds to mce_ring so
-        * memory-failure can take recovery actions.
+        * If the UC bit is not set, the system either corrected or deferred
+        * the error. No action will be required after logging the error.
         */
-       if (m->status & MCI_STATUS_DEFERRED)
-               return MCE_DEFERRED_SEVERITY;
+       if (!(m->status & MCI_STATUS_UC)) {
+               ret = MCE_KEEP_SEVERITY;
+               goto out;
+       }
 
        /*
-        * corrected error: poll handler catches these and passes responsibility
-        * of decoding the error to EDAC
+        * On MCA overflow, without the MCA overflow recovery feature the
+        * system will not be able to recover; panic.
         */
-       return MCE_KEEP_SEVERITY;
+       if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) {
+               panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery";
+               ret = MCE_PANIC_SEVERITY;
+               goto out;
+       }
+
+       if (!mce_flags.succor) {
+               panic_msg = "Uncorrected error without MCA Recovery";
+               ret = MCE_PANIC_SEVERITY;
+               goto out;
+       }
+
+       if (error_context(m, regs) == IN_KERNEL) {
+               panic_msg = "Uncorrected unrecoverable error in kernel context";
+               ret = MCE_PANIC_SEVERITY;
+       }
+
+out:
+       if (msg && panic_msg)
+               *msg = panic_msg;
+
+       return ret;
 }
 
 static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
index d28a9f8f3fec1fbe3149318fd448f1c531afa7ff..025c8f0cd948c5483445e256a7944af033784717 100644 (file)
@@ -45,20 +45,6 @@ static struct microcode_intel *intel_ucode_patch;
 /* last level cache size per core */
 static int llc_size_per_core;
 
-static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
-                                       unsigned int s2, unsigned int p2)
-{
-       if (s1 != s2)
-               return false;
-
-       /* Processor flags are either both 0 ... */
-       if (!p1 && !p2)
-               return true;
-
-       /* ... or they intersect. */
-       return p1 & p2;
-}
-
 /*
  * Returns 1 if update has been found, 0 otherwise.
  */
@@ -69,7 +55,7 @@ static int find_matching_signature(void *mc, unsigned int csig, int cpf)
        struct extended_signature *ext_sig;
        int i;
 
-       if (cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf))
+       if (intel_cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf))
                return 1;
 
        /* Look for ext. headers: */
@@ -80,7 +66,7 @@ static int find_matching_signature(void *mc, unsigned int csig, int cpf)
        ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE;
 
        for (i = 0; i < ext_hdr->count; i++) {
-               if (cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf))
+               if (intel_cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf))
                        return 1;
                ext_sig++;
        }
@@ -342,37 +328,6 @@ next:
        return patch;
 }
 
-static int collect_cpu_info_early(struct ucode_cpu_info *uci)
-{
-       unsigned int val[2];
-       unsigned int family, model;
-       struct cpu_signature csig = { 0 };
-       unsigned int eax, ebx, ecx, edx;
-
-       memset(uci, 0, sizeof(*uci));
-
-       eax = 0x00000001;
-       ecx = 0;
-       native_cpuid(&eax, &ebx, &ecx, &edx);
-       csig.sig = eax;
-
-       family = x86_family(eax);
-       model  = x86_model(eax);
-
-       if ((model >= 5) || (family > 6)) {
-               /* get processor flags from MSR 0x17 */
-               native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-               csig.pf = 1 << ((val[1] >> 18) & 7);
-       }
-
-       csig.rev = intel_get_microcode_revision();
-
-       uci->cpu_sig = csig;
-       uci->valid = 1;
-
-       return 0;
-}
-
 static void show_saved_mc(void)
 {
 #ifdef DEBUG
@@ -386,7 +341,7 @@ static void show_saved_mc(void)
                return;
        }
 
-       collect_cpu_info_early(&uci);
+       intel_cpu_collect_info(&uci);
 
        sig     = uci.cpu_sig.sig;
        pf      = uci.cpu_sig.pf;
@@ -502,7 +457,7 @@ void show_ucode_info_early(void)
        struct ucode_cpu_info uci;
 
        if (delay_ucode_info) {
-               collect_cpu_info_early(&uci);
+               intel_cpu_collect_info(&uci);
                print_ucode_info(&uci, current_mc_date);
                delay_ucode_info = 0;
        }
@@ -604,7 +559,7 @@ int __init save_microcode_in_initrd_intel(void)
        if (!(cp.data && cp.size))
                return 0;
 
-       collect_cpu_info_early(&uci);
+       intel_cpu_collect_info(&uci);
 
        scan_microcode(cp.data, cp.size, &uci, true);
 
@@ -637,7 +592,7 @@ static struct microcode_intel *__load_ucode_intel(struct ucode_cpu_info *uci)
        if (!(cp.data && cp.size))
                return NULL;
 
-       collect_cpu_info_early(uci);
+       intel_cpu_collect_info(uci);
 
        return scan_microcode(cp.data, cp.size, uci, false);
 }
@@ -712,7 +667,7 @@ void reload_ucode_intel(void)
        struct microcode_intel *p;
        struct ucode_cpu_info uci;
 
-       collect_cpu_info_early(&uci);
+       intel_cpu_collect_info(&uci);
 
        p = find_patch(&uci);
        if (!p)
index 4eec8889b0ff1de48787fd162985bf930b89fd11..099b6f0d96bdc1c369b493867bc703fa228ab892 100644 (file)
@@ -84,14 +84,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
 
        if (cpu_has(c, X86_FEATURE_TSC)) {
-               unsigned int freq = aperfmperf_get_khz(cpu);
-
-               if (!freq)
-                       freq = cpufreq_quick_get(cpu);
-               if (!freq)
-                       freq = cpu_khz;
-               seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-                          freq / 1000, (freq % 1000));
+               unsigned int freq = arch_freq_get_on_cpu(cpu);
+
+               seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000));
        }
 
        /* Cache size */
index 83f901e2c2df9e9b99b97bf9bf9fea271cdcf427..f276aff521e8b851b0cf2641aaec1adef59a4c15 100644 (file)
@@ -341,14 +341,14 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 
        /* Check whether cpus belong to parent ctrl group */
        cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
                return -EINVAL;
        }
 
        /* Check whether cpus are dropped from this group */
        cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                /* Give any dropped cpus to parent rdtgroup */
                cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
                update_closid_rmid(tmpmask, prgrp);
@@ -359,7 +359,7 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
         * and update per-cpu rmid
         */
        cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                head = &prgrp->mon.crdtgrp_list;
                list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
                        if (crgrp == rdtgrp)
@@ -394,7 +394,7 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 
        /* Check whether cpus are dropped from this group */
        cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                /* Can't drop from default group */
                if (rdtgrp == &rdtgroup_default) {
                        rdt_last_cmd_puts("Can't drop CPUs from default group\n");
@@ -413,12 +413,12 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
         * and update per-cpu closid/rmid.
         */
        cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
                        if (r == rdtgrp)
                                continue;
                        cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
-                       if (cpumask_weight(tmpmask1))
+                       if (!cpumask_empty(tmpmask1))
                                cpumask_rdtgrp_clear(r, tmpmask1);
                }
                update_closid_rmid(tmpmask, rdtgrp);
@@ -488,7 +488,7 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 
        /* check that user didn't specify any offline cpus */
        cpumask_andnot(tmpmask, newmask, cpu_online_mask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                ret = -EINVAL;
                rdt_last_cmd_puts("Can only assign online CPUs\n");
                goto unlock;
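
The cpumask_weight() to !cpumask_empty() conversions in this file are behavior-preserving; the win is that an emptiness test can stop at the first set bit while a population count must scan everything. A simplified stand-in (not the kernel's bitmap implementation) illustrates the difference:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static bool bitmap_empty(const uint64_t *map, int words)
	{
		for (int i = 0; i < words; i++)
			if (map[i])
				return false;	/* stops at the first nonzero word */
		return true;
	}

	static int bitmap_weight(const uint64_t *map, int words)
	{
		int w = 0;

		for (int i = 0; i < words; i++)	/* always walks the whole mask */
			w += __builtin_popcountll(map[i]);
		return w;
	}

	int main(void)
	{
		uint64_t mask[4] = { 1, 0, 0, 0 };

		/* Prints: empty=0 weight=1 */
		printf("empty=%d weight=%d\n",
		       bitmap_empty(mask, 4), bitmap_weight(mask, 4));
		return 0;
	}
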
index 4143b1e4c5c6d8160da5f4ae84075e1470e8e37d..dbaa8326d6f289d16a3dc431261c80fa063cebb7 100644 (file)
@@ -43,6 +43,7 @@ static const struct cpuid_bit cpuid_bits[] = {
        { X86_FEATURE_CPB,              CPUID_EDX,  9, 0x80000007, 0 },
        { X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },
        { X86_FEATURE_MBA,              CPUID_EBX,  6, 0x80000008, 0 },
+       { X86_FEATURE_PERFMON_V2,       CPUID_EAX,  0, 0x80000022, 0 },
        { 0, 0, 0, 0, 0 }
 };
 
index 7c63a1911fae977b7c00367a659e3958f7fcccc4..3c24e6124d955234a0998e77558f4ca02fa9306d 100644 (file)
 #include "encls.h"
 #include "sgx.h"
 
+#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
+/*
+ * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
+ * determine the page index associated with the first PCMD entry
+ * within a PCMD page.
+ */
+#define PCMD_FIRST_MASK GENMASK(4, 0)
+
+/**
+ * reclaimer_writing_to_pcmd() - Query if any enclave page associated with
+ *                               a PCMD page is in the process of being reclaimed.
+ * @encl:        Enclave to which PCMD page belongs
+ * @start_addr:  Address of enclave page using first entry within the PCMD page
+ *
+ * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is
+ * stored. The PCMD data of a reclaimed enclave page contains enough
+ * information for the processor to verify the page at the time
+ * it is loaded back into the Enclave Page Cache (EPC).
+ *
+ * The backing storage to which enclave pages are reclaimed is laid out as
+ * follows:
+ * Encrypted enclave pages:SECS page:PCMD pages
+ *
+ * Each PCMD page contains the PCMD metadata of
+ * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
+ *
+ * A PCMD page can only be truncated if it is (a) empty, and (b) not in the
+ * process of getting data (and thus soon being non-empty). (b) is tested by
+ * checking whether any enclave page sharing the PCMD page is in the process
+ * of being reclaimed.
+ *
+ * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
+ * intends to reclaim that enclave page - it means that the PCMD page
+ * associated with that enclave page is about to get some data and thus
+ * even if the PCMD page is empty, it should not be truncated.
+ *
+ * Context: Enclave mutex (&sgx_encl->lock) must be held.
+ * Return: 1 if the reclaimer is about to write to the PCMD page
+ *         0 if the reclaimer has no intention to write to the PCMD page
+ */
+static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
+                                    unsigned long start_addr)
+{
+       int reclaimed = 0;
+       int i;
+
+       /*
+        * PCMD_FIRST_MASK is based on the number of PCMD entries within
+        * a PCMD page being 32.
+        */
+       BUILD_BUG_ON(PCMDS_PER_PAGE != 32);
+
+       for (i = 0; i < PCMDS_PER_PAGE; i++) {
+               struct sgx_encl_page *entry;
+               unsigned long addr;
+
+               addr = start_addr + i * PAGE_SIZE;
+
+               /*
+                * Stop when reaching the SECS page - it does not
+                * have a page_array entry and its reclaim is
+                * started and completed with the enclave mutex held so
+                * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
+                * flag.
+                */
+               if (addr == encl->base + encl->size)
+                       break;
+
+               entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+               if (!entry)
+                       continue;
+
+               /*
+                * VA page slot ID uses the same bit as the flag so it is important
+                * to ensure that the page is not already in backing store.
+                */
+               if (entry->epc_page &&
+                   (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
+                       reclaimed = 1;
+                       break;
+               }
+       }
+
+       return reclaimed;
+}
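
The index math feeding this function is easy to get lost in; here is a worked example under the assumption encoded by the BUILD_BUG_ON above (32 PCMD entries per page, i.e. sizeof(struct sgx_pcmd) == 128) with an invented enclave base:

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1ULL << PAGE_SHIFT)

	int main(void)
	{
		uint64_t encl_base = 0x7f0000000000ULL;	/* invented enclave base */
		uint64_t page_index = 37;		/* 38th enclave page */

		/* 32 PCMD entries share a PCMD page, so masking off the low
		 * five index bits (PCMD_FIRST_MASK) yields the enclave page
		 * that owns the first entry; PFN_PHYS() is a PAGE_SHIFT shift. */
		uint64_t first_index = page_index & ~0x1FULL;	/* 32 */
		uint64_t pcmd_first_page = encl_base + first_index * PAGE_SIZE;

		/* Prints: first entry's enclave page: 0x7f0000020000 */
		printf("first entry's enclave page: %#llx\n",
		       (unsigned long long)pcmd_first_page);
		return 0;
	}
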
+
 /*
  * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's
  * follow right after the EPC data in the backing storage. In addition to the
@@ -47,6 +133,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
        unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
        struct sgx_encl *encl = encl_page->encl;
        pgoff_t page_index, page_pcmd_off;
+       unsigned long pcmd_first_page;
        struct sgx_pageinfo pginfo;
        struct sgx_backing b;
        bool pcmd_page_empty;
@@ -58,6 +145,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
        else
                page_index = PFN_DOWN(encl->size);
 
+       /*
+        * Address of enclave page using the first entry within the PCMD page.
+        */
+       pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;
+
        page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
 
        ret = sgx_encl_get_backing(encl, page_index, &b);
@@ -84,6 +176,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
        }
 
        memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
+       set_page_dirty(b.pcmd);
 
        /*
         * The area for the PCMD in the page was zeroed above.  Check if the
@@ -94,12 +187,20 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
        kunmap_atomic(pcmd_page);
        kunmap_atomic((void *)(unsigned long)pginfo.contents);
 
-       sgx_encl_put_backing(&b, false);
+       get_page(b.pcmd);
+       sgx_encl_put_backing(&b);
 
        sgx_encl_truncate_backing_page(encl, page_index);
 
-       if (pcmd_page_empty)
+       if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
                sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
+               pcmd_page = kmap_atomic(b.pcmd);
+               if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
+                       pr_warn("PCMD page not empty after truncate.\n");
+               kunmap_atomic(pcmd_page);
+       }
+
+       put_page(b.pcmd);
 
        return ret;
 }
@@ -645,15 +746,9 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
 /**
  * sgx_encl_put_backing() - Unpin the backing storage
  * @backing:   data for accessing backing storage for the page
- * @do_write:  mark pages dirty
  */
-void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write)
+void sgx_encl_put_backing(struct sgx_backing *backing)
 {
-       if (do_write) {
-               set_page_dirty(backing->pcmd);
-               set_page_dirty(backing->contents);
-       }
-
        put_page(backing->pcmd);
        put_page(backing->contents);
 }
index fec43ca65065b0caecf5e05dd261c62cf045494e..d44e7372151f016ba2971e2567824980ccfa079b 100644 (file)
@@ -107,7 +107,7 @@ void sgx_encl_release(struct kref *ref);
 int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
 int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
                         struct sgx_backing *backing);
-void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
+void sgx_encl_put_backing(struct sgx_backing *backing);
 int sgx_encl_test_and_clear_young(struct mm_struct *mm,
                                  struct sgx_encl_page *page);
 
index 8e4bc6453d263524493835027364f0785c37d241..ab4ec54bbdd94ca148efa45350d53f6abfd763ab 100644 (file)
@@ -191,6 +191,8 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
                          backing->pcmd_offset;
 
        ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
+       set_page_dirty(backing->pcmd);
+       set_page_dirty(backing->contents);
 
        kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
                                              backing->pcmd_offset));
@@ -308,6 +310,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
        sgx_encl_ewb(epc_page, backing);
        encl_page->epc_page = NULL;
        encl->secs_child_cnt--;
+       sgx_encl_put_backing(backing);
 
        if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
                ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
@@ -320,7 +323,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
                sgx_encl_free_epc_page(encl->secs.epc_page);
                encl->secs.epc_page = NULL;
 
-               sgx_encl_put_backing(&secs_backing, true);
+               sgx_encl_put_backing(&secs_backing);
        }
 
 out:
@@ -379,11 +382,14 @@ static void sgx_reclaim_pages(void)
                        goto skip;
 
                page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
+
+               mutex_lock(&encl_page->encl->lock);
                ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&encl_page->encl->lock);
                        goto skip;
+               }
 
-               mutex_lock(&encl_page->encl->lock);
                encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
                mutex_unlock(&encl_page->encl->lock);
                continue;
@@ -411,7 +417,6 @@ skip:
 
                encl_page = epc_page->owner;
                sgx_reclaimer_write(epc_page, &backing[i]);
-               sgx_encl_put_backing(&backing[i], true);
 
                kref_put(&encl_page->encl->refcount, sgx_encl_release);
                epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
index e8326a8d1c5dcae7e1c893f79c9b2f23f4e78580..9730c88530fc8b4c0a6c0bfc770acd087524ea10 100644 (file)
@@ -407,7 +407,7 @@ int crash_load_segments(struct kimage *image)
        }
        image->elf_load_addr = kbuf.mem;
        pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
-                image->elf_load_addr, kbuf.bufsz, kbuf.bufsz);
+                image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
 
        return ret;
 }
index 39e1c8626ab999fea588e7558927724dd2f3b457..c8340156bfd2aadc490b5d0a663879ded2bb991f 100644 (file)
@@ -142,7 +142,8 @@ static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
         * Non-compacted format and legacy features use the cached fixed
         * offsets.
         */
-       if (!cpu_feature_enabled(X86_FEATURE_XSAVES) || xfeature <= XFEATURE_SSE)
+       if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
+           xfeature <= XFEATURE_SSE)
                return xstate_offsets[xfeature];
 
        /*
@@ -369,12 +370,12 @@ static void __init setup_init_fpu_buf(void)
        /*
         * All components are now in init state. Read the state back so
         * that init_fpstate contains all non-zero init state. This only
-        * works with XSAVE, but not with XSAVEOPT and XSAVES because
+        * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
         * those use the init optimization which skips writing data for
         * components in init state.
         *
         * XSAVE could be used, but that would require to reshuffle the
-        * data when XSAVES is available because XSAVES uses xstate
+        * data when XSAVEC/S is available because XSAVEC/S uses xstate
         * compaction. But doing so is a pointless exercise because most
         * components have an all zeros init state except for the legacy
         * ones (FP and SSE). Those can be saved with FXSAVE into the
@@ -584,7 +585,8 @@ static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
  */
 static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
 {
-       bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+       bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
+       bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
        unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
        int i;
 
@@ -595,7 +597,7 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
                 * Supervisor state components can be managed only by
                 * XSAVES.
                 */
-               if (!compacted && xfeature_is_supervisor(i)) {
+               if (!xsaves && xfeature_is_supervisor(i)) {
                        XSTATE_WARN_ON(1);
                        return false;
                }
@@ -612,8 +614,11 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
  * the size of the *user* states.  If we use it to size a buffer
  * that we use 'XSAVES' on, we could potentially overflow the
  * buffer because 'XSAVES' saves system states too.
+ *
+ * This also takes compaction into account. So this works for
+ * XSAVEC as well.
  */
-static unsigned int __init get_xsaves_size(void)
+static unsigned int __init get_compacted_size(void)
 {
        unsigned int eax, ebx, ecx, edx;
        /*
@@ -623,6 +628,10 @@ static unsigned int __init get_xsaves_size(void)
         *    containing all the state components
         *    corresponding to bits currently set in
         *    XCR0 | IA32_XSS.
+        *
+        * When XSAVES is not available but XSAVEC is (virt), then there
+        * are no supervisor states, but XSAVEC still uses the compacted
+        * format.
         */
        cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
        return ebx;
@@ -632,13 +641,13 @@ static unsigned int __init get_xsaves_size(void)
  * Get the total size of the enabled xstates without the independent supervisor
  * features.
  */
-static unsigned int __init get_xsaves_size_no_independent(void)
+static unsigned int __init get_xsave_compacted_size(void)
 {
        u64 mask = xfeatures_mask_independent();
        unsigned int size;
 
        if (!mask)
-               return get_xsaves_size();
+               return get_compacted_size();
 
        /* Disable independent features. */
        wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
@@ -647,7 +656,7 @@ static unsigned int __init get_xsaves_size_no_independent(void)
         * Ask the hardware what size is required of the buffer.
         * This is the size required for the task->fpu buffer.
         */
-       size = get_xsaves_size();
+       size = get_compacted_size();
 
        /* Re-enable independent features so XSAVES will work on them again. */
        wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
@@ -687,20 +696,21 @@ static int __init init_xstate_size(void)
 {
        /* Recompute the context size for enabled features: */
        unsigned int user_size, kernel_size, kernel_default_size;
-       bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+       bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
 
        /* Uncompacted user space size */
        user_size = get_xsave_size_user();
 
        /*
-        * XSAVES kernel size includes supervisor states and
-        * uses compacted format when available.
+        * XSAVES kernel size includes supervisor states and uses compacted
+        * format. XSAVEC uses compacted format, but does not save
+        * supervisor states.
         *
-        * XSAVE does not support supervisor states so
-        * kernel and user size is identical.
+        * XSAVE[OPT] do not support supervisor states so kernel and user
+        * size is identical.
         */
        if (compacted)
-               kernel_size = get_xsaves_size_no_independent();
+               kernel_size = get_xsave_compacted_size();
        else
                kernel_size = user_size;
 
@@ -813,8 +823,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
        if (!cpu_feature_enabled(X86_FEATURE_XFD))
                fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
 
-       fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
-                             XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+       if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
+               fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
+       else
+               fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
+                                       XFEATURE_MASK_SUPERVISOR_SUPPORTED;
 
        fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
        fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
@@ -837,6 +850,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
         */
        init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
 
+       /* Set up compaction feature bit */
+       if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
+           cpu_feature_enabled(X86_FEATURE_XSAVES))
+               setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
+
        /* Enable xstate instructions to be able to continue with initialization: */
        fpu__init_cpu_xstate();
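
A compact way to read the new synthetic flag: compaction is implied by either XSAVEC or XSAVES, while supervisor states still require XSAVES proper. The booleans below are stand-ins for the feature bits, e.g. a guest whose hypervisor exposes XSAVEC but masks XSAVES:

	#include <stdbool.h>
	#include <stdio.h>

	int main(void)
	{
		bool xsaves = false;	/* masked off by a hypothetical hypervisor */
		bool xsavec = true;	/* still advertised to the guest */

		bool xcompacted = xsavec || xsaves;	/* X86_FEATURE_XCOMPACTED */
		bool supervisor_states = xsaves;	/* XSAVES-only capability */

		/* Prints: compacted=1 supervisor=0 */
		printf("compacted=%d supervisor=%d\n", xcompacted, supervisor_states);
		return 0;
	}
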
 
@@ -873,7 +891,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
        pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
                fpu_kernel_cfg.max_features,
                fpu_kernel_cfg.max_size,
-               boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
+               boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
        return;
 
 out_disable:
@@ -917,7 +935,7 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
        if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
                return NULL;
 
-       if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
+       if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
                if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
                        return NULL;
        }
@@ -1215,7 +1233,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
        }
 
        for (i = 0; i < XFEATURE_MAX; i++) {
-               u64 mask = ((u64)1 << i);
+               mask = BIT_ULL(i);
 
                if (hdr.xfeatures & mask) {
                        void *dst = __raw_xsave_addr(xsave, i);
@@ -1525,7 +1543,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
         * vendors into extending XFD for the pre AMX states, especially
         * AVX512.
         */
-       bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+       bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
        struct fpu *fpu = &current->group_leader->thread.fpu;
        struct fpu_state_perm *perm;
        unsigned int ksize, usize;
@@ -1687,16 +1705,13 @@ EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
  * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
  * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
  */
-long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
+long fpu_xstate_prctl(int option, unsigned long arg2)
 {
        u64 __user *uptr = (u64 __user *)arg2;
        u64 permitted, supported;
        unsigned long idx = arg2;
        bool guest = false;
 
-       if (tsk != current)
-               return -EPERM;
-
        switch (option) {
        case ARCH_GET_XCOMP_SUPP:
                supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
index d22ace092ca290d21b3707c050e83f89284de33f..5ad47031383b54c69c129d097e0f18ae68585c55 100644 (file)
@@ -16,7 +16,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
         * XRSTORS requires these bits set in xcomp_bv, or it will
         * trigger #GP:
         */
-       if (cpu_feature_enabled(X86_FEATURE_XSAVES))
+       if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED))
                xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
 }
 
@@ -79,6 +79,7 @@ static inline u64 xfeatures_mask_independent(void)
 /* These macros all use (%edi)/(%rdi) as the single memory argument. */
 #define XSAVE          ".byte " REX_PREFIX "0x0f,0xae,0x27"
 #define XSAVEOPT       ".byte " REX_PREFIX "0x0f,0xae,0x37"
+#define XSAVEC         ".byte " REX_PREFIX "0x0f,0xc7,0x27"
 #define XSAVES         ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
 #define XRSTOR         ".byte " REX_PREFIX "0x0f,0xae,0x2f"
 #define XRSTORS                ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
@@ -97,9 +98,11 @@ static inline u64 xfeatures_mask_independent(void)
                     : "memory")
 
 /*
- * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
- * format and supervisor states in addition to modified optimization in
- * XSAVEOPT.
+ * If XSAVES is enabled, it replaces XSAVEC because it supports supervisor
+ * states in addition to XSAVEC.
+ *
+ * Otherwise if XSAVEC is enabled, it replaces XSAVEOPT because it supports
+ * compacted storage format in addition to XSAVEOPT.
  *
  * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
  * supports modified optimization which is not supported by XSAVE.
@@ -111,8 +114,9 @@ static inline u64 xfeatures_mask_independent(void)
  * address of the instruction where we might get an exception at.
  */
 #define XSTATE_XSAVE(st, lmask, hmask, err)                            \
-       asm volatile(ALTERNATIVE_2(XSAVE,                               \
+       asm volatile(ALTERNATIVE_3(XSAVE,                               \
                                   XSAVEOPT, X86_FEATURE_XSAVEOPT,      \
+                                  XSAVEC,   X86_FEATURE_XSAVEC,        \
                                   XSAVES,   X86_FEATURE_XSAVES)        \
                     "\n"                                               \
                     "xor %[err], %[err]\n"                             \
index 1e31c7d21597bf56ce021793c2bcdb2ddf37b653..b09d73c2ba897610ff6f8d69cc527c2e56c684a7 100644 (file)
@@ -579,9 +579,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+#if defined(CONFIG_DYNAMIC_FTRACE) && !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
 extern void ftrace_graph_call(void);
 static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
 {
@@ -610,18 +608,7 @@ int ftrace_disable_ftrace_graph_caller(void)
 
        return ftrace_mod_jmp(ip, &ftrace_stub);
 }
-#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
-int ftrace_enable_ftrace_graph_caller(void)
-{
-       return 0;
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
-       return 0;
-}
-#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
-#endif /* !CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_DYNAMIC_FTRACE && !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 
 /*
  * Hook the return address and push it in the stack of return addrs
index 4f5ecbbaae77c7f6f74eb40d82681c94a063a99c..bd4a34100ed0cd53acf56114dcb976ec6186a669 100644 (file)
@@ -40,6 +40,7 @@
 #include <asm/extable.h>
 #include <asm/trapnr.h>
 #include <asm/sev.h>
+#include <asm/tdx.h>
 
 /*
  * Manage page tables very early on.
@@ -143,7 +144,20 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv
        if (sme_get_me_mask()) {
                vaddr = (unsigned long)__start_bss_decrypted;
                vaddr_end = (unsigned long)__end_bss_decrypted;
+
                for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+                       /*
+                        * On SNP, transition the page to shared in the RMP table so that
+                        * it is consistent with the page table attribute change.
+                        *
+                        * __start_bss_decrypted has a virtual address in the high range
+                        * mapping (kernel .text). PVALIDATE, by way of
+                        * early_snp_set_memory_shared(), requires a valid virtual
+                        * address but the kernel is currently running off of the identity
+                        * mapping so use __pa() to get a *currently* valid virtual address.
+                        */
+                       early_snp_set_memory_shared(__pa(vaddr), __pa(vaddr), PTRS_PER_PMD);
+
                        i = pmd_index(vaddr);
                        pmd[i] -= sme_get_me_mask();
                }
@@ -192,9 +206,6 @@ unsigned long __head __startup_64(unsigned long physaddr,
        if (load_delta & ~PMD_PAGE_MASK)
                for (;;);
 
-       /* Activate Secure Memory Encryption (SME) if supported and enabled */
-       sme_enable(bp);
-
        /* Include the SME encryption mask in the fixup value */
        load_delta += sme_get_me_mask();
 
@@ -308,15 +319,6 @@ unsigned long __head __startup_64(unsigned long physaddr,
        return sme_postprocess_startup(bp, pmd);
 }
 
-unsigned long __startup_secondary_64(void)
-{
-       /*
-        * Return the SME encryption mask (if SME is active) to be used as a
-        * modifier for the initial pgdir entry programmed into CR3.
-        */
-       return sme_get_me_mask();
-}
-
 /* Wipe all early page tables except for the kernel symbol map */
 static void __init reset_early_page_tables(void)
 {
@@ -416,6 +418,9 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr)
            trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
                return;
 
+       if (trapnr == X86_TRAP_VE && tdx_early_handle_ve(regs))
+               return;
+
        early_fixup_exception(regs, trapnr);
 }
 
@@ -514,6 +519,9 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 
        idt_setup_early_handler();
 
+       /* Needed before cc_platform_has() can be used for TDX */
+       tdx_early_init();
+
        copy_bootdata(__va(real_mode_data));
 
        /*
@@ -600,8 +608,10 @@ static void startup_64_load_idt(unsigned long physbase)
 void early_setup_idt(void)
 {
        /* VMM Communication Exception */
-       if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+       if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+               setup_ghcb();
                set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb);
+       }
 
        bringup_idt_descr.address = (unsigned long)bringup_idt_table;
        native_load_idt(&bringup_idt_descr);
index b8e3019547a5d8271ed1a4bf8c7104ea4de3cafb..92c4afa2b7298d2a45f6fc3e67843fdb73354bfe 100644 (file)
@@ -65,10 +65,39 @@ SYM_CODE_START_NOALIGN(startup_64)
        leaq    (__end_init_task - FRAME_SIZE)(%rip), %rsp
 
        leaq    _text(%rip), %rdi
+
+       /*
+	 * initial_gs points to the initial fixed_percpu_data struct, which has
+	 * storage for the stack protector canary. Global pointer fixups are
+	 * needed at this stage, so apply them as is done in fixup_pointer(),
+	 * and initialize %gs such that the canary can be accessed at %gs:40
+	 * for subsequent C calls.

+        */
+       movl    $MSR_GS_BASE, %ecx
+       movq    initial_gs(%rip), %rax
+       movq    $_text, %rdx
+       subq    %rdx, %rax
+       addq    %rdi, %rax
+       movq    %rax, %rdx
+       shrq    $32,  %rdx
+       wrmsr
+
        pushq   %rsi
        call    startup_64_setup_env
        popq    %rsi
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+       /*
+	 * Activate SEV/SME memory encryption if supported/enabled. This needs
+	 * to be done now, since it also sets up the SEV-SNP CPUID table, which
+	 * must be in place before any CPUID instructions are executed in
+	 * subsequent code.
+        */
+       movq    %rsi, %rdi
+       pushq   %rsi
+       call    sme_enable
+       popq    %rsi
+#endif
+
        /* Now switch to __KERNEL_CS so IRET works reliably */
        pushq   $__KERNEL_CS
        leaq    .Lon_kernel_cs(%rip), %rax
@@ -134,16 +163,32 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
         * Retrieve the modifier (SME encryption mask if SME is active) to be
         * added to the initial pgdir entry that will be programmed into CR3.
         */
-       pushq   %rsi
-       call    __startup_secondary_64
-       popq    %rsi
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+       movq    sme_me_mask, %rax
+#else
+       xorq    %rax, %rax
+#endif
 
        /* Form the CR3 value being sure to include the CR3 modifier */
        addq    $(init_top_pgt - __START_KERNEL_map), %rax
 1:
 
+#ifdef CONFIG_X86_MCE
+       /*
+        * Preserve CR4.MCE if the kernel will enable #MC support.
+        * Clearing MCE may fault in some environments (that also force #MC
+        * support). Any machine check that occurs before #MC support is fully
+        * configured will crash the system regardless of the CR4.MCE value set
+        * here.
+        */
+       movq    %cr4, %rcx
+       andl    $X86_CR4_MCE, %ecx
+#else
+       movl    $0, %ecx
+#endif
+
        /* Enable PAE mode, PGE and LA57 */
-       movl    $(X86_CR4_PAE | X86_CR4_PGE), %ecx
+       orl     $(X86_CR4_PAE | X86_CR4_PGE), %ecx
 #ifdef CONFIG_X86_5LEVEL
        testl   $1, __pgtable_l5_enabled(%rip)
        jz      1f
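
In C terms, the CR4 handling above amounts to the following sketch (the
real code necessarily runs in assembly, before any C environment
exists):

	/* Sketch: keep CR4.MCE if configured, then enable PAE and PGE. */
	unsigned long cr4 = 0;

	if (IS_ENABLED(CONFIG_X86_MCE))
		cr4 = native_read_cr4() & X86_CR4_MCE;
	cr4 |= X86_CR4_PAE | X86_CR4_PGE;
	/* X86_CR4_LA57 is also OR'ed in when 5-level paging is enabled. */
	native_write_cr4(cr4);
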
@@ -249,13 +294,23 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
        /* Setup EFER (Extended Feature Enable Register) */
        movl    $MSR_EFER, %ecx
        rdmsr
+       /*
+        * Preserve current value of EFER for comparison and to skip
+        * EFER writes if no change was made (for TDX guest)
+        */
+       movl    %eax, %edx
        btsl    $_EFER_SCE, %eax        /* Enable System Call */
        btl     $20,%edi                /* No Execute supported? */
        jnc     1f
        btsl    $_EFER_NX, %eax
        btsq    $_PAGE_BIT_NX,early_pmd_flags(%rip)
-1:     wrmsr                           /* Make changes effective */
 
+       /* Avoid writing EFER if no change was made (for TDX guest) */
+1:     cmpl    %edx, %eax
+       je      1f
+       xor     %edx, %edx
+       wrmsr                           /* Make changes effective */
+1:
        /* Setup cr0 */
        movl    $CR0_STATE, %eax
        /* Make changes effective */
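
The EFER sequence above skips the WRMSR entirely when the value is
unchanged, which the comments note matters for TDX guests. A C sketch
of the same read-modify-conditional-write logic, where nx_supported is
a hypothetical flag standing in for the CPUID NX check done in %edi:

	/* Sketch: only write EFER back if it actually changed. */
	u64 efer, new;

	rdmsrl(MSR_EFER, efer);
	new = efer | EFER_SCE;		/* enable SYSCALL/SYSRET */
	if (nx_supported)		/* hypothetical flag */
		new |= EFER_NX;
	if (new != efer)
		wrmsrl(MSR_EFER, new);
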
index 608eb63bf0444c9bf0636715a630e4b445350ff1..a58c6bc1cd68c2be54ebbe5bb149fa9c954b38d7 100644 (file)
@@ -69,6 +69,9 @@ static const __initconst struct idt_data early_idts[] = {
         */
        INTG(X86_TRAP_PF,               asm_exc_page_fault),
 #endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+       INTG(X86_TRAP_VE,               asm_exc_virtualization_exception),
+#endif
 };
 
 /*
index e73f7df362f5d178b008dc8e15ff4eb6d4f7af5e..cec0bfa3bc04fa21fbf741d5c6fca0464956bf1d 100644 (file)
@@ -157,7 +157,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
        struct nmi_desc *desc = nmi_to_desc(type);
        unsigned long flags;
 
-       if (!action->handler)
+       if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list)))
                return -EINVAL;
 
        raw_spin_lock_irqsave(&desc->lock, flags);
@@ -177,7 +177,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
                list_add_rcu(&action->list, &desc->head);
        else
                list_add_tail_rcu(&action->list, &desc->head);
-       
+
        raw_spin_unlock_irqrestore(&desc->lock, flags);
        return 0;
 }
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(__register_nmi_handler);
 void unregister_nmi_handler(unsigned int type, const char *name)
 {
        struct nmi_desc *desc = nmi_to_desc(type);
-       struct nmiaction *n;
+       struct nmiaction *n, *found = NULL;
        unsigned long flags;
 
        raw_spin_lock_irqsave(&desc->lock, flags);
@@ -200,12 +200,16 @@ void unregister_nmi_handler(unsigned int type, const char *name)
                        WARN(in_nmi(),
                                "Trying to free NMI (%s) from NMI context!\n", n->name);
                        list_del_rcu(&n->list);
+                       found = n;
                        break;
                }
        }
 
        raw_spin_unlock_irqrestore(&desc->lock, flags);
-       synchronize_rcu();
+       if (found) {
+               synchronize_rcu();
+               INIT_LIST_HEAD(&found->list);
+       }
 }
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
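
For reference, the API being hardened here is used roughly as follows
(a minimal sketch; the handler and name are illustrative):

	static int demo_nmi_handler(unsigned int cmd, struct pt_regs *regs)
	{
		return NMI_DONE;	/* not ours, let other handlers run */
	}

	static int __init demo_init(void)
	{
		return register_nmi_handler(NMI_LOCAL, demo_nmi_handler, 0, "demo");
	}

	static void __exit demo_exit(void)
	{
		unregister_nmi_handler(NMI_LOCAL, "demo");
	}

With the changes above, registering the same nmiaction twice now warns
and fails with -EINVAL, and unregistering reinitializes the action's
list head so it can legitimately be registered again later.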
 
index 36e84d9042606476e0c69a5554853feff3052632..319fef37d9dce41f8109c768f58f627649dbac99 100644 (file)
@@ -21,6 +21,7 @@
 #include <asm/sections.h>
 #include <asm/io.h>
 #include <asm/setup_arch.h>
+#include <asm/sev.h>
 
 static struct resource system_rom_resource = {
        .name   = "System ROM",
@@ -197,11 +198,21 @@ static int __init romchecksum(const unsigned char *rom, unsigned long length)
 
 void __init probe_roms(void)
 {
-       const unsigned char *rom;
        unsigned long start, length, upper;
+       const unsigned char *rom;
        unsigned char c;
        int i;
 
+       /*
+        * The ROM memory range is not part of the e820 table and is therefore not
+        * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted
+        * memory, and SNP requires encrypted memory to be validated before access.
+        * Do that here.
+        */
+       snp_prep_memory(video_rom_resource.start,
+                       ((system_rom_resource.end + 1) - video_rom_resource.start),
+                       SNP_PAGE_STATE_PRIVATE);
+
        /* video rom */
        upper = adapter_rom_resources[0].start;
        for (start = video_rom_resource.start; start < upper; start += 2048) {
index b370767f5b191e1556b3665a3b8b5f032decf0e6..58fb48d3004fe2ef766c61d4b8ee13a175823583 100644 (file)
@@ -46,6 +46,7 @@
 #include <asm/proto.h>
 #include <asm/frame.h>
 #include <asm/unwind.h>
+#include <asm/tdx.h>
 
 #include "process.h"
 
@@ -160,6 +161,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
        savesegment(ds, p->thread.ds);
 #else
        p->thread.sp0 = (unsigned long) (childregs + 1);
+       savesegment(gs, p->thread.gs);
        /*
         * Clear all status flags including IF and set fixed bit. 64bit
         * does not have this initialization as the frame does not contain
@@ -191,10 +193,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
        if (sp)
                childregs->sp = sp;
 
-#ifdef CONFIG_X86_32
-       task_user_gs(p) = get_user_gs(current_pt_regs());
-#endif
-
        if (unlikely(p->flags & PF_IO_WORKER)) {
                /*
                 * An IO thread is a user space thread, but it doesn't
@@ -334,7 +332,7 @@ static int get_cpuid_mode(void)
        return !test_thread_flag(TIF_NOCPUID);
 }
 
-static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+static int set_cpuid_mode(unsigned long cpuid_enabled)
 {
        if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT))
                return -ENODEV;
@@ -405,7 +403,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
 }
 
 /**
- * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
+ * native_tss_update_io_bitmap - Update I/O bitmap before exiting to user mode
  */
 void native_tss_update_io_bitmap(void)
 {
@@ -686,9 +684,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
                /* Enforce MSR update to ensure consistent state */
                __speculation_ctrl_update(~tifn, tifn);
        }
-
-       if ((tifp ^ tifn) & _TIF_SLD)
-               switch_to_sld(tifn);
 }
 
 /*
@@ -873,6 +868,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
        } else if (prefer_mwait_c1_over_halt(c)) {
                pr_info("using mwait in idle threads\n");
                x86_idle = mwait_idle;
+       } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+               pr_info("using TDX aware idle routine\n");
+               x86_idle = tdx_safe_halt;
        } else
                x86_idle = default_idle;
 }
@@ -985,20 +983,19 @@ unsigned long __get_wchan(struct task_struct *p)
        return addr;
 }
 
-long do_arch_prctl_common(struct task_struct *task, int option,
-                         unsigned long arg2)
+long do_arch_prctl_common(int option, unsigned long arg2)
 {
        switch (option) {
        case ARCH_GET_CPUID:
                return get_cpuid_mode();
        case ARCH_SET_CPUID:
-               return set_cpuid_mode(task, arg2);
+               return set_cpuid_mode(arg2);
        case ARCH_GET_XCOMP_SUPP:
        case ARCH_GET_XCOMP_PERM:
        case ARCH_REQ_XCOMP_PERM:
        case ARCH_GET_XCOMP_GUEST_PERM:
        case ARCH_REQ_XCOMP_GUEST_PERM:
-               return fpu_xstate_prctl(task, option, arg2);
+               return fpu_xstate_prctl(option, arg2);
        }
 
        return -EINVAL;
index 26edb1cd07a4332e6f9ab500cab316781fc62b26..2f314b170c9f0737d6ee673ed380d894a6978911 100644 (file)
@@ -63,10 +63,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned short gs;
 
-       if (user_mode(regs))
-               gs = get_user_gs(regs);
-       else
-               savesegment(gs, gs);
+       savesegment(gs, gs);
 
        show_ip(regs, log_lvl);
 
@@ -114,7 +111,7 @@ void release_thread(struct task_struct *dead_task)
 void
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 {
-       set_user_gs(regs, 0);
+       loadsegment(gs, 0);
        regs->fs                = 0;
        regs->ds                = __USER_DS;
        regs->es                = __USER_DS;
@@ -177,7 +174,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * used %fs or %gs (it does not today), or if the kernel is
         * running inside of a hypervisor layer.
         */
-       lazy_save_gs(prev->gs);
+       savesegment(gs, prev->gs);
 
        /*
         * Load the per-thread Thread-Local Storage descriptor.
@@ -208,7 +205,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * Restore %gs if needed (which is common)
         */
        if (prev->gs | next->gs)
-               lazy_load_gs(next->gs);
+               loadsegment(gs, next->gs);
 
        this_cpu_write(current_task, next_p);
 
@@ -222,5 +219,5 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-       return do_arch_prctl_common(current, option, arg2);
+       return do_arch_prctl_common(option, arg2);
 }
index e459253649be23f6ad942a0ae76b7d3987aa9215..1962008fe7437f89be48e2511199e0356348573f 100644 (file)
@@ -844,7 +844,7 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 
        ret = do_arch_prctl_64(current, option, arg2);
        if (ret == -EINVAL)
-               ret = do_arch_prctl_common(current, option, arg2);
+               ret = do_arch_prctl_common(option, arg2);
 
        return ret;
 }
@@ -852,7 +852,7 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 #ifdef CONFIG_IA32_EMULATION
 COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-       return do_arch_prctl_common(current, option, arg2);
+       return do_arch_prctl_common(option, arg2);
 }
 #endif
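
These options reach the kernel through the arch_prctl(2) syscall. A
minimal userspace sketch, assuming the ARCH_GET_XCOMP_SUPP constant
from the uapi header asm/prctl.h:

	#include <stdint.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#ifndef ARCH_GET_XCOMP_SUPP
	#define ARCH_GET_XCOMP_SUPP	0x1021	/* from asm/prctl.h */
	#endif

	int main(void)
	{
		uint64_t supported = 0;

		/* Ask the kernel which xstate components are supported. */
		if (syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &supported) == 0)
			printf("supported xfeatures: %#llx\n",
			       (unsigned long long)supported);
		return 0;
	}
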
 
index 98d10ef605717820e802540b86fdb5c1b051bd38..37c12fb92906b64cfb2e0b7e96a20a2ffa57cfca 100644 (file)
@@ -170,9 +170,9 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
                retval = *pt_regs_access(task_pt_regs(task), offset);
        else {
                if (task == current)
-                       retval = get_user_gs(task_pt_regs(task));
+                       savesegment(gs, retval);
                else
-                       retval = task_user_gs(task);
+                       retval = task->thread.gs;
        }
        return retval;
 }
@@ -210,7 +210,7 @@ static int set_segment_reg(struct task_struct *task,
                break;
 
        case offsetof(struct user_regs_struct, gs):
-               task_user_gs(task) = value;
+               task->thread.gs = value;
        }
 
        return 0;
index c95b9ac5a4571a30181af4a3e4082bd7af7ed601..249981bf3d8aa4a5a8e973142b7f81bb65d9f3db 100644 (file)
@@ -756,6 +756,30 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
        return 0;
 }
 
+void x86_configure_nx(void)
+{
+       if (boot_cpu_has(X86_FEATURE_NX))
+               __supported_pte_mask |= _PAGE_NX;
+       else
+               __supported_pte_mask &= ~_PAGE_NX;
+}
+
+static void __init x86_report_nx(void)
+{
+       if (!boot_cpu_has(X86_FEATURE_NX)) {
+               printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+                      "missing in CPU!\n");
+       } else {
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+               printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+#else
+               /* 32bit non-PAE kernel, NX cannot be used */
+               printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+                      "cannot be enabled: non-PAE kernel!\n");
+#endif
+       }
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -896,9 +920,7 @@ void __init setup_arch(char **cmdline_p)
        /*
         * x86_configure_nx() is called before parse_early_param() to detect
         * whether hardware doesn't support NX (so that the early EHCI debug
-        * console setup can safely call set_fixmap()). It may then be called
-        * again from within noexec_setup() during parsing early parameters
-        * to honor the respective command line option.
+        * console setup can safely call set_fixmap()).
         */
        x86_configure_nx();
 
index ce987688bbc0520382fb0e2a8cc6db100be35c49..b478edf43bec2a8f9eb50c2f68316f71aca3605a 100644 (file)
 #define has_cpuflag(f) boot_cpu_has(f)
 #endif
 
+/* I/O parameters for CPUID-related helpers */
+struct cpuid_leaf {
+       u32 fn;
+       u32 subfn;
+       u32 eax;
+       u32 ebx;
+       u32 ecx;
+       u32 edx;
+};
+
+/*
+ * Individual entries of the SNP CPUID table, as defined by the SNP
+ * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
+ */
+struct snp_cpuid_fn {
+       u32 eax_in;
+       u32 ecx_in;
+       u64 xcr0_in;
+       u64 xss_in;
+       u32 eax;
+       u32 ebx;
+       u32 ecx;
+       u32 edx;
+       u64 __reserved;
+} __packed;
+
+/*
+ * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
+ * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
+ * of 64 entries per CPUID table.
+ */
+#define SNP_CPUID_COUNT_MAX 64
+
+struct snp_cpuid_table {
+       u32 count;
+       u32 __reserved1;
+       u64 __reserved2;
+       struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
+} __packed;
+
+/*
+ * Since feature negotiation related variables are set early in the boot
+ * process they must reside in the .data section so as not to be zeroed
+ * out when the .bss section is later cleared.
+ *
+ * GHCB protocol version negotiated with the hypervisor.
+ */
+static u16 ghcb_version __ro_after_init;
+
+/* Copy of the SNP firmware's CPUID page. */
+static struct snp_cpuid_table cpuid_table_copy __ro_after_init;
+
+/*
+ * These will be initialized based on the CPUID table so that non-present
+ * all-zero leaves (for sparse tables) can be differentiated from
+ * invalid/out-of-range leaves. This is needed since all-zero leaves
+ * still need to be post-processed.
+ */
+static u32 cpuid_std_range_max __ro_after_init;
+static u32 cpuid_hyp_range_max __ro_after_init;
+static u32 cpuid_ext_range_max __ro_after_init;
+
 static bool __init sev_es_check_cpu_features(void)
 {
        if (!has_cpuflag(X86_FEATURE_RDRAND)) {
@@ -24,15 +86,12 @@ static bool __init sev_es_check_cpu_features(void)
        return true;
 }
 
-static void __noreturn sev_es_terminate(unsigned int reason)
+static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
 {
        u64 val = GHCB_MSR_TERM_REQ;
 
-       /*
-        * Tell the hypervisor what went wrong - only reason-set 0 is
-        * currently supported.
-        */
-       val |= GHCB_SEV_TERM_REASON(0, reason);
+       /* Tell the hypervisor what went wrong. */
+       val |= GHCB_SEV_TERM_REASON(set, reason);
 
 	/* Request Guest Termination from Hypervisor */
        sev_es_wr_ghcb_msr(val);
@@ -42,6 +101,42 @@ static void __noreturn sev_es_terminate(unsigned int reason)
                asm volatile("hlt\n" : : : "memory");
 }
 
+/*
+ * The hypervisor features are available from GHCB version 2 onward.
+ */
+static u64 get_hv_features(void)
+{
+       u64 val;
+
+       if (ghcb_version < 2)
+               return 0;
+
+       sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
+       VMGEXIT();
+
+       val = sev_es_rd_ghcb_msr();
+       if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
+               return 0;
+
+       return GHCB_MSR_HV_FT_RESP_VAL(val);
+}
+
+static void snp_register_ghcb_early(unsigned long paddr)
+{
+       unsigned long pfn = paddr >> PAGE_SHIFT;
+       u64 val;
+
+       sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
+       VMGEXIT();
+
+       val = sev_es_rd_ghcb_msr();
+
+       /* If the response GPA is not ours then abort the guest */
+       if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
+           (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
+}
+
 static bool sev_es_negotiate_protocol(void)
 {
        u64 val;
@@ -54,10 +149,12 @@ static bool sev_es_negotiate_protocol(void)
        if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
                return false;
 
-       if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR ||
-           GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR)
+       if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
+           GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
                return false;
 
+       ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);
+
        return true;
 }
 
@@ -104,10 +201,7 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
 
        if (ret == 1) {
                u64 info = ghcb->save.sw_exit_info_2;
-               unsigned long v;
-
-               info = ghcb->save.sw_exit_info_2;
-               v = info & SVM_EVTINJ_VEC_MASK;
+               unsigned long v = info & SVM_EVTINJ_VEC_MASK;
 
                /* Check if exception information from hypervisor is sane. */
                if ((info & SVM_EVTINJ_VALID) &&
@@ -130,7 +224,7 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
                                   u64 exit_info_1, u64 exit_info_2)
 {
        /* Fill in protocol and format specifiers */
-       ghcb->protocol_version = GHCB_PROTOCOL_MAX;
+       ghcb->protocol_version = ghcb_version;
        ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;
 
        ghcb_set_sw_exit_code(ghcb, exit_code);
@@ -150,6 +244,290 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
        return verify_exception_info(ghcb, ctxt);
 }
 
+static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
+{
+       u64 val;
+
+       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
+       VMGEXIT();
+       val = sev_es_rd_ghcb_msr();
+       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+               return -EIO;
+
+       *reg = (val >> 32);
+
+       return 0;
+}
+
+static int sev_cpuid_hv(struct cpuid_leaf *leaf)
+{
+       int ret;
+
+       /*
+	 * The MSR protocol does not support fetching non-zero subfunctions,
+	 * but is sufficient to handle current early-boot cases. Should that
+	 * change, make sure to report an error rather than ignoring the index
+	 * and grabbing random values. If this issue arises in the future,
+	 * handling can be added here to use the GHCB-page protocol for cases
+	 * that occur late enough in boot that the GHCB page is available.
+        */
+       if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
+               return -EINVAL;
+
+       ret =         __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
+       ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
+       ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
+       ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);
+
+       return ret;
+}
+
+/*
+ * This may be called early while still running on the initial identity
+ * mapping. Use RIP-relative addressing to obtain the correct address
+ * while running with the initial identity mapping as well as the
+ * switch-over to kernel virtual addresses later.
+ */
+static const struct snp_cpuid_table *snp_cpuid_get_table(void)
+{
+       void *ptr;
+
+       asm ("lea cpuid_table_copy(%%rip), %0"
+            : "=r" (ptr)
+            : "p" (&cpuid_table_copy));
+
+       return ptr;
+}
+
+/*
+ * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
+ * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
+ * and 1 based on the corresponding features enabled by a particular
+ * combination of XCR0 and XSS registers so that a guest can look up the
+ * version corresponding to the features currently enabled in its XCR0/XSS
+ * registers. The only values that differ between these versions/table
+ * entries is the enabled XSAVE area size advertised via EBX.
+ *
+ * While hypervisors may choose to make use of this support, it is more
+ * robust/secure for a guest to simply find the entry corresponding to the
+ * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
+ * XSAVE area size using subfunctions 2 through 64, as documented in APM
+ * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
+ *
+ * Since base/legacy XSAVE area size is documented as 0x240, use that value
+ * directly rather than relying on the base size in the CPUID table.
+ *
+ * Return: XSAVE area size on success, 0 otherwise.
+ */
+static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+       u64 xfeatures_found = 0;
+       u32 xsave_size = 0x240;
+       int i;
+
+       for (i = 0; i < cpuid_table->count; i++) {
+               const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
+
+               if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
+                       continue;
+               if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
+                       continue;
+               if (xfeatures_found & (BIT_ULL(e->ecx_in)))
+                       continue;
+
+               xfeatures_found |= (BIT_ULL(e->ecx_in));
+
+               if (compacted)
+                       xsave_size += e->eax;
+               else
+                       xsave_size = max(xsave_size, e->eax + e->ebx);
+       }
+
+       /*
+        * Either the guest set unsupported XCR0/XSS bits, or the corresponding
+        * entries in the CPUID table were not present. This is not a valid
+        * state to be in.
+        */
+       if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
+               return 0;
+
+       return xsave_size;
+}
+
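
As a worked example of the calculation above: with XCR0 = 0x7 (x87,
SSE, AVX), the only component beyond the 0x240-byte legacy area is AVX
(bit 2, 256 bytes per the APM/SDM), so the compacted size is
0x240 + 0x100 = 0x340 bytes. The uncompacted path takes
max(eax + ebx) over the enabled components instead, which for AVX at
offset 0x240 also yields 0x340.
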
+static bool
+snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+       int i;
+
+       for (i = 0; i < cpuid_table->count; i++) {
+               const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
+
+               if (e->eax_in != leaf->fn)
+                       continue;
+
+               if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
+                       continue;
+
+               /*
+                * For 0xD subfunctions 0 and 1, only use the entry corresponding
+                * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
+                * See the comments above snp_cpuid_calc_xsave_size() for more
+                * details.
+                */
+               if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
+                       if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
+                               continue;
+
+               leaf->eax = e->eax;
+               leaf->ebx = e->ebx;
+               leaf->ecx = e->ecx;
+               leaf->edx = e->edx;
+
+               return true;
+       }
+
+       return false;
+}
+
+static void snp_cpuid_hv(struct cpuid_leaf *leaf)
+{
+       if (sev_cpuid_hv(leaf))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
+}
+
+static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
+{
+       struct cpuid_leaf leaf_hv = *leaf;
+
+       switch (leaf->fn) {
+       case 0x1:
+               snp_cpuid_hv(&leaf_hv);
+
+               /* initial APIC ID */
+               leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
+               /* APIC enabled bit */
+               leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));
+
+               /* OSXSAVE enabled bit */
+               if (native_read_cr4() & X86_CR4_OSXSAVE)
+                       leaf->ecx |= BIT(27);
+               break;
+       case 0x7:
+               /* OSPKE enabled bit */
+               leaf->ecx &= ~BIT(4);
+               if (native_read_cr4() & X86_CR4_PKE)
+                       leaf->ecx |= BIT(4);
+               break;
+       case 0xB:
+               leaf_hv.subfn = 0;
+               snp_cpuid_hv(&leaf_hv);
+
+               /* extended APIC ID */
+               leaf->edx = leaf_hv.edx;
+               break;
+       case 0xD: {
+               bool compacted = false;
+               u64 xcr0 = 1, xss = 0;
+               u32 xsave_size;
+
+               if (leaf->subfn != 0 && leaf->subfn != 1)
+                       return 0;
+
+               if (native_read_cr4() & X86_CR4_OSXSAVE)
+                       xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+               if (leaf->subfn == 1) {
+                       /* Get XSS value if XSAVES is enabled. */
+                       if (leaf->eax & BIT(3)) {
+                               unsigned long lo, hi;
+
+                               asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
+                                                    : "c" (MSR_IA32_XSS));
+                               xss = (hi << 32) | lo;
+                       }
+
+                       /*
+                        * The PPR and APM aren't clear on what size should be
+                        * encoded in 0xD:0x1:EBX when compaction is not enabled
+                        * by either XSAVEC (feature bit 1) or XSAVES (feature
+                        * bit 3) since SNP-capable hardware has these feature
+                        * bits fixed as 1. KVM sets it to 0 in this case, but
+                        * to avoid this becoming an issue it's safer to simply
+                        * treat this as unsupported for SNP guests.
+                        */
+                       if (!(leaf->eax & (BIT(1) | BIT(3))))
+                               return -EINVAL;
+
+                       compacted = true;
+               }
+
+               xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
+               if (!xsave_size)
+                       return -EINVAL;
+
+               leaf->ebx = xsave_size;
+               }
+               break;
+       case 0x8000001E:
+               snp_cpuid_hv(&leaf_hv);
+
+               /* extended APIC ID */
+               leaf->eax = leaf_hv.eax;
+               /* compute ID */
+               leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
+               /* node ID */
+               leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
+               break;
+       default:
+               /* No fix-ups needed, use values as-is. */
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
+ * should be treated as fatal by caller.
+ */
+static int snp_cpuid(struct cpuid_leaf *leaf)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+
+       if (!cpuid_table->count)
+               return -EOPNOTSUPP;
+
+       if (!snp_cpuid_get_validated_func(leaf)) {
+               /*
+                * Some hypervisors will avoid keeping track of CPUID entries
+                * where all values are zero, since they can be handled the
+		 * same as out-of-range values (all-zero). This is useful here
+		 * as well, since it allows virtually all guest configurations
+		 * to work using a single SNP CPUID table.
+                *
+                * To allow for this, there is a need to distinguish between
+                * out-of-range entries and in-range zero entries, since the
+                * CPUID table entries are only a template that may need to be
+                * augmented with additional values for things like
+                * CPU-specific information during post-processing. So if it's
+                * not in the table, set the values to zero. Then, if they are
+                * within a valid CPUID range, proceed with post-processing
+                * using zeros as the initial values. Otherwise, skip
+                * post-processing and just return zeros immediately.
+                */
+               leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
+
+		/* Skip post-processing for out-of-range zero leaves. */
+               if (!(leaf->fn <= cpuid_std_range_max ||
+                     (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
+                     (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
+                       return 0;
+       }
+
+       return snp_cpuid_postprocess(leaf);
+}
+
 /*
  * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
  * page yet, so it only supports the MSR based communication with the
@@ -157,40 +535,33 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
  */
 void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
 {
+       unsigned int subfn = lower_bits(regs->cx, 32);
        unsigned int fn = lower_bits(regs->ax, 32);
-       unsigned long val;
+       struct cpuid_leaf leaf;
+       int ret;
 
        /* Only CPUID is supported via MSR protocol */
        if (exit_code != SVM_EXIT_CPUID)
                goto fail;
 
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
-               goto fail;
-       regs->ax = val >> 32;
+       leaf.fn = fn;
+       leaf.subfn = subfn;
 
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
-               goto fail;
-       regs->bx = val >> 32;
+       ret = snp_cpuid(&leaf);
+       if (!ret)
+               goto cpuid_done;
 
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+       if (ret != -EOPNOTSUPP)
                goto fail;
-       regs->cx = val >> 32;
 
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+       if (sev_cpuid_hv(&leaf))
                goto fail;
-       regs->dx = val >> 32;
+
+cpuid_done:
+       regs->ax = leaf.eax;
+       regs->bx = leaf.ebx;
+       regs->cx = leaf.ecx;
+       regs->dx = leaf.edx;
 
        /*
         * This is a VC handler and the #VC is only raised when SEV-ES is
@@ -221,7 +592,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
 
 fail:
        /* Terminate the guest */
-       sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
 }
 
 static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
@@ -481,12 +852,37 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
        return ret;
 }
 
+static int vc_handle_cpuid_snp(struct pt_regs *regs)
+{
+       struct cpuid_leaf leaf;
+       int ret;
+
+       leaf.fn = regs->ax;
+       leaf.subfn = regs->cx;
+       ret = snp_cpuid(&leaf);
+       if (!ret) {
+               regs->ax = leaf.eax;
+               regs->bx = leaf.ebx;
+               regs->cx = leaf.ecx;
+               regs->dx = leaf.edx;
+       }
+
+       return ret;
+}
+
 static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
                                      struct es_em_ctxt *ctxt)
 {
        struct pt_regs *regs = ctxt->regs;
        u32 cr4 = native_read_cr4();
        enum es_result ret;
+       int snp_cpuid_ret;
+
+       snp_cpuid_ret = vc_handle_cpuid_snp(regs);
+       if (!snp_cpuid_ret)
+               return ES_OK;
+       if (snp_cpuid_ret != -EOPNOTSUPP)
+               return ES_VMM_ERROR;
 
        ghcb_set_rax(ghcb, regs->ax);
        ghcb_set_rcx(ghcb, regs->cx);
@@ -538,3 +934,67 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
 
        return ES_OK;
 }
+
+struct cc_setup_data {
+       struct setup_data header;
+       u32 cc_blob_address;
+};
+
+/*
+ * Search for a Confidential Computing blob passed in as a setup_data entry
+ * via the Linux Boot Protocol.
+ */
+static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
+{
+       struct cc_setup_data *sd = NULL;
+       struct setup_data *hdr;
+
+       hdr = (struct setup_data *)bp->hdr.setup_data;
+
+       while (hdr) {
+               if (hdr->type == SETUP_CC_BLOB) {
+                       sd = (struct cc_setup_data *)hdr;
+                       return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
+               }
+               hdr = (struct setup_data *)hdr->next;
+       }
+
+       return NULL;
+}
+
+/*
+ * Initialize the kernel's copy of the SNP CPUID table, and set up the
+ * pointer that will be used to access it.
+ *
+ * Maintaining a direct mapping of the SNP CPUID table used by firmware would
+ * be possible as an alternative, but the approach is brittle since the
+ * mapping needs to be updated in sync with all the changes to virtual memory
+ * layout and related mapping facilities throughout the boot process.
+ */
+static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
+{
+       const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
+       int i;
+
+       if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);
+
+       cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
+       if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);
+
+       cpuid_table = snp_cpuid_get_table();
+       memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));
+
+       /* Initialize CPUID ranges for range-checking. */
+       for (i = 0; i < cpuid_table->count; i++) {
+               const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
+
+               if (fn->eax_in == 0x0)
+                       cpuid_std_range_max = fn->eax;
+               else if (fn->eax_in == 0x40000000)
+                       cpuid_hyp_range_max = fn->eax;
+               else if (fn->eax_in == 0x80000000)
+                       cpuid_ext_range_max = fn->eax;
+       }
+}
index e6d316a01fdd464bc2307e84f8362a8638712778..c05f0124c41096d948fa2be351f92f4c266c2cbe 100644 (file)
 #include <linux/memblock.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/cpumask.h>
+#include <linux/efi.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
 #include <asm/svm.h>
 #include <asm/smp.h>
 #include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid.h>
+#include <asm/cmdline.h>
 
 #define DR7_RESET_VALUE        0x400
 
+/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
+#define AP_INIT_CS_LIMIT               0xffff
+#define AP_INIT_DS_LIMIT               0xffff
+#define AP_INIT_LDTR_LIMIT             0xffff
+#define AP_INIT_GDTR_LIMIT             0xffff
+#define AP_INIT_IDTR_LIMIT             0xffff
+#define AP_INIT_TR_LIMIT               0xffff
+#define AP_INIT_RFLAGS_DEFAULT         0x2
+#define AP_INIT_DR6_DEFAULT            0xffff0ff0
+#define AP_INIT_GPAT_DEFAULT           0x0007040600070406ULL
+#define AP_INIT_XCR0_DEFAULT           0x1
+#define AP_INIT_X87_FTW_DEFAULT                0x5555
+#define AP_INIT_X87_FCW_DEFAULT                0x0040
+#define AP_INIT_CR0_DEFAULT            0x60000010
+#define AP_INIT_MXCSR_DEFAULT          0x1f80
+
 /* For early boot hypervisor communication in SEV-ES enabled guests */
 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
 
@@ -41,7 +64,10 @@ static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
  * Needs to be in the .data section because we need it NULL before bss is
  * cleared
  */
-static struct ghcb __initdata *boot_ghcb;
+static struct ghcb *boot_ghcb __section(".data");
+
+/* Bitmap of SEV features supported by the hypervisor */
+static u64 sev_hv_features __ro_after_init;
 
 /* #VC handler runtime per-CPU data */
 struct sev_es_runtime_data {
@@ -87,6 +113,15 @@ struct ghcb_state {
 static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
 DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
 
+static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
+
+struct sev_config {
+       __u64 debug             : 1,
+             __reserved        : 63;
+};
+
+static struct sev_config sev_cfg __read_mostly;
+
 static __always_inline bool on_vc_stack(struct pt_regs *regs)
 {
        unsigned long sp = regs->sp;
@@ -523,13 +558,68 @@ void noinstr __sev_es_nmi_complete(void)
        __sev_put_ghcb(&state);
 }
 
-static u64 get_jump_table_addr(void)
+static u64 __init get_secrets_page(void)
+{
+       u64 pa_data = boot_params.cc_blob_address;
+       struct cc_blob_sev_info info;
+       void *map;
+
+       /*
+	 * The CC blob contains the address of the secrets page; check if the
+        * blob is present.
+        */
+       if (!pa_data)
+               return 0;
+
+       map = early_memremap(pa_data, sizeof(info));
+       if (!map) {
+               pr_err("Unable to locate SNP secrets page: failed to map the Confidential Computing blob.\n");
+               return 0;
+       }
+       memcpy(&info, map, sizeof(info));
+       early_memunmap(map, sizeof(info));
+
+	/* Smoke-test the secrets page that was passed in */
+       if (!info.secrets_phys || info.secrets_len != PAGE_SIZE)
+               return 0;
+
+       return info.secrets_phys;
+}
+
+static u64 __init get_snp_jump_table_addr(void)
+{
+       struct snp_secrets_page_layout *layout;
+       void __iomem *mem;
+       u64 pa, addr;
+
+       pa = get_secrets_page();
+       if (!pa)
+               return 0;
+
+       mem = ioremap_encrypted(pa, PAGE_SIZE);
+       if (!mem) {
+               pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
+               return 0;
+       }
+
+       layout = (__force struct snp_secrets_page_layout *)mem;
+
+       addr = layout->os_area.ap_jump_table_pa;
+       iounmap(mem);
+
+       return addr;
+}
+
+static u64 __init get_jump_table_addr(void)
 {
        struct ghcb_state state;
        unsigned long flags;
        struct ghcb *ghcb;
        u64 ret = 0;
 
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return get_snp_jump_table_addr();
+
        local_irq_save(flags);
 
        ghcb = __sev_get_ghcb(&state);
@@ -553,7 +643,496 @@ static u64 get_jump_table_addr(void)
        return ret;
 }
 
-int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
+static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate)
+{
+       unsigned long vaddr_end;
+       int rc;
+
+       vaddr = vaddr & PAGE_MASK;
+       vaddr_end = vaddr + (npages << PAGE_SHIFT);
+
+       while (vaddr < vaddr_end) {
+               rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
+               if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc))
+                       sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+
+               vaddr = vaddr + PAGE_SIZE;
+       }
+}
+
+static void __init early_set_pages_state(unsigned long paddr, unsigned int npages, enum psc_op op)
+{
+       unsigned long paddr_end;
+       u64 val;
+
+       paddr = paddr & PAGE_MASK;
+       paddr_end = paddr + (npages << PAGE_SHIFT);
+
+       while (paddr < paddr_end) {
+               /*
+                * Use the MSR protocol because this function can be called before
+                * the GHCB is established.
+                */
+               sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
+               VMGEXIT();
+
+               val = sev_es_rd_ghcb_msr();
+
+               if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP,
+                        "Wrong PSC response code: 0x%x\n",
+                        (unsigned int)GHCB_RESP_CODE(val)))
+                       goto e_term;
+
+               if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
+                        "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",
+                        op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared",
+                        paddr, GHCB_MSR_PSC_RESP_VAL(val)))
+                       goto e_term;
+
+               paddr = paddr + PAGE_SIZE;
+       }
+
+       return;
+
+e_term:
+       sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+}
+
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+                                        unsigned int npages)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+	/*
+	 * Ask the hypervisor to mark the memory pages as private in the RMP
+	 * table.
+	 */
+       early_set_pages_state(paddr, npages, SNP_PAGE_STATE_PRIVATE);
+
+       /* Validate the memory pages after they've been added in the RMP table. */
+       pvalidate_pages(vaddr, npages, true);
+}
+
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+                                       unsigned int npages)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       /* Invalidate the memory pages before they are marked shared in the RMP table. */
+       pvalidate_pages(vaddr, npages, false);
+
+	/* Ask the hypervisor to mark the memory pages shared in the RMP table. */
+       early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED);
+}
+
+void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
+{
+       unsigned long vaddr, npages;
+
+       vaddr = (unsigned long)__va(paddr);
+       npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+       if (op == SNP_PAGE_STATE_PRIVATE)
+               early_snp_set_memory_private(vaddr, paddr, npages);
+       else if (op == SNP_PAGE_STATE_SHARED)
+               early_snp_set_memory_shared(vaddr, paddr, npages);
+       else
+               WARN(1, "invalid memory op %d\n", op);
+}
+
+static int vmgexit_psc(struct snp_psc_desc *desc)
+{
+       int cur_entry, end_entry, ret = 0;
+       struct snp_psc_desc *data;
+       struct ghcb_state state;
+       struct es_em_ctxt ctxt;
+       unsigned long flags;
+       struct ghcb *ghcb;
+
+       /*
+        * __sev_get_ghcb() needs to run with IRQs disabled because it is using
+        * a per-CPU GHCB.
+        */
+       local_irq_save(flags);
+
+       ghcb = __sev_get_ghcb(&state);
+       if (!ghcb) {
+               ret = 1;
+               goto out_unlock;
+       }
+
+       /* Copy the input desc into GHCB shared buffer */
+       data = (struct snp_psc_desc *)ghcb->shared_buffer;
+       memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
+
+       /*
+        * As per the GHCB specification, the hypervisor can resume the guest
+        * before processing all the entries. Check whether all the entries
+        * are processed. If not, then keep retrying. Note, the hypervisor
+        * will update the data memory directly to indicate the status, so
+        * reference the data->hdr everywhere.
+        *
+        * The strategy here is to wait for the hypervisor to change the page
+	 * state in the RMP table before the guest accesses the memory pages.
+	 * If the page state change was not successful, later memory accesses
+	 * will result in a crash.
+        */
+       cur_entry = data->hdr.cur_entry;
+       end_entry = data->hdr.end_entry;
+
+       while (data->hdr.cur_entry <= data->hdr.end_entry) {
+               ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
+
+		/* The hypervisor advances data->hdr.cur_entry in the shared buffer. */
+               ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+
+               /*
+                * Page State Change VMGEXIT can pass error code through
+                * exit_info_2.
+                */
+               if (WARN(ret || ghcb->save.sw_exit_info_2,
+                        "SNP: PSC failed ret=%d exit_info_2=%llx\n",
+                        ret, ghcb->save.sw_exit_info_2)) {
+                       ret = 1;
+                       goto out;
+               }
+
+               /* Verify that reserved bit is not set */
+               if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
+                       ret = 1;
+                       goto out;
+               }
+
+               /*
+                * Sanity check that entry processing is not going backwards.
+                * This can happen only if the hypervisor is misbehaving.
+                */
+               if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
+"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
+                        end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
+                       ret = 1;
+                       goto out;
+               }
+       }
+
+out:
+       __sev_put_ghcb(&state);
+
+out_unlock:
+       local_irq_restore(flags);
+
+       return ret;
+}
+
+static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
+                             unsigned long vaddr_end, int op)
+{
+       struct psc_hdr *hdr;
+       struct psc_entry *e;
+       unsigned long pfn;
+       int i;
+
+       hdr = &data->hdr;
+       e = data->entries;
+
+       memset(data, 0, sizeof(*data));
+       i = 0;
+
+       while (vaddr < vaddr_end) {
+               if (is_vmalloc_addr((void *)vaddr))
+                       pfn = vmalloc_to_pfn((void *)vaddr);
+               else
+                       pfn = __pa(vaddr) >> PAGE_SHIFT;
+
+               e->gfn = pfn;
+               e->operation = op;
+               hdr->end_entry = i;
+
+               /*
+                * The current SNP implementation doesn't keep track of the RMP
+                * page size, so use 4K for simplicity.
+                */
+               e->pagesize = RMP_PG_SIZE_4K;
+
+               vaddr = vaddr + PAGE_SIZE;
+               e++;
+               i++;
+       }
+
+       if (vmgexit_psc(data))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+}
+
+static void set_pages_state(unsigned long vaddr, unsigned int npages, int op)
+{
+       unsigned long vaddr_end, next_vaddr;
+       struct snp_psc_desc *desc;
+
+       desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT);
+       if (!desc)
+               panic("SNP: failed to allocate memory for PSC descriptor\n");
+
+       vaddr = vaddr & PAGE_MASK;
+       vaddr_end = vaddr + (npages << PAGE_SHIFT);
+
+       while (vaddr < vaddr_end) {
+               /* Calculate the last vaddr that fits in one struct snp_psc_desc. */
+               next_vaddr = min_t(unsigned long, vaddr_end,
+                                  (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr);
+
+               __set_pages_state(desc, vaddr, next_vaddr, op);
+
+               vaddr = next_vaddr;
+       }
+
+       kfree(desc);
+}
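/*
 * Sketch of the chunking math above (editorial example): each
 * struct snp_psc_desc holds at most VMGEXIT_PSC_MAX_ENTRY entries, one
 * per 4K page, so a request covering npages pages is split into
 * ceil(npages / VMGEXIT_PSC_MAX_ENTRY) Page State Change VMGEXITs:
 *
 *	nr_calls = DIV_ROUND_UP(npages, VMGEXIT_PSC_MAX_ENTRY);
 */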
+
+void snp_set_memory_shared(unsigned long vaddr, unsigned int npages)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       pvalidate_pages(vaddr, npages, false);
+
+       set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
+}
+
+void snp_set_memory_private(unsigned long vaddr, unsigned int npages)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
+
+       pvalidate_pages(vaddr, npages, true);
+}
+
+static int snp_set_vmsa(void *va, bool vmsa)
+{
+       u64 attrs;
+
+       /*
+        * Running at VMPL0 allows the kernel to change the VMSA bit for a page
+        * using the RMPADJUST instruction. However, for the instruction to
+        * succeed it must target the permissions of a lesser privileged
+        * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
+        * instruction in the AMD64 APM Volume 3).
+        */
+       attrs = 1;
+       if (vmsa)
+               attrs |= RMPADJUST_VMSA_PAGE_BIT;
+
+       return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+}
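/*
 * Usage sketch (hypothetical caller; 'page_va' is assumed): flip a 4K
 * page into VMSA state. snp_set_vmsa() returns the non-zero RMPADJUST
 * failure code when, e.g., the guest is not running at VMPL0.
 *
 *	if (snp_set_vmsa(page_va, true))
 *		pr_err("could not convert page to a VMSA\n");
 */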
+
+#define __ATTR_BASE            (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
+#define INIT_CS_ATTRIBS                (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
+#define INIT_DS_ATTRIBS                (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
+
+#define INIT_LDTR_ATTRIBS      (SVM_SELECTOR_P_MASK | 2)
+#define INIT_TR_ATTRIBS                (SVM_SELECTOR_P_MASK | 3)
+
+static void *snp_alloc_vmsa_page(void)
+{
+       struct page *p;
+
+       /*
+        * Allocate the VMSA page to work around the SNP erratum where the CPU
+        * will incorrectly signal an RMP violation #PF if a large page (2MB or
+        * 1GB) collides with the RMP entry of the VMSA page. The recommended
+        * workaround is to not use a large page.
+        *
+        * Allocate an 8k page which is also 8k-aligned.
+        */
+       p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+       if (!p)
+               return NULL;
+
+       split_page(p, 1);
+
+       /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
+       __free_page(p);
+
+       return page_address(p + 1);
+}
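/*
 * Worked example of why this trick is safe (address hypothetical):
 * alloc_pages(..., 1) returns an 8K block aligned to 8K. If it returns
 * 0x200000 (2M-aligned), the page handed out is 0x201000, whose PFN is
 * odd; a 2M boundary needs a PFN that is a multiple of 512 and a 1G
 * boundary a multiple of 262144, so the returned page can never itself
 * sit on a large-page boundary that collides with the VMSA RMP entry.
 */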
+
+static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
+{
+       int err;
+
+       err = snp_set_vmsa(vmsa, false);
+       if (err)
+               pr_err("clear VMSA page failed (%u), leaking page\n", err);
+       else
+               free_page((unsigned long)vmsa);
+}
+
+static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip)
+{
+       struct sev_es_save_area *cur_vmsa, *vmsa;
+       struct ghcb_state state;
+       unsigned long flags;
+       struct ghcb *ghcb;
+       u8 sipi_vector;
+       int cpu, ret;
+       u64 cr4;
+
+       /*
+        * The hypervisor SNP feature support check has already happened; just
+        * check the AP_CREATION one here.
+        */
+       if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
+               return -EOPNOTSUPP;
+
+       /*
+        * Verify the desired start IP against the known trampoline start IP
+        * to catch any future new trampoline that may be introduced and that
+        * would require a new protected guest entry point.
+        */
+       if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
+                     "Unsupported SNP start_ip: %lx\n", start_ip))
+               return -EINVAL;
+
+       /* Override start_ip with known protected guest start IP */
+       start_ip = real_mode_header->sev_es_trampoline_start;
+
+       /* Find the logical CPU for the APIC ID */
+       for_each_present_cpu(cpu) {
+               if (arch_match_cpu_phys_id(cpu, apic_id))
+                       break;
+       }
+       if (cpu >= nr_cpu_ids)
+               return -EINVAL;
+
+       cur_vmsa = per_cpu(sev_vmsa, cpu);
+
+       /*
+        * A new VMSA is created each time because there is no guarantee that
+        * the current VMSA is the kernel's or that the vCPU is not running. If
+        * an attempt were made to use the current VMSA with a running vCPU, a
+        * #VMEXIT of that vCPU would wipe out all of the settings being done
+        * here.
+        */
+       vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page();
+       if (!vmsa)
+               return -ENOMEM;
+
+       /* CR4 should maintain the MCE value */
+       cr4 = native_read_cr4() & X86_CR4_MCE;
+
+       /* Set the CS value based on the start_ip converted to a SIPI vector */
+       sipi_vector             = (start_ip >> 12);
+       vmsa->cs.base           = sipi_vector << 12;
+       vmsa->cs.limit          = AP_INIT_CS_LIMIT;
+       vmsa->cs.attrib         = INIT_CS_ATTRIBS;
+       vmsa->cs.selector       = sipi_vector << 8;
+
+       /* Set the RIP value based on start_ip */
+       vmsa->rip               = start_ip & 0xfff;
+
+       /* Set AP INIT defaults as documented in the APM */
+       vmsa->ds.limit          = AP_INIT_DS_LIMIT;
+       vmsa->ds.attrib         = INIT_DS_ATTRIBS;
+       vmsa->es                = vmsa->ds;
+       vmsa->fs                = vmsa->ds;
+       vmsa->gs                = vmsa->ds;
+       vmsa->ss                = vmsa->ds;
+
+       vmsa->gdtr.limit        = AP_INIT_GDTR_LIMIT;
+       vmsa->ldtr.limit        = AP_INIT_LDTR_LIMIT;
+       vmsa->ldtr.attrib       = INIT_LDTR_ATTRIBS;
+       vmsa->idtr.limit        = AP_INIT_IDTR_LIMIT;
+       vmsa->tr.limit          = AP_INIT_TR_LIMIT;
+       vmsa->tr.attrib         = INIT_TR_ATTRIBS;
+
+       vmsa->cr4               = cr4;
+       vmsa->cr0               = AP_INIT_CR0_DEFAULT;
+       vmsa->dr7               = DR7_RESET_VALUE;
+       vmsa->dr6               = AP_INIT_DR6_DEFAULT;
+       vmsa->rflags            = AP_INIT_RFLAGS_DEFAULT;
+       vmsa->g_pat             = AP_INIT_GPAT_DEFAULT;
+       vmsa->xcr0              = AP_INIT_XCR0_DEFAULT;
+       vmsa->mxcsr             = AP_INIT_MXCSR_DEFAULT;
+       vmsa->x87_ftw           = AP_INIT_X87_FTW_DEFAULT;
+       vmsa->x87_fcw           = AP_INIT_X87_FCW_DEFAULT;
+
+       /* SVME must be set. */
+       vmsa->efer              = EFER_SVME;
+
+       /*
+        * Set the SNP-specific fields for this VMSA:
+        *   VMPL level
+        *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
+        */
+       vmsa->vmpl              = 0;
+       vmsa->sev_features      = sev_status >> 2;
+
+       /* Switch the page over to a VMSA page now that it is initialized */
+       ret = snp_set_vmsa(vmsa, true);
+       if (ret) {
+               pr_err("set VMSA page failed (%u)\n", ret);
+               free_page((unsigned long)vmsa);
+
+               return -EINVAL;
+       }
+
+       /* Issue VMGEXIT AP Creation NAE event */
+       local_irq_save(flags);
+
+       ghcb = __sev_get_ghcb(&state);
+
+       vc_ghcb_invalidate(ghcb);
+       ghcb_set_rax(ghcb, vmsa->sev_features);
+       ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
+       ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE);
+       ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
+
+       sev_es_wr_ghcb_msr(__pa(ghcb));
+       VMGEXIT();
+
+       if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
+           lower_32_bits(ghcb->save.sw_exit_info_1)) {
+               pr_err("SNP AP Creation error\n");
+               ret = -EINVAL;
+       }
+
+       __sev_put_ghcb(&state);
+
+       local_irq_restore(flags);
+
+       /* Perform cleanup if there was an error */
+       if (ret) {
+               snp_cleanup_vmsa(vmsa);
+               vmsa = NULL;
+       }
+
+       /* Free up any previous VMSA page */
+       if (cur_vmsa)
+               snp_cleanup_vmsa(cur_vmsa);
+
+       /* Record the current VMSA page */
+       per_cpu(sev_vmsa, cpu) = vmsa;
+
+       return ret;
+}
+
+void snp_set_wakeup_secondary_cpu(void)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       /*
+        * Always set this override if SNP is enabled. This makes it the
+        * required method to start APs under SNP. If the hypervisor does
+        * not support AP creation, then no APs will be started.
+        */
+       apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit;
+}
+
+int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
 {
        u16 startup_cs, startup_ip;
        phys_addr_t jump_table_pa;
@@ -644,15 +1223,39 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
        return ret;
 }
 
-/*
- * This function runs on the first #VC exception after the kernel
- * switched to virtual addresses.
- */
-static bool __init sev_es_setup_ghcb(void)
+static void snp_register_per_cpu_ghcb(void)
+{
+       struct sev_es_runtime_data *data;
+       struct ghcb *ghcb;
+
+       data = this_cpu_read(runtime_data);
+       ghcb = &data->ghcb_page;
+
+       snp_register_ghcb_early(__pa(ghcb));
+}
+
+void setup_ghcb(void)
 {
+       if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+               return;
+
        /* First make sure the hypervisor talks a supported protocol. */
        if (!sev_es_negotiate_protocol())
-               return false;
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+       /*
+        * Check whether the runtime #VC exception handler is active. It uses
+        * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
+        *
+        * If SNP is active, register the per-CPU GHCB page so that the runtime
+        * exception handler can use it.
+        */
+       if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
+               if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+                       snp_register_per_cpu_ghcb();
+
+               return;
+       }
 
        /*
         * Clear the boot_ghcb. The first exception comes in before the bss
@@ -663,7 +1266,9 @@ static bool __init sev_es_setup_ghcb(void)
        /* Alright - Make the boot-ghcb public */
        boot_ghcb = &boot_ghcb_page;
 
-       return true;
+       /* An SNP guest requires that the GHCB GPA be registered. */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               snp_register_ghcb_early(__pa(&boot_ghcb_page));
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -766,6 +1371,17 @@ void __init sev_es_init_vc_handling(void)
        if (!sev_es_check_cpu_features())
                panic("SEV-ES CPU Features missing");
 
+       /*
+        * SNP is supported in v2 of the GHCB spec which mandates support for HV
+        * features.
+        */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
+               sev_hv_features = get_hv_features();
+
+               if (!(sev_hv_features & GHCB_HV_FT_SNP))
+                       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+       }
+
        /* Enable SEV-ES special handling */
        static_branch_enable(&sev_es_enable_key);
 
@@ -1337,7 +1953,7 @@ DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
                show_regs(regs);
 
                /* Ask hypervisor to sev_es_terminate */
-               sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
 
                /* If that fails and we get here - just panic */
                panic("Returned from Terminate-Request to Hypervisor\n");
@@ -1383,10 +1999,6 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
        struct es_em_ctxt ctxt;
        enum es_result result;
 
-       /* Do initial setup or terminate the guest */
-       if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
-               sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
-
        vc_ghcb_invalidate(boot_ghcb);
 
        result = vc_init_em_ctxt(&ctxt, regs, exit_code);
@@ -1425,6 +2037,215 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
 fail:
        show_regs(regs);
 
-       while (true)
-               halt();
+       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+}
+
+/*
+ * Initial setup of SNP relies on information provided by the
+ * Confidential Computing blob, which can be passed to the kernel
+ * in the following ways, depending on how it is booted:
+ *
+ * - when booted via the boot/decompress kernel:
+ *   - via boot_params
+ *
+ * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
+ *   - via a setup_data entry, as defined by the Linux Boot Protocol
+ *
+ * Scan for the blob in that order.
+ */
+static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+{
+       struct cc_blob_sev_info *cc_info;
+
+       /* The boot kernel would have passed the CC blob via boot_params. */
+       if (bp->cc_blob_address) {
+               cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
+               goto found_cc_info;
+       }
+
+       /*
+        * If the kernel was booted directly, without the use of the
+        * boot/decompression kernel, the CC blob may have been passed via
+        * setup_data instead.
+        */
+       cc_info = find_cc_blob_setup_data(bp);
+       if (!cc_info)
+               return NULL;
+
+found_cc_info:
+       if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+               snp_abort();
+
+       return cc_info;
+}
+
+bool __init snp_init(struct boot_params *bp)
+{
+       struct cc_blob_sev_info *cc_info;
+
+       if (!bp)
+               return false;
+
+       cc_info = find_cc_blob(bp);
+       if (!cc_info)
+               return false;
+
+       setup_cpuid_table(cc_info);
+
+       /*
+        * The CC blob will be used later to access the secrets page. Cache
+        * it here like the boot kernel does.
+        */
+       bp->cc_blob_address = (u32)(unsigned long)cc_info;
+
+       return true;
+}
+
+void __init snp_abort(void)
+{
+       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+}
+
+static void dump_cpuid_table(void)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+       int i = 0;
+
+       pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
+               cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
+
+       for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
+               const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
+
+               pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
+                       i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
+                       fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
+       }
+}
+
+/*
+ * It is useful from an auditing/testing perspective to provide an easy way
+ * for the guest owner to know that the CPUID table has been initialized as
+ * expected. However, that initialization happens too early in boot to print
+ * any sort of indicator, and there's no other good place to do it, so do it
+ * here.
+ */
+static int __init report_cpuid_table(void)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+
+       if (!cpuid_table->count)
+               return 0;
+
+       pr_info("Using SNP CPUID table, %d entries present.\n",
+               cpuid_table->count);
+
+       if (sev_cfg.debug)
+               dump_cpuid_table();
+
+       return 0;
+}
+arch_initcall(report_cpuid_table);
+
+static int __init init_sev_config(char *str)
+{
+       char *s;
+
+       while ((s = strsep(&str, ","))) {
+               if (!strcmp(s, "debug")) {
+                       sev_cfg.debug = true;
+                       continue;
+               }
+
+               pr_info("SEV command-line option '%s' was not recognized\n", s);
+       }
+
+       return 1;
+}
+__setup("sev=", init_sev_config);
+
+int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err)
+{
+       struct ghcb_state state;
+       struct es_em_ctxt ctxt;
+       unsigned long flags;
+       struct ghcb *ghcb;
+       int ret;
+
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return -ENODEV;
+
+       if (!fw_err)
+               return -EINVAL;
+
+       /*
+        * __sev_get_ghcb() needs to run with IRQs disabled because it is using
+        * a per-CPU GHCB.
+        */
+       local_irq_save(flags);
+
+       ghcb = __sev_get_ghcb(&state);
+       if (!ghcb) {
+               ret = -EIO;
+               goto e_restore_irq;
+       }
+
+       vc_ghcb_invalidate(ghcb);
+
+       if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
+               ghcb_set_rax(ghcb, input->data_gpa);
+               ghcb_set_rbx(ghcb, input->data_npages);
+       }
+
+       ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
+       if (ret)
+               goto e_put;
+
+       if (ghcb->save.sw_exit_info_2) {
+               /* The number of expected pages is returned in RBX */
+               if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST &&
+                   ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN)
+                       input->data_npages = ghcb_get_rbx(ghcb);
+
+               *fw_err = ghcb->save.sw_exit_info_2;
+
+               ret = -EIO;
+       }
+
+e_put:
+       __sev_put_ghcb(&state);
+e_restore_irq:
+       local_irq_restore(flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(snp_issue_guest_request);
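/*
 * Caller sketch (hypothetical; req_gpa/resp_gpa must reference properly
 * formatted SNP guest message pages, as the sev-guest driver prepares):
 *
 *	struct snp_req_data input = {
 *		.req_gpa  = req_gpa,
 *		.resp_gpa = resp_gpa,
 *	};
 *	unsigned long fw_err;
 *
 *	if (snp_issue_guest_request(SVM_VMGEXIT_GUEST_REQUEST, &input, &fw_err))
 *		pr_err("guest request failed, fw_err=%lx\n", fw_err);
 */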
+
+static struct platform_device sev_guest_device = {
+       .name           = "sev-guest",
+       .id             = -1,
+};
+
+static int __init snp_init_platform_device(void)
+{
+       struct sev_guest_platform_data data;
+       u64 gpa;
+
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return -ENODEV;
+
+       gpa = get_secrets_page();
+       if (!gpa)
+               return -ENODEV;
+
+       data.secrets_gpa = gpa;
+       if (platform_device_add_data(&sev_guest_device, &data, sizeof(data)))
+               return -ENODEV;
+
+       if (platform_device_register(&sev_guest_device))
+               return -ENODEV;
+
+       pr_info("SNP guest platform device initialized.\n");
+       return 0;
 }
+device_initcall(snp_init_platform_device);
index e439eb14325fa131057e93426e5f78aacad96262..9c7265b524c73ac26c0bf419ab2d0a0d5745d3bf 100644 (file)
@@ -93,7 +93,7 @@ static bool restore_sigcontext(struct pt_regs *regs,
                return false;
 
 #ifdef CONFIG_X86_32
-       set_user_gs(regs, sc.gs);
+       loadsegment(gs, sc.gs);
        regs->fs = sc.fs;
        regs->es = sc.es;
        regs->ds = sc.ds;
@@ -146,8 +146,10 @@ __unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
                     struct pt_regs *regs, unsigned long mask)
 {
 #ifdef CONFIG_X86_32
-       unsafe_put_user(get_user_gs(regs),
-                                 (unsigned int __user *)&sc->gs, Efault);
+       unsigned int gs;
+       savesegment(gs, gs);
+
+       unsafe_put_user(gs,       (unsigned int __user *)&sc->gs, Efault);
        unsafe_put_user(regs->fs, (unsigned int __user *)&sc->fs, Efault);
        unsafe_put_user(regs->es, (unsigned int __user *)&sc->es, Efault);
        unsafe_put_user(regs->ds, (unsigned int __user *)&sc->ds, Efault);
index b52407c56000e11c6a97a1d0b414e535cbc6b5dd..879ef8c72f5c0f073f47d37e7584ac65a3d0cdc8 100644 (file)
@@ -149,8 +149,10 @@ static inline void signal_compat_build_tests(void)
 
        BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18);
        BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_perf_flags) != 0x24);
        BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10);
        BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_flags) != 0x18);
 
        CHECK_CSI_OFFSET(_sigpoll);
        CHECK_CSI_SIZE  (_sigpoll, 2*sizeof(int));
index 2ef14772dc047d433e54e97a030026a69f6439df..5e7f9532a10d07760b053d6d56bcd30e463b902b 100644 (file)
@@ -56,7 +56,6 @@
 #include <linux/numa.h>
 #include <linux/pgtable.h>
 #include <linux/overflow.h>
-#include <linux/syscore_ops.h>
 
 #include <asm/acpi.h>
 #include <asm/desc.h>
@@ -82,6 +81,7 @@
 #include <asm/spec-ctrl.h>
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
+#include <asm/sev.h>
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -187,7 +187,7 @@ static void smp_callin(void)
         */
        set_cpu_sibling_map(raw_smp_processor_id());
 
-       init_freq_invariance(true, false);
+       ap_init_aperfmperf();
 
        /*
         * Get our bogomips.
@@ -1082,6 +1082,11 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
        unsigned long boot_error = 0;
        unsigned long timeout;
 
+#ifdef CONFIG_X86_64
+       /* If a 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
+       if (apic->wakeup_secondary_cpu_64)
+               start_ip = real_mode_header->trampoline_start64;
+#endif
        idle->thread.sp = (unsigned long)task_pt_regs(idle);
        early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
        initial_code = (unsigned long)start_secondary;
@@ -1123,11 +1128,14 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
 
        /*
         * Wake up a CPU in different cases:
-        * - Use the method in the APIC driver if it's defined
+        * - Use a method from the APIC driver if one is defined, with wakeup
+        *   straight to 64-bit mode preferred over wakeup to real mode (RM).
         * Otherwise,
         * - Use an INIT boot APIC message for APs or NMI for BSP.
         */
-       if (apic->wakeup_secondary_cpu)
+       if (apic->wakeup_secondary_cpu_64)
+               boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
+       else if (apic->wakeup_secondary_cpu)
                boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
        else
                boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
@@ -1397,7 +1405,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 {
        smp_prepare_cpus_common();
 
-       init_freq_invariance(false, false);
        smp_sanity_check();
 
        switch (apic_intr_mode) {
@@ -1430,6 +1437,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
        smp_quirk_init_udelay();
 
        speculative_store_bypass_ht_init();
+
+       snp_set_wakeup_secondary_cpu();
 }
 
 void arch_thaw_secondary_cpus_begin(void)
@@ -1847,357 +1856,3 @@ void native_play_dead(void)
 }
 
 #endif
-
-#ifdef CONFIG_X86_64
-/*
- * APERF/MPERF frequency ratio computation.
- *
- * The scheduler wants to do frequency invariant accounting and needs a <1
- * ratio to account for the 'current' frequency, corresponding to
- * freq_curr / freq_max.
- *
- * Since the frequency freq_curr on x86 is controlled by micro-controller and
- * our P-state setting is little more than a request/hint, we need to observe
- * the effective frequency 'BusyMHz', i.e. the average frequency over a time
- * interval after discarding idle time. This is given by:
- *
- *   BusyMHz = delta_APERF / delta_MPERF * freq_base
- *
- * where freq_base is the max non-turbo P-state.
- *
- * The freq_max term has to be set to a somewhat arbitrary value, because we
- * can't know which turbo states will be available at a given point in time:
- * it all depends on the thermal headroom of the entire package. We set it to
- * the turbo level with 4 cores active.
- *
- * Benchmarks show that's a good compromise between the 1C turbo ratio
- * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
- * which would ignore the entire turbo range (a conspicuous part, making
- * freq_curr/freq_max always maxed out).
- *
- * An exception to the heuristic above is the Atom uarch, where we choose the
- * highest turbo level for freq_max since Atom's are generally oriented towards
- * power efficiency.
- *
- * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
- * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
- */
-
-DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
-
-static DEFINE_PER_CPU(u64, arch_prev_aperf);
-static DEFINE_PER_CPU(u64, arch_prev_mperf);
-static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
-static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
-
-void arch_set_max_freq_ratio(bool turbo_disabled)
-{
-       arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
-                                       arch_turbo_freq_ratio;
-}
-EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
-
-static bool turbo_disabled(void)
-{
-       u64 misc_en;
-       int err;
-
-       err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
-       if (err)
-               return false;
-
-       return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
-}
-
-static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
-       int err;
-
-       err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
-       if (err)
-               return false;
-
-       err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
-       if (err)
-               return false;
-
-       *base_freq = (*base_freq >> 16) & 0x3F;     /* max P state */
-       *turbo_freq = *turbo_freq & 0x3F;           /* 1C turbo    */
-
-       return true;
-}
-
-#define X86_MATCH(model)                                       \
-       X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,            \
-               INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
-
-static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
-       X86_MATCH(XEON_PHI_KNL),
-       X86_MATCH(XEON_PHI_KNM),
-       {}
-};
-
-static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
-       X86_MATCH(SKYLAKE_X),
-       {}
-};
-
-static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
-       X86_MATCH(ATOM_GOLDMONT),
-       X86_MATCH(ATOM_GOLDMONT_D),
-       X86_MATCH(ATOM_GOLDMONT_PLUS),
-       {}
-};
-
-static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
-                               int num_delta_fratio)
-{
-       int fratio, delta_fratio, found;
-       int err, i;
-       u64 msr;
-
-       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
-       if (err)
-               return false;
-
-       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
-
-       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
-       if (err)
-               return false;
-
-       fratio = (msr >> 8) & 0xFF;
-       i = 16;
-       found = 0;
-       do {
-               if (found >= num_delta_fratio) {
-                       *turbo_freq = fratio;
-                       return true;
-               }
-
-               delta_fratio = (msr >> (i + 5)) & 0x7;
-
-               if (delta_fratio) {
-                       found += 1;
-                       fratio -= delta_fratio;
-               }
-
-               i += 8;
-       } while (i < 64);
-
-       return true;
-}
-
-static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
-{
-       u64 ratios, counts;
-       u32 group_size;
-       int err, i;
-
-       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
-       if (err)
-               return false;
-
-       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
-
-       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
-       if (err)
-               return false;
-
-       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
-       if (err)
-               return false;
-
-       for (i = 0; i < 64; i += 8) {
-               group_size = (counts >> i) & 0xFF;
-               if (group_size >= size) {
-                       *turbo_freq = (ratios >> i) & 0xFF;
-                       return true;
-               }
-       }
-
-       return false;
-}
-
-static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
-       u64 msr;
-       int err;
-
-       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
-       if (err)
-               return false;
-
-       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
-       if (err)
-               return false;
-
-       *base_freq = (*base_freq >> 8) & 0xFF;    /* max P state */
-       *turbo_freq = (msr >> 24) & 0xFF;         /* 4C turbo    */
-
-       /* The CPU may have less than 4 cores */
-       if (!*turbo_freq)
-               *turbo_freq = msr & 0xFF;         /* 1C turbo    */
-
-       return true;
-}
-
-static bool intel_set_max_freq_ratio(void)
-{
-       u64 base_freq, turbo_freq;
-       u64 turbo_ratio;
-
-       if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
-               goto out;
-
-       if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
-           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
-               goto out;
-
-       if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
-           knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
-               goto out;
-
-       if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
-           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
-               goto out;
-
-       if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
-               goto out;
-
-       return false;
-
-out:
-       /*
-        * Some hypervisors advertise X86_FEATURE_APERFMPERF
-        * but then fill all MSR's with zeroes.
-        * Some CPUs have turbo boost but don't declare any turbo ratio
-        * in MSR_TURBO_RATIO_LIMIT.
-        */
-       if (!base_freq || !turbo_freq) {
-               pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
-               return false;
-       }
-
-       turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
-       if (!turbo_ratio) {
-               pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
-               return false;
-       }
-
-       arch_turbo_freq_ratio = turbo_ratio;
-       arch_set_max_freq_ratio(turbo_disabled());
-
-       return true;
-}
-
-static void init_counter_refs(void)
-{
-       u64 aperf, mperf;
-
-       rdmsrl(MSR_IA32_APERF, aperf);
-       rdmsrl(MSR_IA32_MPERF, mperf);
-
-       this_cpu_write(arch_prev_aperf, aperf);
-       this_cpu_write(arch_prev_mperf, mperf);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static struct syscore_ops freq_invariance_syscore_ops = {
-       .resume = init_counter_refs,
-};
-
-static void register_freq_invariance_syscore_ops(void)
-{
-       /* Bail out if registered already. */
-       if (freq_invariance_syscore_ops.node.prev)
-               return;
-
-       register_syscore_ops(&freq_invariance_syscore_ops);
-}
-#else
-static inline void register_freq_invariance_syscore_ops(void) {}
-#endif
-
-void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-       bool ret = false;
-
-       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-               return;
-
-       if (secondary) {
-               if (static_branch_likely(&arch_scale_freq_key)) {
-                       init_counter_refs();
-               }
-               return;
-       }
-
-       if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-               ret = intel_set_max_freq_ratio();
-       else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-               if (!cppc_ready) {
-                       return;
-               }
-               ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
-       }
-
-       if (ret) {
-               init_counter_refs();
-               static_branch_enable(&arch_scale_freq_key);
-               register_freq_invariance_syscore_ops();
-               pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
-       } else {
-               pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
-       }
-}
-
-static void disable_freq_invariance_workfn(struct work_struct *work)
-{
-       static_branch_disable(&arch_scale_freq_key);
-}
-
-static DECLARE_WORK(disable_freq_invariance_work,
-                   disable_freq_invariance_workfn);
-
-DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
-
-void arch_scale_freq_tick(void)
-{
-       u64 freq_scale;
-       u64 aperf, mperf;
-       u64 acnt, mcnt;
-
-       if (!arch_scale_freq_invariant())
-               return;
-
-       rdmsrl(MSR_IA32_APERF, aperf);
-       rdmsrl(MSR_IA32_MPERF, mperf);
-
-       acnt = aperf - this_cpu_read(arch_prev_aperf);
-       mcnt = mperf - this_cpu_read(arch_prev_mperf);
-
-       this_cpu_write(arch_prev_aperf, aperf);
-       this_cpu_write(arch_prev_mperf, mperf);
-
-       if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
-               goto error;
-
-       if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
-               goto error;
-
-       freq_scale = div64_u64(acnt, mcnt);
-       if (!freq_scale)
-               goto error;
-
-       if (freq_scale > SCHED_CAPACITY_SCALE)
-               freq_scale = SCHED_CAPACITY_SCALE;
-
-       this_cpu_write(arch_freq_scale, freq_scale);
-       return;
-
-error:
-       pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
-       schedule_work(&disable_freq_invariance_work);
-}
-#endif /* CONFIG_X86_64 */
index 660b78827638fd946ed2d538c30d1896eb9156d8..8cc653ffdccd72904b14225fc1e0bb57f17e7eb7 100644 (file)
@@ -68,9 +68,6 @@ static int __init control_va_addr_alignment(char *str)
        if (*str == 0)
                return 1;
 
-       if (*str == '=')
-               str++;
-
        if (!strcmp(str, "32"))
                va_align.flags = ALIGN_VA_32;
        else if (!strcmp(str, "64"))
@@ -80,11 +77,11 @@ static int __init control_va_addr_alignment(char *str)
        else if (!strcmp(str, "on"))
                va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
        else
-               return 0;
+               pr_warn("invalid option value: 'align_va_addr=%s'\n", str);
 
        return 1;
 }
-__setup("align_va_addr", control_va_addr_alignment);
+__setup("align_va_addr=", control_va_addr_alignment);
 
 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
                unsigned long, prot, unsigned long, flags,
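/*
 * Example of the parsing change above: with the '=' now part of the
 * __setup() pattern, the handler receives only the value, so
 * "align_va_addr=64" on the command line passes "64" as str, and an
 * unrecognized value now warns instead of silently failing.
 */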
index 1563fb9950059d8724f0b7b252aec316e3349245..d62b2cb85ceae55407d02a7ff5678e0df67f3ec3 100644 (file)
@@ -62,6 +62,7 @@
 #include <asm/insn.h>
 #include <asm/insn-eval.h>
 #include <asm/vdso.h>
+#include <asm/tdx.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -686,13 +687,40 @@ static bool try_fixup_enqcmd_gp(void)
 #endif
 }
 
+static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr,
+                                   unsigned long error_code, const char *str)
+{
+       if (fixup_exception(regs, trapnr, error_code, 0))
+               return true;
+
+       current->thread.error_code = error_code;
+       current->thread.trap_nr = trapnr;
+
+       /*
+        * To be potentially processing a kprobe fault and to trust the result
+        * from kprobe_running(), we have to be non-preemptible.
+        */
+       if (!preemptible() && kprobe_running() &&
+           kprobe_fault_handler(regs, trapnr))
+               return true;
+
+       return notify_die(DIE_GPF, str, regs, error_code, trapnr, SIGSEGV) == NOTIFY_STOP;
+}
+
+static void gp_user_force_sig_segv(struct pt_regs *regs, int trapnr,
+                                  unsigned long error_code, const char *str)
+{
+       current->thread.error_code = error_code;
+       current->thread.trap_nr = trapnr;
+       show_signal(current, SIGSEGV, "", str, regs, error_code);
+       force_sig(SIGSEGV);
+}
+
 DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 {
        char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
        enum kernel_gp_hint hint = GP_NO_HINT;
-       struct task_struct *tsk;
        unsigned long gp_addr;
-       int ret;
 
        if (user_mode(regs) && try_fixup_enqcmd_gp())
                return;
@@ -711,40 +739,18 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
                return;
        }
 
-       tsk = current;
-
        if (user_mode(regs)) {
                if (fixup_iopl_exception(regs))
                        goto exit;
 
-               tsk->thread.error_code = error_code;
-               tsk->thread.trap_nr = X86_TRAP_GP;
-
                if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0))
                        goto exit;
 
-               show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
-               force_sig(SIGSEGV);
+               gp_user_force_sig_segv(regs, X86_TRAP_GP, error_code, desc);
                goto exit;
        }
 
-       if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
-               goto exit;
-
-       tsk->thread.error_code = error_code;
-       tsk->thread.trap_nr = X86_TRAP_GP;
-
-       /*
-        * To be potentially processing a kprobe fault and to trust the result
-        * from kprobe_running(), we have to be non-preemptible.
-        */
-       if (!preemptible() &&
-           kprobe_running() &&
-           kprobe_fault_handler(regs, X86_TRAP_GP))
-               goto exit;
-
-       ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
-       if (ret == NOTIFY_STOP)
+       if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc))
                goto exit;
 
        if (error_code)
@@ -892,14 +898,10 @@ sync:
 }
 #endif
 
-struct bad_iret_stack {
-       void *error_entry_ret;
-       struct pt_regs regs;
-};
-
-asmlinkage __visible noinstr
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
 {
+       struct pt_regs tmp, *new_stack;
+
        /*
         * This is called from entry_64.S early in handling a fault
         * caused by a bad iret to user mode.  To handle the fault
@@ -908,19 +910,18 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
         * just below the IRET frame) and we want to pretend that the
         * exception came from the IRET target.
         */
-       struct bad_iret_stack tmp, *new_stack =
-               (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+       new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
        /* Copy the IRET target to the temporary storage. */
-       __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
+       __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8);
 
        /* Copy the remainder of the stack from the current stack. */
-       __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
+       __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip));
 
        /* Update the entry stack */
        __memcpy(new_stack, &tmp, sizeof(tmp));
 
-       BUG_ON(!user_mode(&new_stack->regs));
+       BUG_ON(!user_mode(new_stack));
        return new_stack;
 }
 #endif
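/*
 * Note on the 5*8 byte copy above: those five 8-byte slots are the
 * hardware exception frame pushed by the CPU -- RIP, CS, RFLAGS, RSP
 * and SS -- which are exactly the members of struct pt_regs from 'ip'
 * to the end, so copying them from the bad stack reconstructs the IRET
 * target state.
 */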
@@ -1343,6 +1344,91 @@ DEFINE_IDTENTRY(exc_device_not_available)
        }
 }
 
+#ifdef CONFIG_INTEL_TDX_GUEST
+
+#define VE_FAULT_STR "VE fault"
+
+static void ve_raise_fault(struct pt_regs *regs, long error_code)
+{
+       if (user_mode(regs)) {
+               gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code, VE_FAULT_STR);
+               return;
+       }
+
+       if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR))
+               return;
+
+       die_addr(VE_FAULT_STR, regs, error_code, 0);
+}
+
+/*
+ * Virtualization Exceptions (#VE) are delivered to TDX guests due to
+ * specific guest actions which may happen in either user space or the
+ * kernel:
+ *
+ *  * Specific instructions (WBINVD, for example)
+ *  * Specific MSR accesses
+ *  * Specific CPUID leaf accesses
+ *  * Access to specific guest physical addresses
+ *
+ * In the settings that Linux will run in, virtualization exceptions are
+ * never generated on accesses to normal, TD-private memory that has been
+ * accepted (by BIOS or with tdx_enc_status_changed()).
+ *
+ * Syscall entry code has a critical window where the kernel stack is not
+ * yet set up. Any exception in this window leads to hard-to-debug issues
+ * and can be exploited for privilege escalation. Exceptions in the NMI
+ * entry code also cause issues. Returning from the exception handler with
+ * IRET will re-enable NMIs, and a nested NMI will corrupt the NMI stack.
+ *
+ * For these reasons, the kernel avoids #VEs during the syscall gap and
+ * the NMI entry code. Entry code paths do not access TD-shared memory,
+ * MMIO regions, use #VE triggering MSRs, instructions, or CPUID leaves
+ * that might generate #VE. VMM can remove memory from TD at any point,
+ * but access to unaccepted (or missing) private memory leads to VM
+ * termination, not to #VE.
+ *
+ * Similarly to page faults and breakpoints, #VEs are allowed in NMI
+ * handlers once the kernel is ready to deal with nested NMIs.
+ *
+ * During #VE delivery, all interrupts, including NMIs, are blocked until
+ * TDGETVEINFO is called. It prevents #VE nesting until the kernel reads
+ * the VE info.
+ *
+ * If a guest kernel action which would normally cause a #VE occurs in
+ * the interrupt-disabled region before TDGETVEINFO, a #DF (double
+ * fault) is delivered to the guest, which will result in an oops.
+ *
+ * The entry code has been carefully audited against these expectations.
+ * Changes to the entry code have to be audited for correctness with
+ * respect to this aspect. As with #PF, a #VE in these places would expose
+ * the kernel to privilege escalation or may lead to random crashes.
+ */
+DEFINE_IDTENTRY(exc_virtualization_exception)
+{
+       struct ve_info ve;
+
+       /*
+        * NMIs, machine checks and interrupts are disabled until the
+        * TDGETVEINFO TDCALL is executed. This ensures that the #VE
+        * info cannot be overwritten by a nested #VE.
+        */
+       tdx_get_ve_info(&ve);
+
+       cond_local_irq_enable(regs);
+
+       /*
+        * If tdx_handle_virt_exception() could not process
+        * it successfully, treat it as #GP(0) and handle it.
+        */
+       if (!tdx_handle_virt_exception(regs, &ve))
+               ve_raise_fault(regs, 0);
+
+       cond_local_irq_disable(regs);
+}
+
+#endif
+
 #ifdef CONFIG_X86_32
 DEFINE_IDTENTRY_SW(iret_error)
 {
index c21bcd668284259d8f8833205a936106a8010af6..e9e803a4d44cf6ffe8938071a18b305feafdbf81 100644 (file)
@@ -151,7 +151,7 @@ exit_vm86:
 
        memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
 
-       lazy_load_gs(vm86->regs32.gs);
+       loadsegment(gs, vm86->regs32.gs);
 
        regs->pt.ax = retval;
        return;
@@ -325,7 +325,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
  * Save old state
  */
        vm86->saved_sp0 = tsk->thread.sp0;
-       lazy_save_gs(vm86->regs32.gs);
+       savesegment(gs, vm86->regs32.gs);
 
        /* make room for real-mode segments */
        preempt_disable();
index 0c1ba6aa07651f4d2698b004c35c27aa1b406118..de6d44e07e348fcdae81841f8dd516fdea9781bf 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/user.h>
 #include <asm/fpu/xstate.h>
 #include <asm/sgx.h>
+#include <asm/cpuid.h>
 #include "cpuid.h"
 #include "lapic.h"
 #include "mmu.h"
@@ -744,24 +745,8 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
        cpuid_count(entry->function, entry->index,
                    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
 
-       switch (function) {
-       case 4:
-       case 7:
-       case 0xb:
-       case 0xd:
-       case 0xf:
-       case 0x10:
-       case 0x12:
-       case 0x14:
-       case 0x17:
-       case 0x18:
-       case 0x1d:
-       case 0x1e:
-       case 0x1f:
-       case 0x8000001d:
+       if (cpuid_function_is_indexed(function))
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-               break;
-       }
 
        return entry;
 }
index 46f9dfb6046947ac62a2c86ef18e0b6ef591a2f4..a0702b6be3e8979b894ce33c070bc613411b4adc 100644 (file)
@@ -1914,7 +1914,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
        struct hv_send_ipi_ex send_ipi_ex;
        struct hv_send_ipi send_ipi;
        DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
-       unsigned long valid_bank_mask;
+       u64 valid_bank_mask;
        u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
        u32 vector;
        bool all_cpus;
@@ -1956,7 +1956,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
                valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
                all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
 
-               if (hc->var_cnt != bitmap_weight(&valid_bank_mask, 64))
+               if (hc->var_cnt != bitmap_weight((unsigned long *)&valid_bank_mask, 64))
                        return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
                if (all_cpus)
index 311e4e1d7870ee18be21d97eb69eb3865dd8d580..45e1573f8f1d3e0497c14f063c6d431dd255eadf 100644 (file)
@@ -5470,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
        uint i;
 
        if (pcid == kvm_get_active_pcid(vcpu)) {
-               mmu->invlpg(vcpu, gva, mmu->root.hpa);
+               if (mmu->invlpg)
+                       mmu->invlpg(vcpu, gva, mmu->root.hpa);
                tlb_flush = true;
        }
 
        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
                if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
                    pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
-                       mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+                       if (mmu->invlpg)
+                               mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
                        tlb_flush = true;
                }
        }
@@ -5665,6 +5667,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
        int nr_zapped, batch = 0;
+       bool unstable;
 
 restart:
        list_for_each_entry_safe_reverse(sp, node,
@@ -5696,11 +5699,12 @@ restart:
                        goto restart;
                }
 
-               if (__kvm_mmu_prepare_zap_page(kvm, sp,
-                               &kvm->arch.zapped_obsolete_pages, &nr_zapped)) {
-                       batch += nr_zapped;
+               unstable = __kvm_mmu_prepare_zap_page(kvm, sp,
+                               &kvm->arch.zapped_obsolete_pages, &nr_zapped);
+               batch += nr_zapped;
+
+               if (unstable)
                        goto restart;
-               }
        }
 
        /*
index eca39f56c23153556c094104b59ea52a1b1e0d91..0604bc29f0b8c947fdeb3291c3f0ce033271064c 100644 (file)
@@ -171,9 +171,12 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
        return true;
 }
 
-static int cmp_u64(const void *a, const void *b)
+static int cmp_u64(const void *pa, const void *pb)
 {
-       return *(__u64 *)a - *(__u64 *)b;
+       u64 a = *(u64 *)pa;
+       u64 b = *(u64 *)pb;
+
+       return (a > b) - (a < b);
 }
 
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
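/*
 * Worked example of the bug being fixed above: with a = 1ULL << 32 and
 * b = 1, the old "a - b" comparator truncates 0xffffffff to the int -1
 * and wrongly reports a < b. The (a > b) - (a < b) form always returns
 * -1, 0 or 1 with the correct sign.
 */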
index 7c392873626fd6e1cbc17a4559e736201e6325d7..636c77ef55fc3be26d9b6e2cd8b19f5cfecbc4cc 100644 (file)
@@ -562,12 +562,20 @@ e_unpin:
 
 static int sev_es_sync_vmsa(struct vcpu_svm *svm)
 {
-       struct vmcb_save_area *save = &svm->vmcb->save;
+       struct sev_es_save_area *save = svm->sev_es.vmsa;
 
        /* Check some debug related fields before encrypting the VMSA */
-       if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
+       if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
                return -EINVAL;
 
+       /*
+        * SEV-ES will use a VMSA that is pointed to by the VMCB, not
+        * the traditional VMSA that is part of the VMCB. Copy the
+        * traditional VMSA as it has been built so far (in prep
+        * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
+        */
+       memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save));
+
        /* Sync registers */
        save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
        save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
@@ -595,14 +603,6 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
        save->xss  = svm->vcpu.arch.ia32_xss;
        save->dr6  = svm->vcpu.arch.dr6;
 
-       /*
-        * SEV-ES will use a VMSA that is pointed to by the VMCB, not
-        * the traditional VMSA that is part of the VMCB. Copy the
-        * traditional VMSA as it has been built so far (in prep
-        * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
-        */
-       memcpy(svm->sev_es.vmsa, save, sizeof(*save));
-
        return 0;
 }
 
@@ -2966,7 +2966,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
                                            sev_enc_bit));
 }
 
-void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa)
+void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
 {
        /*
         * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
index 7e45d03cd018a5cc354936fcebc5b14d43c2cbcc..17d334ef54308229fe64141f3bff7562181af66f 100644 (file)
@@ -1270,8 +1270,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
         */
        vmsave(__sme_page_pa(sd->save_area));
        if (sev_es_guest(vcpu->kvm)) {
-               struct vmcb_save_area *hostsa;
-               hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+               struct sev_es_save_area *hostsa;
+               hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
 
                sev_es_prepare_switch_to_guest(hostsa);
        }
@@ -3117,8 +3117,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
               "tr:",
               save01->tr.selector, save01->tr.attrib,
               save01->tr.limit, save01->tr.base);
-       pr_err("cpl:            %d                efer:         %016llx\n",
-               save->cpl, save->efer);
+       pr_err("vmpl: %d   cpl:  %d               efer:          %016llx\n",
+              save->vmpl, save->cpl, save->efer);
        pr_err("%-15s %016llx %-13s %016llx\n",
               "cr0:", save->cr0, "cr2:", save->cr2);
        pr_err("%-15s %016llx %-13s %016llx\n",
index f76deff71002cbbd3403f43faf7a773bfb14280d..2d83845b9032fe858f60ba026d20fccc399f7526 100644 (file)
@@ -181,7 +181,7 @@ struct svm_nested_state {
 
 struct vcpu_sev_es_state {
        /* SEV-ES support */
-       struct vmcb_save_area *vmsa;
+       struct sev_es_save_area *vmsa;
        struct ghcb *ghcb;
        struct kvm_host_map ghcb_map;
        bool received_first_sipi;
@@ -622,7 +622,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
 void sev_es_init_vmcb(struct vcpu_svm *svm);
 void sev_es_vcpu_reset(struct vcpu_svm *svm);
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
-void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa);
+void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
 void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 
 /* vmenter.S */
index 65d15df6212d67e4b936f0b9e764efd2b7737308..0e65d00e2339ff95e022d03ee9012346d2ead8b3 100644 (file)
@@ -54,8 +54,8 @@ static void delay_loop(u64 __loops)
                "       jnz 2b          \n"
                "3:     dec %0          \n"
 
-               : /* we don't need output */
-               :"a" (loops)
+               : "+a" (loops)
+               :
        );
 }
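/*
 * Minimal sketch of the constraint change above: "+a" declares the
 * operand as read-write in rAX, so the compiler no longer assumes the
 * register still holds 'loops' after the asm body (which decrements it).
 *
 *	unsigned long n = 16;
 *	asm volatile("dec %0" : "+a" (n));	n is now 15
 */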
 
index b781d324211bb7ab91b506ea3bf847a7e854d330..21104c41cba04868e54d14f93a3592b9fa2541ba 100644 (file)
@@ -342,9 +342,9 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
  */
 static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
 {
-#ifdef CONFIG_X86_64
        unsigned short sel;
 
+#ifdef CONFIG_X86_64
        switch (seg_reg_idx) {
        case INAT_SEG_REG_IGNORE:
                return 0;
@@ -402,7 +402,8 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
        case INAT_SEG_REG_FS:
                return (unsigned short)(regs->fs & 0xffff);
        case INAT_SEG_REG_GS:
-               return get_user_gs(regs);
+               savesegment(gs, sel);
+               return sel;
        case INAT_SEG_REG_IGNORE:
        default:
                return -EINVAL;
index 2b3eb8c948a3d0d303d206f013ef78f840bf693a..a58f451a7dd32e30215019ca35ffab444c8ccc43 100644 (file)
@@ -11,7 +11,7 @@
 #include <asm/msr.h>
 #include <asm/archrandom.h>
 #include <asm/e820/api.h>
-#include <asm/io.h>
+#include <asm/shared/io.h>
 
 /*
  * When built for the regular kernel, several functions need to be stubbed out
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
deleted file mode 100644 (file)
index e69de29..0000000
index b82ca14ba71826a247268de49e4a8d22d81b216b..4a9fd9029a53646c6ecfc14aad4d07a2e5bedb52 100644 (file)
@@ -153,7 +153,7 @@ static long pm_address(u_char FPU_modrm, u_char segment,
        switch (segment) {
        case PREFIX_GS_ - 1:
                /* user gs handling can be lazy, use special accessors */
-               addr->selector = get_user_gs(FPU_info->regs);
+               savesegment(gs, addr->selector);
                break;
        default:
                addr->selector = PM_REG_(segment);
index fe3d3061fc116a7780e28e3f604aef4aa0ea185d..d957dc15b3712890af49639cfe470208609d9f37 100644 (file)
@@ -20,13 +20,12 @@ CFLAGS_REMOVE_mem_encrypt_identity.o        = -pg
 endif
 
 obj-y                          :=  init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
-                                   pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+                                   pgtable.o physaddr.o tlb.o cpu_entry_area.o maccess.o
 
 obj-y                          += pat/
 
 # Make sure __phys_addr has no stackprotector
 CFLAGS_physaddr.o              := -fno-stack-protector
-CFLAGS_setup_nx.o              := -fno-stack-protector
 CFLAGS_mem_encrypt_identity.o  := -fno-stack-protector
 
 CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace
index 058b2f36b3a6e07f59ca02b6ccba52280694993c..b3ca7d23e4b01c7ae719e954408c234d21d89cdf 100644 (file)
@@ -154,7 +154,7 @@ int __init amd_numa_init(void)
                node_set(nodeid, numa_nodes_parsed);
        }
 
-       if (!nodes_weight(numa_nodes_parsed))
+       if (nodes_empty(numa_nodes_parsed))
                return -ENOENT;
 
        /*
index d0074c6ed31a38ba1778bc776f857a6088b943d3..fad8faa29d042d59ab9ae0f6d89d7aaee5b8a041 100644 (file)
@@ -149,7 +149,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
                unsigned char opcode;
 
                if (user_mode(regs)) {
-                       if (get_user(opcode, instr))
+                       if (get_user(opcode, (unsigned char __user *) instr))
                                break;
                } else {
                        if (get_kernel_nofault(opcode, instr))
index 96d34ebb20a9e1e09e3a9e8eaf41f464a264d87e..61d0ab154f967c0167f0e4d69f41065152009803 100644 (file)
@@ -110,7 +110,6 @@ int force_personality32;
 /*
  * noexec32=on|off
  * Control non executable heap for 32bit processes.
- * To control the stack too use noexec=off
  *
  * on  PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
  * off PROT_READ implies PROT_EXEC
@@ -902,6 +901,8 @@ static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end
 
 static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
 {
+       const unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
+
        vmemmap_flush_unused_pmd();
 
        /*
@@ -914,8 +915,7 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
         * Mark with PAGE_UNUSED the unused parts of the new memmap range
         */
        if (!IS_ALIGNED(start, PMD_SIZE))
-               memset((void *)start, PAGE_UNUSED,
-                       start - ALIGN_DOWN(start, PMD_SIZE));
+               memset((void *)page, PAGE_UNUSED, start - page);
 
        /*
         * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
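A hedged numeric example of what the hunk above computes (the addresses are hypothetical):

	/*
	 * With PMD_SIZE == 2 MiB and start == 0x40201000:
	 *   page         == ALIGN_DOWN(start, PMD_SIZE) == 0x40200000
	 *   start - page == 0x1000
	 * so the memset marks exactly the 4 KiB of the 2 MiB range that
	 * precede @start as PAGE_UNUSED.
	 */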
index 17a492c27306990c2d2d4ed3b0224152ffa2651e..1ad0228f8ceb98032b6ea4e7fac0eb23e1c31d1f 100644 (file)
@@ -242,10 +242,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
         * If the page being mapped is in memory and SEV is active then
         * make sure the memory encryption attribute is enabled in the
         * resulting mapping.
+        * In TDX guests, memory is marked private by default. If encryption
+        * is not requested (via the encrypted argument), explicitly set the
+        * decrypted attribute on all ioremapped memory.
         */
        prot = PAGE_KERNEL_IO;
        if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
                prot = pgprot_encrypted(prot);
+       else
+               prot = pgprot_decrypted(prot);
 
        switch (pcm) {
        case _PAGE_CACHE_MODE_UC:
index 50d209939c66cb4950d303fb17acc861d86a4d3e..11350e2fd7366fa3b850f0a24b55eced8c3b7327 100644 (file)
@@ -42,7 +42,14 @@ bool force_dma_unencrypted(struct device *dev)
 
 static void print_mem_encrypt_feature_info(void)
 {
-       pr_info("AMD Memory Encryption Features active:");
+       pr_info("Memory Encryption Features active:");
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+               pr_cont(" Intel TDX\n");
+               return;
+       }
+
+       pr_cont(" AMD");
 
        /* Secure Memory Encryption */
        if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
@@ -62,6 +69,10 @@ static void print_mem_encrypt_feature_info(void)
        if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
                pr_cont(" SEV-ES");
 
+       /* Secure Nested Paging */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               pr_cont(" SEV-SNP");
+
        pr_cont("\n");
 }
 
index 6169053c28541c96e50f5bd7e2276b564571b527..d3c88d9ef8d631a1a0e53c13fe0a3f8429e46324 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/processor-flags.h>
 #include <asm/msr.h>
 #include <asm/cmdline.h>
+#include <asm/sev.h>
 
 #include "mm_internal.h"
 
@@ -47,6 +48,36 @@ EXPORT_SYMBOL(sme_me_mask);
 /* Buffer used for early in-place encryption by BSP, no locking needed */
 static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
 
+/*
+ * SNP-specific routine which additionally needs to change the page state from
+ * private to shared before copying the data from the source to the destination,
+ * and to restore it after the copy.
+ */
+static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
+                                    unsigned long paddr, bool decrypt)
+{
+       unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+       if (decrypt) {
+               /*
+                * @paddr needs to be accessed decrypted, mark the page shared in
+                * the RMP table before copying it.
+                */
+               early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);
+
+               memcpy(dst, src, sz);
+
+               /* Restore the page state after the memcpy. */
+               early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
+       } else {
+               /*
+                * @paddr needs to be accessed encrypted, no need for the page state
+                * change.
+                */
+               memcpy(dst, src, sz);
+       }
+}
+
 /*
  * This routine does not change the underlying encryption setting of the
  * page(s) that map this memory. It assumes that eventually the memory is
@@ -95,8 +126,13 @@ static void __init __sme_early_enc_dec(resource_size_t paddr,
                 * Use a temporary buffer, of cache-line multiple size, to
                 * avoid data corruption as documented in the APM.
                 */
-               memcpy(sme_early_buffer, src, len);
-               memcpy(dst, sme_early_buffer, len);
+               if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
+                       snp_memcpy(sme_early_buffer, src, len, paddr, enc);
+                       snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
+               } else {
+                       memcpy(sme_early_buffer, src, len);
+                       memcpy(dst, sme_early_buffer, len);
+               }
 
                early_memunmap(dst, len);
                early_memunmap(src, len);
@@ -280,11 +316,24 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
 
 static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
 {
+       /*
+        * To maintain the security guarantees of SEV-SNP guests, make sure
+        * to invalidate the memory before the encryption attribute is cleared.
+        */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
+               snp_set_memory_shared(vaddr, npages);
 }
 
 /* Return true unconditionally: return value doesn't matter for the SEV side */
 static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
 {
+       /*
+        * After memory is mapped encrypted in the page table, validate it
+        * so that it is consistent with the page table updates.
+        */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
+               snp_set_memory_private(vaddr, npages);
+
        if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
                enc_dec_hypercall(vaddr, npages, enc);
 
@@ -322,14 +371,28 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
        clflush_cache_range(__va(pa), size);
 
        /* Encrypt/decrypt the contents in-place */
-       if (enc)
+       if (enc) {
                sme_early_encrypt(pa, size);
-       else
+       } else {
                sme_early_decrypt(pa, size);
 
+               /*
+                * On SNP, the page state change in the RMP table must happen
+                * before the page table updates.
+                */
+               early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
+       }
+
        /* Change the page encryption mask. */
        new_pte = pfn_pte(pfn, new_prot);
        set_pte_atomic(kpte, new_pte);
+
+       /*
+        * If the page is set encrypted in the page table, then update the RMP table to
+        * add this page as private.
+        */
+       if (enc)
+               early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
 }
 
 static int __init early_set_memory_enc_dec(unsigned long vaddr,
index b43bc24d2bb6415e4fe5eb7a4bec1914d5767fb1..f415498d3175cf7f67eef6f20c72a5f247a24a2f 100644 (file)
@@ -45,6 +45,7 @@
 #include <asm/sections.h>
 #include <asm/cmdline.h>
 #include <asm/coco.h>
+#include <asm/sev.h>
 
 #include "mm_internal.h"
 
@@ -509,8 +510,11 @@ void __init sme_enable(struct boot_params *bp)
        bool active_by_default;
        unsigned long me_mask;
        char buffer[16];
+       bool snp;
        u64 msr;
 
+       snp = snp_init(bp);
+
        /* Check for the SME/SEV support leaf */
        eax = 0x80000000;
        ecx = 0;
@@ -542,6 +546,10 @@ void __init sme_enable(struct boot_params *bp)
        sev_status   = __rdmsr(MSR_AMD64_SEV);
        feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
 
+       /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */
+       if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+               snp_abort();
+
        /* Check if memory encryption is enabled */
        if (feature_mask == AMD_SME_BIT) {
                /*
index 933a2ebad471bcb308e90ad9afd73ec13ddb1374..c3317f0650d81c6ff22e23022afd8fa786538823 100644 (file)
@@ -400,7 +400,7 @@ static void leave_uniprocessor(void)
        int cpu;
        int err;
 
-       if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0)
+       if (!cpumask_available(downed_cpus) || cpumask_empty(downed_cpus))
                return;
        pr_notice("Re-enabling CPUs...\n");
        for_each_cpu(cpu, downed_cpus) {
index 1a02b791d273cb1e9981663d67131f01c211753e..9a9305367fdd16e268eadff2c9f72457fe5e7203 100644 (file)
@@ -123,7 +123,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
         * Continue to fill physical nodes with fake nodes until there is no
         * memory left on any of them.
         */
-       while (nodes_weight(physnode_mask)) {
+       while (!nodes_empty(physnode_mask)) {
                for_each_node_mask(i, physnode_mask) {
                        u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
                        u64 start, limit, end;
@@ -270,7 +270,7 @@ static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei,
         * Fill physical nodes with fake nodes of size until there is no memory
         * left on any of them.
         */
-       while (nodes_weight(physnode_mask)) {
+       while (!nodes_empty(physnode_mask)) {
                for_each_node_mask(i, physnode_mask) {
                        u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
                        u64 start, limit, end;
index 4ba2a3ee4bce1258f0b86a47a47c823e3ff2eb49..d5ef64ddd35e9f38c2a533e22d79c016fd6fcc55 100644 (file)
@@ -101,7 +101,7 @@ int pat_debug_enable;
 static int __init pat_debug_setup(char *str)
 {
        pat_debug_enable = 1;
-       return 0;
+       return 1;
 }
 __setup("debugpat", pat_debug_setup);
 
index 5d5c7bb50ce9e2e9646f783d26adb1e33ea094aa..ffe3b3a087feaaa31bb0ca6b30dab8caf84857fe 100644 (file)
@@ -540,7 +540,7 @@ static inline bool pti_kernel_image_global_ok(void)
         * cases where RANDSTRUCT is in use to help keep the layout a
         * secret.
         */
-       if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
+       if (IS_ENABLED(CONFIG_RANDSTRUCT))
                return false;
 
        return true;
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
deleted file mode 100644 (file)
index ed5667f..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/pgtable.h>
-
-#include <asm/proto.h>
-#include <asm/cpufeature.h>
-
-static int disable_nx;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-       if (!str)
-               return -EINVAL;
-       if (!strncmp(str, "on", 2)) {
-               disable_nx = 0;
-       } else if (!strncmp(str, "off", 3)) {
-               disable_nx = 1;
-       }
-       x86_configure_nx();
-       return 0;
-}
-early_param("noexec", noexec_setup);
-
-void x86_configure_nx(void)
-{
-       if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
-               __supported_pte_mask |= _PAGE_NX;
-       else
-               __supported_pte_mask &= ~_PAGE_NX;
-}
-
-void __init x86_report_nx(void)
-{
-       if (!boot_cpu_has(X86_FEATURE_NX)) {
-               printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
-                      "missing in CPU!\n");
-       } else {
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-               if (disable_nx) {
-                       printk(KERN_INFO "NX (Execute Disable) protection: "
-                              "disabled by kernel command line option\n");
-               } else {
-                       printk(KERN_INFO "NX (Execute Disable) protection: "
-                              "active\n");
-               }
-#else
-               /* 32bit non-PAE kernel, NX cannot be used */
-               printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
-                      "cannot be enabled: non-PAE kernel!\n");
-#endif
-       }
-}
index 97b63e35e1528b116a8d5dc7a4e78598088d6357..a498b847d7403af4d0a4258e117b6cf4795b2e5f 100644 (file)
@@ -25,6 +25,8 @@
 #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
 #define PIRQ_VERSION 0x0100
 
+#define IRT_SIGNATURE  (('$' << 0) + ('I' << 8) + ('R' << 16) + ('T' << 24))
+
 static int broken_hp_bios_irq9;
 static int acer_tm360_irqrouting;
 
@@ -68,30 +70,99 @@ void (*pcibios_disable_irq)(struct pci_dev *dev) = pirq_disable_irq;
  *  and perform checksum verification.
  */
 
-static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
+static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr,
+                                                                u8 *limit)
 {
        struct irq_routing_table *rt;
        int i;
        u8 sum;
 
-       rt = (struct irq_routing_table *) addr;
+       rt = (struct irq_routing_table *)addr;
        if (rt->signature != PIRQ_SIGNATURE ||
            rt->version != PIRQ_VERSION ||
            rt->size % 16 ||
-           rt->size < sizeof(struct irq_routing_table))
+           rt->size < sizeof(struct irq_routing_table) ||
+           (limit && rt->size > limit - addr))
                return NULL;
        sum = 0;
        for (i = 0; i < rt->size; i++)
                sum += addr[i];
        if (!sum) {
-               DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n",
-                       rt);
+               DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%lx\n",
+                   __pa(rt));
                return rt;
        }
        return NULL;
 }
 
+/*
+ * Handle the $IRT PCI IRQ Routing Table format used by AMI for its BCP
+ * (BIOS Configuration Program) external tool meant for tweaking BIOS
+ * structures without the need to rebuild it from sources.  The $IRT
+ * format has been invented by AMI before Microsoft has come up with its
+ * $PIR format and a $IRT table is therefore there in some systems that
+ * lack a $PIR table.
+ *
+ * It uses the same PCI BIOS 2.1 format for interrupt routing entries
+ * themselves, but has a different, simpler header prepended instead,
+ * occupying 8 bytes, where a `$IRT' signature is followed by one byte
+ * specifying the total number of interrupt routing entries allocated in
+ * the table, then one byte specifying the actual number of entries used
+ * (which the BCP tool can take advantage of when modifying the table),
+ * and finally a 16-bit word giving the IRQs devoted exclusively to PCI.
+ * Unlike with the $PIR table there is no alignment guarantee.
+ *
+ * Given the similarity of the two formats, the $IRT one is trivial to
+ * convert to the $PIR one, which we do here.  The one thing we lack is
+ * information as to the router device to use, but we can handle that by
+ * matching PCI device IDs actually seen on the bus against ones that
+ * our individual routers recognise.
+ *
+ * Reportedly there is another $IRT table format, where a 16-bit word
+ * that points to interrupt routing entries in a $PIR table provided
+ * elsewhere follows the header instead.  In that case this code will
+ * not be reached, as the $PIR table will have been chosen instead.
+ */
+static inline struct irq_routing_table *pirq_convert_irt_table(u8 *addr,
+                                                              u8 *limit)
+{
+       struct irt_routing_table *ir;
+       struct irq_routing_table *rt;
+       u16 size;
+       u8 sum;
+       int i;
+
+       ir = (struct irt_routing_table *)addr;
+       if (ir->signature != IRT_SIGNATURE || !ir->used || ir->size < ir->used)
+               return NULL;
+
+       size = sizeof(*ir) + ir->used * sizeof(ir->slots[0]);
+       if (size > limit - addr)
+               return NULL;
+
+       DBG(KERN_DEBUG "PCI: $IRT Interrupt Routing Table found at 0x%lx\n",
+           __pa(ir));
+
+       size = sizeof(*rt) + ir->used * sizeof(rt->slots[0]);
+       rt = kzalloc(size, GFP_KERNEL);
+       if (!rt)
+               return NULL;
+
+       rt->signature = PIRQ_SIGNATURE;
+       rt->version = PIRQ_VERSION;
+       rt->size = size;
+       rt->exclusive_irqs = ir->exclusive_irqs;
+       for (i = 0; i < ir->used; i++)
+               rt->slots[i] = ir->slots[i];
+
+       addr = (u8 *)rt;
+       sum = 0;
+       for (i = 0; i < size; i++)
+               sum += addr[i];
+       rt->checksum = -sum;
+
+       return rt;
+}
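For reference, the 8-byte header described in the comment above maps onto a C structure along these lines (a sketch inferred from the description; the actual struct irt_routing_table definition lives in a header outside this hunk):

	struct irt_routing_table {
		u32 signature;		/* IRT_SIGNATURE, i.e. "$IRT" */
		u8 size;		/* number of entries allocated */
		u8 used;		/* number of entries actually used */
		u16 exclusive_irqs;	/* IRQs devoted exclusively to PCI */
		struct irq_info slots[];
	} __attribute__((packed));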
 
 /*
  *  Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
@@ -99,17 +170,29 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
 
 static struct irq_routing_table * __init pirq_find_routing_table(void)
 {
+       u8 * const bios_start = (u8 *)__va(0xf0000);
+       u8 * const bios_end = (u8 *)__va(0x100000);
        u8 *addr;
        struct irq_routing_table *rt;
 
        if (pirq_table_addr) {
-               rt = pirq_check_routing_table((u8 *) __va(pirq_table_addr));
+               rt = pirq_check_routing_table((u8 *)__va(pirq_table_addr),
+                                             NULL);
                if (rt)
                        return rt;
                printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n");
        }
-       for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
-               rt = pirq_check_routing_table(addr);
+       for (addr = bios_start;
+            addr < bios_end - sizeof(struct irq_routing_table);
+            addr += 16) {
+               rt = pirq_check_routing_table(addr, bios_end);
+               if (rt)
+                       return rt;
+       }
+       for (addr = bios_start;
+            addr < bios_end - sizeof(struct irt_routing_table);
+            addr++) {
+               rt = pirq_convert_irt_table(addr, bios_end);
                if (rt)
                        return rt;
        }
@@ -135,7 +218,8 @@ static void __init pirq_peer_trick(void)
 #ifdef DEBUG
                {
                        int j;
-                       DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+                       DBG(KERN_DEBUG "%02x:%02x.%x slot=%02x",
+                           e->bus, e->devfn / 8, e->devfn % 8, e->slot);
                        for (j = 0; j < 4; j++)
                                DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
                        DBG("\n");
@@ -253,6 +337,15 @@ static void write_pc_conf_nybble(u8 base, u8 index, u8 val)
        pc_conf_set(reg, x);
 }
 
+/*
+ * FinALi pirq rules are as follows:
+ *
+ * - bit 0 selects between INTx Routing Table Mapping Registers,
+ *
+ * - bit 3 selects the nibble within the INTx Routing Table Mapping Register,
+ *
+ * - bits 7:4 map to bits 3:0 of the PCI INTx Sensitivity Register.
+ */
 static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev,
                           int pirq)
 {
@@ -260,11 +353,13 @@ static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev,
                0, 9, 3, 10, 4, 5, 7, 6, 0, 11, 0, 12, 0, 14, 0, 15
        };
        unsigned long flags;
+       u8 index;
        u8 x;
 
+       index = (pirq & 1) << 1 | (pirq & 8) >> 3;
        raw_spin_lock_irqsave(&pc_conf_lock, flags);
        pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
-       x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1)];
+       x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index)];
        pc_conf_set(PC_CONF_FINALI_LOCK, 0);
        raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
        return x;
@@ -278,13 +373,15 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev,
        };
        u8 val = irqmap[irq];
        unsigned long flags;
+       u8 index;
 
        if (!val)
                return 0;
 
+       index = (pirq & 1) << 1 | (pirq & 8) >> 3;
        raw_spin_lock_irqsave(&pc_conf_lock, flags);
        pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
-       write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1, val);
+       write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index, val);
        pc_conf_set(PC_CONF_FINALI_LOCK, 0);
        raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
        return 1;
@@ -293,7 +390,7 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev,
 static int pirq_finali_lvl(struct pci_dev *router, struct pci_dev *dev,
                           int pirq, int irq)
 {
-       u8 mask = ~(1u << (pirq - 1));
+       u8 mask = ~((pirq & 0xf0u) >> 4);
        unsigned long flags;
        u8 trig;
 
@@ -579,6 +676,81 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
        return 1;
 }
 
+
+/*
+ *     PIRQ routing for the SiS85C497 AT Bus Controller & Megacell (ATM)
+ *     ISA bridge used with the SiS 85C496/497 486 Green PC VESA/ISA/PCI
+ *     Chipset.
+ *
+ *     There are four PCI INTx#-to-IRQ Link registers provided in the
+ *     SiS85C497 part of the peculiar combined 85C496/497 configuration
+ *     space decoded by the SiS85C496 PCI & CPU Memory Controller (PCM)
+ *     host bridge, at 0xc0/0xc1/0xc2/0xc3 respectively for the PCI INT
+ *     A/B/C/D lines.  Bit 7 enables the respective link if set and bits
+ *     3:0 select the 8259A IRQ line as follows:
+ *
+ *     0000 : Reserved
+ *     0001 : Reserved
+ *     0010 : Reserved
+ *     0011 : IRQ3
+ *     0100 : IRQ4
+ *     0101 : IRQ5
+ *     0110 : IRQ6
+ *     0111 : IRQ7
+ *     1000 : Reserved
+ *     1001 : IRQ9
+ *     1010 : IRQ10
+ *     1011 : IRQ11
+ *     1100 : IRQ12
+ *     1101 : Reserved
+ *     1110 : IRQ14
+ *     1111 : IRQ15
+ *
+ *     We avoid using a reserved value for disabled links, hence the
+ *     choice of IRQ15 for that case.
+ *
+ *     References:
+ *
+ *     "486 Green PC VESA/ISA/PCI Chipset, SiS 85C496/497", Rev 3.0,
+ *     Silicon Integrated Systems Corp., July 1995
+ */
+
+#define PCI_SIS497_INTA_TO_IRQ_LINK    0xc0u
+
+#define PIRQ_SIS497_IRQ_MASK           0x0fu
+#define PIRQ_SIS497_IRQ_ENABLE         0x80u
+
+static int pirq_sis497_get(struct pci_dev *router, struct pci_dev *dev,
+                          int pirq)
+{
+       int reg;
+       u8 x;
+
+       reg = pirq;
+       if (reg >= 1 && reg <= 4)
+               reg += PCI_SIS497_INTA_TO_IRQ_LINK - 1;
+
+       pci_read_config_byte(router, reg, &x);
+       return (x & PIRQ_SIS497_IRQ_ENABLE) ? (x & PIRQ_SIS497_IRQ_MASK) : 0;
+}
+
+static int pirq_sis497_set(struct pci_dev *router, struct pci_dev *dev,
+                          int pirq, int irq)
+{
+       int reg;
+       u8 x;
+
+       reg = pirq;
+       if (reg >= 1 && reg <= 4)
+               reg += PCI_SIS497_INTA_TO_IRQ_LINK - 1;
+
+       pci_read_config_byte(router, reg, &x);
+       x &= ~(PIRQ_SIS497_IRQ_MASK | PIRQ_SIS497_IRQ_ENABLE);
+       x |= irq ? (PIRQ_SIS497_IRQ_ENABLE | irq) : PIRQ_SIS497_IRQ_MASK;
+       pci_write_config_byte(router, reg, x);
+       return 1;
+}
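A short hedged walk-through of the accessors above, with hypothetical values:

	/*
	 * Example against pirq_sis497_set() above:
	 *   pirq == 2 (INTB#), irq == 11
	 *   reg  == 2 + 0xc0 - 1 == 0xc1
	 *   x    =  (x & ~0x8f) | 0x80 | 11	-> enable bit + 8259A IRQ number
	 * Disabling (irq == 0) instead writes x = (x & ~0x8f) | 0x0f, i.e.
	 * IRQ15, the non-reserved placeholder chosen above.
	 */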
+
 /*
  *     PIRQ routing for SiS 85C503 router used in several SiS chipsets.
  *     We have to deal with the following issues here:
@@ -640,11 +812,12 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
  *                             bit 6-4 are probably unused, not like 5595
  */
 
-#define PIRQ_SIS_IRQ_MASK      0x0f
-#define PIRQ_SIS_IRQ_DISABLE   0x80
-#define PIRQ_SIS_USB_ENABLE    0x40
+#define PIRQ_SIS503_IRQ_MASK   0x0f
+#define PIRQ_SIS503_IRQ_DISABLE        0x80
+#define PIRQ_SIS503_USB_ENABLE 0x40
 
-static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+static int pirq_sis503_get(struct pci_dev *router, struct pci_dev *dev,
+                          int pirq)
 {
        u8 x;
        int reg;
@@ -653,10 +826,11 @@ static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
        if (reg >= 0x01 && reg <= 0x04)
                reg += 0x40;
        pci_read_config_byte(router, reg, &x);
-       return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
+       return (x & PIRQ_SIS503_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS503_IRQ_MASK);
 }
 
-static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+static int pirq_sis503_set(struct pci_dev *router, struct pci_dev *dev,
+                          int pirq, int irq)
 {
        u8 x;
        int reg;
@@ -665,8 +839,8 @@ static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
        if (reg >= 0x01 && reg <= 0x04)
                reg += 0x40;
        pci_read_config_byte(router, reg, &x);
-       x &= ~(PIRQ_SIS_IRQ_MASK | PIRQ_SIS_IRQ_DISABLE);
-       x |= irq ? irq: PIRQ_SIS_IRQ_DISABLE;
+       x &= ~(PIRQ_SIS503_IRQ_MASK | PIRQ_SIS503_IRQ_DISABLE);
+       x |= irq ? irq : PIRQ_SIS503_IRQ_DISABLE;
        pci_write_config_byte(router, reg, x);
        return 1;
 }
@@ -958,13 +1132,19 @@ static __init int serverworks_router_probe(struct irq_router *r,
 
 static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
 {
-       if (device != PCI_DEVICE_ID_SI_503)
-               return 0;
-
-       r->name = "SIS";
-       r->get = pirq_sis_get;
-       r->set = pirq_sis_set;
-       return 1;
+       switch (device) {
+       case PCI_DEVICE_ID_SI_496:
+               r->name = "SiS85C497";
+               r->get = pirq_sis497_get;
+               r->set = pirq_sis497_set;
+               return 1;
+       case PCI_DEVICE_ID_SI_503:
+               r->name = "SiS85C503";
+               r->get = pirq_sis503_get;
+               r->set = pirq_sis503_set;
+               return 1;
+       }
+       return 0;
 }
 
 static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
@@ -1084,10 +1264,32 @@ static struct pci_dev *pirq_router_dev;
  *     chipset" ?
  */
 
+static bool __init pirq_try_router(struct irq_router *r,
+                                  struct irq_routing_table *rt,
+                                  struct pci_dev *dev)
+{
+       struct irq_router_handler *h;
+
+       DBG(KERN_DEBUG "PCI: Trying IRQ router for [%04x:%04x]\n",
+           dev->vendor, dev->device);
+
+       for (h = pirq_routers; h->vendor; h++) {
+               /* First look for a router match */
+               if (rt->rtr_vendor == h->vendor &&
+                   h->probe(r, dev, rt->rtr_device))
+                       return true;
+               /* Fall back to a device match */
+               if (dev->vendor == h->vendor &&
+                   h->probe(r, dev, dev->device))
+                       return true;
+       }
+       return false;
+}
+
 static void __init pirq_find_router(struct irq_router *r)
 {
        struct irq_routing_table *rt = pirq_table;
-       struct irq_router_handler *h;
+       struct pci_dev *dev;
 
 #ifdef CONFIG_PCI_BIOS
        if (!rt->signature) {
@@ -1106,50 +1308,94 @@ static void __init pirq_find_router(struct irq_router *r)
        DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
            rt->rtr_vendor, rt->rtr_device);
 
-       pirq_router_dev = pci_get_domain_bus_and_slot(0, rt->rtr_bus,
-                                                     rt->rtr_devfn);
-       if (!pirq_router_dev) {
-               DBG(KERN_DEBUG "PCI: Interrupt router not found at "
-                       "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
-               return;
+       /* Use any vendor:device provided by the routing table or try all.  */
+       if (rt->rtr_vendor) {
+               dev = pci_get_domain_bus_and_slot(0, rt->rtr_bus,
+                                                 rt->rtr_devfn);
+               if (dev && pirq_try_router(r, rt, dev))
+                       pirq_router_dev = dev;
+       } else {
+               dev = NULL;
+               for_each_pci_dev(dev) {
+                       if (pirq_try_router(r, rt, dev)) {
+                               pirq_router_dev = dev;
+                               break;
+                       }
+               }
        }
 
-       for (h = pirq_routers; h->vendor; h++) {
-               /* First look for a router match */
-               if (rt->rtr_vendor == h->vendor &&
-                       h->probe(r, pirq_router_dev, rt->rtr_device))
-                       break;
-               /* Fall back to a device match */
-               if (pirq_router_dev->vendor == h->vendor &&
-                       h->probe(r, pirq_router_dev, pirq_router_dev->device))
-                       break;
-       }
-       dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
-                pirq_router.name,
-                pirq_router_dev->vendor, pirq_router_dev->device);
+       if (pirq_router_dev)
+               dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
+                        pirq_router.name,
+                        pirq_router_dev->vendor, pirq_router_dev->device);
+       else
+               DBG(KERN_DEBUG "PCI: Interrupt router not found at "
+                   "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
 
        /* The device remains referenced for the kernel lifetime */
 }
 
-static struct irq_info *pirq_get_info(struct pci_dev *dev)
+/*
+ * We're supposed to match on the PCI device only and not the function,
+ * but some BIOSes build their tables with the PCI function included
+ * for motherboard devices, so if a complete match is found, give it
+ * precedence over a slot match.
+ */
+static struct irq_info *pirq_get_dev_info(struct pci_dev *dev)
 {
        struct irq_routing_table *rt = pirq_table;
        int entries = (rt->size - sizeof(struct irq_routing_table)) /
                sizeof(struct irq_info);
+       struct irq_info *slotinfo = NULL;
        struct irq_info *info;
 
        for (info = rt->slots; entries--; info++)
-               if (info->bus == dev->bus->number &&
-                       PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
-                       return info;
-       return NULL;
+               if (info->bus == dev->bus->number) {
+                       if (info->devfn == dev->devfn)
+                               return info;
+                       if (!slotinfo &&
+                           PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+                               slotinfo = info;
+               }
+       return slotinfo;
+}
+
+/*
+ * Buses behind bridges are typically not listed in the PIRQ routing table.
+ * Do the usual dance then: walk up the tree of bridges, adjusting the
+ * pin number accordingly on the way, until the originating root bus
+ * device has been reached, and then use its routing information.
+ */
+static struct irq_info *pirq_get_info(struct pci_dev *dev, u8 *pin)
+{
+       struct pci_dev *temp_dev = dev;
+       struct irq_info *info;
+       u8 temp_pin = *pin;
+       u8 dpin = temp_pin;
+
+       info = pirq_get_dev_info(dev);
+       while (!info && temp_dev->bus->parent) {
+               struct pci_dev *bridge = temp_dev->bus->self;
+
+               temp_pin = pci_swizzle_interrupt_pin(temp_dev, temp_pin);
+               info = pirq_get_dev_info(bridge);
+               if (info)
+                       dev_warn(&dev->dev,
+                                "using bridge %s INT %c to get INT %c\n",
+                                pci_name(bridge),
+                                'A' + temp_pin - 1, 'A' + dpin - 1);
+
+               temp_dev = bridge;
+       }
+       *pin = temp_pin;
+       return info;
 }
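pci_swizzle_interrupt_pin() above applies the standard PCI-to-PCI bridge swizzle. Conceptually it is equivalent to the following sketch (the real helper lives in the PCI core; the name here is illustrative):

	/*
	 * INTx of a device behind a bridge appears on the parent bus
	 * rotated by the device's slot number.
	 */
	static u8 swizzle_pin(const struct pci_dev *dev, u8 pin)
	{
		return (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1;
	}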
 
 static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 {
-       u8 pin;
        struct irq_info *info;
        int i, pirq, newirq;
+       u8 dpin, pin;
        int irq = 0;
        u32 mask;
        struct irq_router *r = &pirq_router;
@@ -1157,8 +1403,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
        char *msg = NULL;
 
        /* Find IRQ pin */
-       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-       if (!pin) {
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &dpin);
+       if (!dpin) {
                dev_dbg(&dev->dev, "no interrupt pin\n");
                return 0;
        }
@@ -1171,20 +1417,21 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
        if (!pirq_table)
                return 0;
 
-       info = pirq_get_info(dev);
+       pin = dpin;
+       info = pirq_get_info(dev, &pin);
        if (!info) {
                dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n",
-                       'A' + pin - 1);
+                       'A' + dpin - 1);
                return 0;
        }
        pirq = info->irq[pin - 1].link;
        mask = info->irq[pin - 1].bitmap;
        if (!pirq) {
-               dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin - 1);
+               dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + dpin - 1);
                return 0;
        }
        dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x",
-               'A' + pin - 1, pirq, mask, pirq_table->exclusive_irqs);
+               'A' + dpin - 1, pirq, mask, pirq_table->exclusive_irqs);
        mask &= pcibios_irq_mask;
 
        /* Work around broken HP Pavilion Notebooks which assign USB to
@@ -1226,7 +1473,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                                newirq = i;
                }
        }
-       dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin - 1, newirq);
+       dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + dpin - 1, newirq);
 
        /* Check if it is hardcoded */
        if ((pirq & 0xf0) == 0xf0) {
@@ -1260,15 +1507,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                        return 0;
                }
        }
-       dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq);
+       dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n",
+                msg, 'A' + dpin - 1, irq);
 
        /* Update IRQ for all devices with the same pirq value */
        for_each_pci_dev(dev2) {
-               pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
-               if (!pin)
+               pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &dpin);
+               if (!dpin)
                        continue;
 
-               info = pirq_get_info(dev2);
+               pin = dpin;
+               info = pirq_get_info(dev2, &pin);
                if (!info)
                        continue;
                if (info->irq[pin - 1].link == pirq) {
index 147c30a81f15b8fc0fbdb61f632ecf42f36f04ce..1591d67e0bcde3e1b6a97789e1f05ed0b6ea2a5b 100644 (file)
@@ -93,6 +93,9 @@ static const unsigned long * const efi_tables[] = {
 #ifdef CONFIG_LOAD_UEFI_KEYS
        &efi.mokvar_table,
 #endif
+#ifdef CONFIG_EFI_COCO_SECRET
+       &efi.coco_secret,
+#endif
 };
 
 u64 efi_setup;         /* efi setup_data physical address */
index 1e9ff28bc2e04c270ab594fa82f11dbc3590bc0d..a60af0230e27b32526ade6f5b3906e81d19327cd 100644 (file)
@@ -244,8 +244,10 @@ static inline bool uv_nmi_action_is(const char *action)
 /* Setup which NMI support is present in system */
 static void uv_nmi_setup_mmrs(void)
 {
+       bool new_nmi_method_only = false;
+
        /* First determine arch specific MMRs to handshake with BIOS */
-       if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) {
+       if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) {      /* UV2,3,4 setup */
                uvh_nmi_mmrx = UVH_EVENT_OCCURRED0;
                uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS;
                uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT;
@@ -255,26 +257,25 @@ static void uv_nmi_setup_mmrs(void)
                uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
                uvh_nmi_mmrx_req_shift = 62;
 
-       } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) {
+       } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { /* UV5+ setup */
                uvh_nmi_mmrx = UVH_EVENT_OCCURRED1;
                uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS;
                uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT;
                uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0";
 
-               uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
-               uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
-               uvh_nmi_mmrx_req_shift = 62;
+               new_nmi_method_only = true;             /* Newer NMI always valid on UV5+ */
+               uvh_nmi_mmrx_req = 0;                   /* no request bit to clear */
 
        } else {
-               pr_err("UV:%s:cannot find EVENT_OCCURRED*_EXTIO_INT0\n",
-                       __func__);
+               pr_err("UV:%s:NMI support not available on this system\n", __func__);
                return;
        }
 
        /* Then find out if new NMI is supported */
-       if (likely(uv_read_local_mmr(uvh_nmi_mmrx_supported))) {
-               uv_write_local_mmr(uvh_nmi_mmrx_req,
-                                       1UL << uvh_nmi_mmrx_req_shift);
+       if (new_nmi_method_only || uv_read_local_mmr(uvh_nmi_mmrx_supported)) {
+               if (uvh_nmi_mmrx_req)
+                       uv_write_local_mmr(uvh_nmi_mmrx_req,
+                                               1UL << uvh_nmi_mmrx_req_shift);
                nmi_mmr = uvh_nmi_mmrx;
                nmi_mmr_clear = uvh_nmi_mmrx_clear;
                nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift;
@@ -985,7 +986,7 @@ static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 
        /* Clear global flags */
        if (master) {
-               if (cpumask_weight(uv_nmi_cpu_mask))
+               if (!cpumask_empty(uv_nmi_cpu_mask))
                        uv_nmi_cleanup_mask();
                atomic_set(&uv_nmi_cpus_in_nmi, -1);
                atomic_set(&uv_nmi_cpu, -1);
index c5e29db02a4693f007559dc70aacae585c8297f9..41d7669a97ad167f50485fab3ac1ebafe255ddfc 100644 (file)
@@ -67,7 +67,7 @@ void __init reserve_real_mode(void)
        memblock_reserve(0, SZ_1M);
 }
 
-static void sme_sev_setup_real_mode(struct trampoline_header *th)
+static void __init sme_sev_setup_real_mode(struct trampoline_header *th)
 {
 #ifdef CONFIG_AMD_MEM_ENCRYPT
        if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
index 8c1db5bf5d78a9aa7cc4b4bef35f2e31b6998998..2eb62be6d2562bf4044ffb8e1f444b8892aed041 100644 (file)
@@ -24,6 +24,7 @@ SYM_DATA_START(real_mode_header)
        .long   pa_sev_es_trampoline_start
 #endif
 #ifdef CONFIG_X86_64
+       .long   pa_trampoline_start64
        .long   pa_trampoline_pgd;
 #endif
        /* ACPI S3 wakeup */
index cc8391f86cdb61469b2b775a64490e470b2ef534..e38d61d6562e4611c9150b935b018bee46717fe5 100644 (file)
@@ -70,7 +70,7 @@ SYM_CODE_START(trampoline_start)
        movw    $__KERNEL_DS, %dx       # Data segment descriptor
 
        # Enable protected mode
-       movl    $X86_CR0_PE, %eax       # protected mode (PE) bit
+       movl    $(CR0_STATE & ~X86_CR0_PG), %eax
        movl    %eax, %cr0              # into protected mode
 
        # flush prefetch and jump to startup_32
@@ -143,13 +143,24 @@ SYM_CODE_START(startup_32)
        movl    %eax, %cr3
 
        # Set up EFER
+       movl    $MSR_EFER, %ecx
+       rdmsr
+       /*
+        * Skip writing to EFER if the register already has the desired
+        * value (to avoid a #VE in a TDX guest).
+        */
+       cmp     pa_tr_efer, %eax
+       jne     .Lwrite_efer
+       cmp     pa_tr_efer + 4, %edx
+       je      .Ldone_efer
+.Lwrite_efer:
        movl    pa_tr_efer, %eax
        movl    pa_tr_efer + 4, %edx
-       movl    $MSR_EFER, %ecx
        wrmsr
 
-       # Enable paging and in turn activate Long Mode
-       movl    $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE), %eax
+.Ldone_efer:
+       # Enable paging and in turn activate Long Mode.
+       movl    $CR0_STATE, %eax
        movl    %eax, %cr0
 
        /*
@@ -161,6 +172,19 @@ SYM_CODE_START(startup_32)
        ljmpl   $__KERNEL_CS, $pa_startup_64
 SYM_CODE_END(startup_32)
 
+SYM_CODE_START(pa_trampoline_compat)
+       /*
+        * In compatibility mode.  Prep ESP and DX for startup_32, then disable
+        * paging and complete the switch to legacy 32-bit mode.
+        */
+       movl    $rm_stack_end, %esp
+       movw    $__KERNEL_DS, %dx
+
+       movl    $(CR0_STATE & ~X86_CR0_PG), %eax
+       movl    %eax, %cr0
+       ljmpl   $__KERNEL32_CS, $pa_startup_32
+SYM_CODE_END(pa_trampoline_compat)
+
        .section ".text64","ax"
        .code64
        .balign 4
@@ -169,6 +193,20 @@ SYM_CODE_START(startup_64)
        jmpq    *tr_start(%rip)
 SYM_CODE_END(startup_64)
 
+SYM_CODE_START(trampoline_start64)
+       /*
+        * APs start here on a direct transfer from 64-bit BIOS with identity
+        * mapped page tables.  Load the kernel's GDT in order to gear down to
+        * 32-bit mode (to handle 4-level vs. 5-level paging), and to (re)load
+        * segment registers.  Load the zero IDT so any fault triggers a
+        * shutdown instead of jumping back into BIOS.
+        */
+       lidt    tr_idt(%rip)
+       lgdt    tr_gdt64(%rip)
+
+       ljmpl   *tr_compat(%rip)
+SYM_CODE_END(trampoline_start64)
+
        .section ".rodata","a"
        # Duplicate the global descriptor table
        # so the kernel can live anywhere
@@ -182,6 +220,17 @@ SYM_DATA_START(tr_gdt)
        .quad   0x00cf93000000ffff      # __KERNEL_DS
 SYM_DATA_END_LABEL(tr_gdt, SYM_L_LOCAL, tr_gdt_end)
 
+SYM_DATA_START(tr_gdt64)
+       .short  tr_gdt_end - tr_gdt - 1 # gdt limit
+       .long   pa_tr_gdt
+       .long   0
+SYM_DATA_END(tr_gdt64)
+
+SYM_DATA_START(tr_compat)
+       .long   pa_trampoline_compat
+       .short  __KERNEL32_CS
+SYM_DATA_END(tr_compat)
+
        .bss
        .balign PAGE_SIZE
 SYM_DATA(trampoline_pgd, .space PAGE_SIZE)
index 5033e640f957edf7f690db623da0397cd81c906f..4331c32c47f84438b932a41ac07bf0dc4050213a 100644 (file)
@@ -1,4 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
        .section ".rodata","a"
        .balign 16
-SYM_DATA_LOCAL(tr_idt, .fill 1, 6, 0)
+
+/*
+ * When a bootloader hands off to the kernel in 32-bit mode, an
+ * IDT with a 2-byte limit and a 4-byte base is needed. When a
+ * bootloader hands off to the kernel in 64-bit mode, the base
+ * address extends to 8 bytes. Reserve enough space for either
+ * scenario.
+ */
+SYM_DATA_START_LOCAL(tr_idt)
+       .short  0
+       .quad   0
+SYM_DATA_END(tr_idt)
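Laid out as C, the reserved space corresponds to a pseudo-descriptor big enough for either handoff mode (an illustrative sketch; the struct name is hypothetical):

	struct idt_ptr {
		u16 limit;	/* the .short above, both modes */
		u64 base;	/* the .quad: low 4 bytes used in 32-bit
				   mode, all 8 in 64-bit mode */
	} __attribute__((packed));	/* 10 bytes total */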
index 1d6437e6d2ba3bcf5701a8c49fb117dc36c3e8a2..a6f4d8388ad888fbd10996c548efeab2d195e599 100644 (file)
@@ -62,8 +62,12 @@ static void send_morse(const char *pattern)
        }
 }
 
+struct port_io_ops pio_ops;
+
 void main(void)
 {
+       init_default_io_ops();
+
        /* Kill machine if structures are wrong */
        if (wakeup_header.real_magic != 0x12345678)
                while (1)
diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
new file mode 100644 (file)
index 0000000..49a5435
--- /dev/null
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/asm-offsets.h>
+#include <asm/tdx.h>
+
+/*
+ * TDCALL and SEAMCALL are supported in Binutils >= 2.36.
+ */
+#define tdcall         .byte 0x66,0x0f,0x01,0xcc
+#define seamcall       .byte 0x66,0x0f,0x01,0xcf
+
+/*
+ * TDX_MODULE_CALL - common helper macro for both
+ *                 TDCALL and SEAMCALL instructions.
+ *
+ * TDCALL   - used by TDX guests to make requests to the
+ *            TDX module and hypercalls to the VMM.
+ * SEAMCALL - used by TDX hosts to make requests to the
+ *            TDX module.
+ */
+.macro TDX_MODULE_CALL host:req
+       /*
+        * R12 will be used as temporary storage for the struct
+        * tdx_module_output pointer. Since the R12-R15 registers are not
+        * used by the TDCALL/SEAMCALL services supported by this macro,
+        * R12 can be reused.
+        */
+
+       /* Callee saved, so preserve it */
+       push %r12
+
+       /*
+        * Push the output pointer onto the stack.
+        * After the operation, it will be popped into the R12 register.
+        */
+       push %r9
+
+       /* Mangle function call ABI into TDCALL/SEAMCALL ABI: */
+       /* Move Leaf ID to RAX */
+       mov %rdi, %rax
+       /* Move input 4 to R9 */
+       mov %r8,  %r9
+       /* Move input 3 to R8 */
+       mov %rcx, %r8
+       /* Move input 1 to RCX */
+       mov %rsi, %rcx
+       /* Leave input param 2 in RDX */
+
+       .if \host
+       seamcall
+       /*
+        * The SEAMCALL instruction is essentially a VMExit from VMX root
+        * mode to SEAM VMX root mode.  VMfailInvalid (CF=1) indicates
+        * that the targeted SEAM firmware is not loaded or disabled,
+        * or P-SEAMLDR is busy with another SEAMCALL.  %rax is not
+        * changed in this case.
+        *
+        * Set %rax to TDX_SEAMCALL_VMFAILINVALID for VMfailInvalid.
+        * This value will never be used as an actual SEAMCALL error code,
+        * as it is from the Reserved status code class.
+        */
+       jnc .Lno_vmfailinvalid
+       mov $TDX_SEAMCALL_VMFAILINVALID, %rax
+.Lno_vmfailinvalid:
+
+       .else
+       tdcall
+       .endif
+
+       /*
+        * Fetch the output pointer from the stack into R12 (it is used
+        * as temporary storage).
+        */
+       pop %r12
+
+       /*
+        * Since this macro can be invoked with NULL as an output pointer,
+        * check if the caller provided an output struct before storing the
+        * output registers.
+        *
+        * Update output registers, even if the call failed (RAX != 0).
+        * Other registers may contain details of the failure.
+        */
+       test %r12, %r12
+       jz .Lno_output_struct
+
+       /* Copy result registers to output struct: */
+       movq %rcx, TDX_MODULE_rcx(%r12)
+       movq %rdx, TDX_MODULE_rdx(%r12)
+       movq %r8,  TDX_MODULE_r8(%r12)
+       movq %r9,  TDX_MODULE_r9(%r12)
+       movq %r10, TDX_MODULE_r10(%r12)
+       movq %r11, TDX_MODULE_r11(%r12)
+
+.Lno_output_struct:
+       /* Restore the state of the R12 register */
+       pop %r12
+.endm
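The TDX_MODULE_rcx ... TDX_MODULE_r11 offsets used above index a plain register-dump structure along these lines (a sketch inferred from the macro; see asm/tdx.h for the real struct tdx_module_output):

	struct tdx_module_output {
		u64 rcx;
		u64 rdx;
		u64 r8;
		u64 r9;
		u64 r10;
		u64 r11;
	};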
index 5038edb79ad518750a6f665b61ec0d2a28d6e21b..ca85d1409917aeb62b63f215e88c8ddbaf318c46 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/edd.h>
-#include <linux/objtool.h>
 
 #include <xen/xen.h>
 #include <xen/events.h>
@@ -165,7 +164,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 
        *bx &= maskebx;
 }
-STACK_FRAME_NON_STANDARD(xen_cpuid); /* XEN_EMULATE_PREFIX */
 
 static bool __init xen_check_mwait(void)
 {
index 688aa8b6ae29a2f57075996ad05788d084b49466..ba7af2eca755b7f08e4fe9a4c31f350defe80c2d 100644 (file)
@@ -260,8 +260,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
                return 0;
 
        ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
-       if (ctxt == NULL)
+       if (ctxt == NULL) {
+               cpumask_clear_cpu(cpu, xen_cpu_initialized_map);
+               cpumask_clear_cpu(cpu, cpu_callout_mask);
                return -ENOMEM;
+       }
 
        gdt = get_cpu_gdt_rw(cpu);
 
index bd113bc6e1925f6fdbe4e49c3350d24f3a4d6e99..0b0f0172cced55565d35c3b2201a9be1776e1e47 100644 (file)
@@ -4,6 +4,7 @@ config XTENSA
        select ARCH_32BIT_OFF_T
        select ARCH_HAS_BINFMT_FLAT if !MMU
        select ARCH_HAS_CURRENT_STACK_POINTER
+       select ARCH_HAS_DEBUG_VM_PGTABLE
        select ARCH_HAS_DMA_PREP_COHERENT if MMU
        select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU
        select ARCH_HAS_SYNC_DMA_FOR_DEVICE if MMU
@@ -29,8 +30,10 @@ config XTENSA
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
        select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
+       select HAVE_ARCH_KCSAN
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
+       select HAVE_CONTEXT_TRACKING
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_CONTIGUOUS
        select HAVE_EXIT_THREAD
@@ -42,6 +45,7 @@ config XTENSA
        select HAVE_PERF_EVENTS
        select HAVE_STACKPROTECTOR
        select HAVE_SYSCALL_TRACEPOINTS
+       select HAVE_VIRT_CPU_ACCOUNTING_GEN
        select IRQ_DOMAIN
        select MODULES_USE_ELF_RELA
        select PERF_USE_VMALLOC
@@ -79,6 +83,7 @@ config STACKTRACE_SUPPORT
 
 config MMU
        def_bool n
+       select PFAULT
 
 config HAVE_XTENSA_GPIO32
        def_bool n
@@ -178,6 +183,16 @@ config XTENSA_FAKE_NMI
 
          If unsure, say N.
 
+config PFAULT
+       bool "Handle protection faults" if EXPERT && !MMU
+       default y
+       help
+         Handle protection faults. MMU configurations must enable it.
+         noMMU configurations may disable it if the memory map in use
+         never generates protection faults or if faults are always fatal.
+
+         If unsure, say Y.
+
 config XTENSA_UNALIGNED_USER
        bool "Unaligned memory access in user space"
        help
@@ -773,6 +788,9 @@ endmenu
 
 menu "Power management options"
 
+config ARCH_HIBERNATION_POSSIBLE
+       def_bool y
+
 source "kernel/power/Kconfig"
 
 endmenu
index e3d717c7bfa1f6f5a0d4c0725aef0b8749a55dcd..162d10af36f33cf6a963b5ebb3a915a2ea5480f1 100644 (file)
@@ -16,6 +16,7 @@ CFLAGS_REMOVE_inffast.o = -pg
 endif
 
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 CFLAGS_REMOVE_inflate.o += -fstack-protector -fstack-protector-strong
 CFLAGS_REMOVE_zmem.o += -fstack-protector -fstack-protector-strong
index d6f8d4ddc2bca04e850ba75b10677cf8611d6619..898ea397e9bc989673ebd0e99761cc30a8b103bb 100644 (file)
 
 #include <asm/core.h>
 
-#define mb()  ({ __asm__ __volatile__("memw" : : : "memory"); })
-#define rmb() barrier()
-#define wmb() mb()
+#define __mb()  ({ __asm__ __volatile__("memw" : : : "memory"); })
+#define __rmb() barrier()
+#define __wmb() __mb()
+
+#ifdef CONFIG_SMP
+#define __smp_mb() __mb()
+#define __smp_rmb() __rmb()
+#define __smp_wmb() __wmb()
+#endif
 
 #if XCHAL_HAVE_S32C1I
 #define __smp_mb__before_atomic()              barrier()
index cd225896c40f4bfa5e675ee439d27a95b0230a22..e02ec5833389479c1c3054f51a3fe90a2f62437b 100644 (file)
@@ -99,7 +99,7 @@ static inline unsigned long __fls(unsigned long word)
 #if XCHAL_HAVE_EXCLUSIVE
 
 #define BIT_OP(op, insn, inv)                                          \
-static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
+static inline void arch_##op##_bit(unsigned int bit, volatile unsigned long *p)\
 {                                                                      \
        unsigned long tmp;                                              \
        unsigned long mask = 1UL << (bit & 31);                         \
@@ -119,7 +119,7 @@ static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
 
 #define TEST_AND_BIT_OP(op, insn, inv)                                 \
 static inline int                                                      \
-test_and_##op##_bit(unsigned int bit, volatile unsigned long *p)       \
+arch_test_and_##op##_bit(unsigned int bit, volatile unsigned long *p)  \
 {                                                                      \
        unsigned long tmp, value;                                       \
        unsigned long mask = 1UL << (bit & 31);                         \
@@ -142,7 +142,7 @@ test_and_##op##_bit(unsigned int bit, volatile unsigned long *p)    \
 #elif XCHAL_HAVE_S32C1I
 
 #define BIT_OP(op, insn, inv)                                          \
-static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
+static inline void arch_##op##_bit(unsigned int bit, volatile unsigned long *p)\
 {                                                                      \
        unsigned long tmp, value;                                       \
        unsigned long mask = 1UL << (bit & 31);                         \
@@ -163,7 +163,7 @@ static inline void op##_bit(unsigned int bit, volatile unsigned long *p)\
 
 #define TEST_AND_BIT_OP(op, insn, inv)                                 \
 static inline int                                                      \
-test_and_##op##_bit(unsigned int bit, volatile unsigned long *p)       \
+arch_test_and_##op##_bit(unsigned int bit, volatile unsigned long *p)  \
 {                                                                      \
        unsigned long tmp, value;                                       \
        unsigned long mask = 1UL << (bit & 31);                         \
@@ -205,6 +205,8 @@ BIT_OPS(change, "xor", )
 #undef BIT_OP
 #undef TEST_AND_BIT_OP
 
+#include <asm-generic/bitops/instrumented-atomic.h>
+
 #include <asm-generic/bitops/le.h>
 
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
index 0fbe2a740b8d6ff494e8f8dbe896f0f0a67e85b0..3b1a0d5d2169dccc8c22b4baa0958f1fb85ca6db 100644 (file)
@@ -142,11 +142,12 @@ typedef struct { XCHAL_CP6_SA_LIST(2) } xtregs_cp6_t
 typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
        __attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
 
-extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
-extern void coprocessor_flush(struct thread_info*, int);
-
-extern void coprocessor_release_all(struct thread_info*);
-extern void coprocessor_flush_all(struct thread_info*);
+struct thread_info;
+void coprocessor_flush(struct thread_info *ti, int cp_index);
+void coprocessor_release_all(struct thread_info *ti);
+void coprocessor_flush_all(struct thread_info *ti);
+void coprocessor_flush_release_all(struct thread_info *ti);
+void local_coprocessors_flush_release_all(void);
 
 #endif /* XTENSA_HAVE_COPROCESSORS */
 
index 4489a27d527a801314e791fcd041c07b6f1fe1ac..76bc63127c66ef02403d4b7d5c67bd28a0d10456 100644 (file)
@@ -246,6 +246,13 @@ extern unsigned long __get_wchan(struct task_struct *p);
         v; \
         })
 
+#define xtensa_xsr(x, sr) \
+       ({ \
+        unsigned int __v__ = (unsigned int)(x); \
+        __asm__ __volatile__ ("xsr %0, " __stringify(sr) : "+a"(__v__)); \
+        __v__; \
+        })
+
 #if XCHAL_HAVE_EXTERN_REGS
 
 static inline void set_er(unsigned long value, unsigned long addr)
index a8c42d08e281cd075b192c926ac545fe39a151de..3bc6b9afa99391ddde4d2c5ccf06b468b02a7eed 100644 (file)
@@ -29,7 +29,7 @@ extern char _Level5InterruptVector_text_end[];
 extern char _Level6InterruptVector_text_start[];
 extern char _Level6InterruptVector_text_end[];
 #endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_SECONDARY_RESET_VECTOR
 extern char _SecondaryResetVector_text_start[];
 extern char _SecondaryResetVector_text_end[];
 #endif
index f6fcbba1d02fcd3e725d169fa1fc8391cab7f67c..326db1c1d5d8d4c44bacc1f27a4cc39ee4fe4742 100644 (file)
@@ -52,12 +52,21 @@ struct thread_info {
        __u32                   cpu;            /* current CPU */
        __s32                   preempt_count;  /* 0 => preemptable,< 0 => BUG*/
 
-       unsigned long           cpenable;
 #if XCHAL_HAVE_EXCLUSIVE
        /* result of the most recent exclusive store */
        unsigned long           atomctl8;
 #endif
+#ifdef CONFIG_USER_ABI_CALL0_PROBE
+       /* Address where PS.WOE was enabled by the ABI probing code */
+       unsigned long           ps_woe_fix_addr;
+#endif
 
+       /*
+        * If the i-th bit is set then the coprocessor state is loaded into
+        * coprocessor i on CPU cp_owner_cpu.
+        */
+       unsigned long           cpenable;
+       u32                     cp_owner_cpu;
        /* Allocate storage for extra user states and coprocessor states. */
 #if XTENSA_HAVE_COPROCESSORS
        xtregs_coprocessor_t    xtregs_cp;
index 233ec75e60c696814219e7d37eeb5e585b0f8259..3f2462f2d0270c5c41f3517562e5e14081837772 100644 (file)
 
 extern unsigned long ccount_freq;
 
-typedef unsigned long long cycles_t;
-
-#define get_cycles()   (0)
-
 void local_timer_setup(unsigned cpu);
 
 /*
@@ -59,4 +55,6 @@ static inline void set_linux_timer (unsigned long ccompare)
        xtensa_set_sr(ccompare, SREG_CCOMPARE + LINUX_TIMER);
 }
 
+#include <asm-generic/timex.h>
+
 #endif /* _XTENSA_TIMEX_H */
index 6fa47cd8e02d44ef88b93b235edf094dec0971f4..6f74ccc0c7eadc6a184132f643ea8f380ae29742 100644 (file)
@@ -12,6 +12,8 @@
 
 #include <asm/ptrace.h>
 
+typedef void xtensa_exception_handler(struct pt_regs *regs);
+
 /*
  * Per-CPU exception handling data structure.
  * EXCSAVE1 points to it.
@@ -25,31 +27,47 @@ struct exc_table {
        void *fixup;
        /* For passing a parameter to fixup */
        void *fixup_param;
+#if XTENSA_HAVE_COPROCESSORS
+       /* Pointers to owner struct thread_info */
+       struct thread_info *coprocessor_owner[XCHAL_CP_MAX];
+#endif
        /* Fast user exception handlers */
        void *fast_user_handler[EXCCAUSE_N];
        /* Fast kernel exception handlers */
        void *fast_kernel_handler[EXCCAUSE_N];
        /* Default C-Handlers */
-       void *default_handler[EXCCAUSE_N];
+       xtensa_exception_handler *default_handler[EXCCAUSE_N];
 };
 
-/*
- * handler must be either of the following:
- *  void (*)(struct pt_regs *regs);
- *  void (*)(struct pt_regs *regs, unsigned long exccause);
- */
-extern void * __init trap_set_handler(int cause, void *handler);
-extern void do_unhandled(struct pt_regs *regs, unsigned long exccause);
-void fast_second_level_miss(void);
+DECLARE_PER_CPU(struct exc_table, exc_table);
+
+xtensa_exception_handler *
+__init trap_set_handler(int cause, xtensa_exception_handler *handler);
+
+asmlinkage void fast_illegal_instruction_user(void);
+asmlinkage void fast_syscall_user(void);
+asmlinkage void fast_alloca(void);
+asmlinkage void fast_unaligned(void);
+asmlinkage void fast_second_level_miss(void);
+asmlinkage void fast_store_prohibited(void);
+asmlinkage void fast_coprocessor(void);
+
+asmlinkage void kernel_exception(void);
+asmlinkage void user_exception(void);
+asmlinkage void system_call(struct pt_regs *regs);
+
+void do_IRQ(int hwirq, struct pt_regs *regs);
+void do_page_fault(struct pt_regs *regs);
+void do_unhandled(struct pt_regs *regs);
 
 /* Initialize minimal exc_table structure sufficient for basic paging */
 static inline void __init early_trap_init(void)
 {
-       static struct exc_table exc_table __initdata = {
+       static struct exc_table init_exc_table __initdata = {
                .fast_kernel_handler[EXCCAUSE_DTLB_MISS] =
                        fast_second_level_miss,
        };
-       __asm__ __volatile__("wsr  %0, excsave1\n" : : "a" (&exc_table));
+       xtensa_set_sr(&init_exc_table, excsave1);
 }
 
 void secondary_trap_init(void);
index 5fd6cd15e0fb191c212b9027f4bd2d9cc16ed371..897c1c7410589826be38626288be77afa3dc41e9 100644 (file)
@@ -19,6 +19,7 @@ obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_S32C1I_SELFTEST) += s32c1i_selftest.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o
 
 # In the Xtensa architecture, assembly generates literals which must always
 # precede the L32R instruction with a relative offset less than 256 kB.
index 37278e2785fb0e0344e22fb0ecfdc6fbe6700f1d..da38de20ae598b1a76b1785bb11a7cce53737d73 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/ptrace.h>
 #include <linux/mm.h>
 #include <linux/kbuild.h>
+#include <linux/suspend.h>
 
 #include <asm/ptrace.h>
 #include <asm/traps.h>
@@ -87,14 +88,19 @@ int main(void)
        OFFSET(TI_STSTUS, thread_info, status);
        OFFSET(TI_CPU, thread_info, cpu);
        OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
+#ifdef CONFIG_USER_ABI_CALL0_PROBE
+       OFFSET(TI_PS_WOE_FIX_ADDR, thread_info, ps_woe_fix_addr);
+#endif
 
        /* struct thread_info (offset from start_struct) */
        DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
        DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
-       DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
 #if XCHAL_HAVE_EXCLUSIVE
        DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8));
 #endif
+       DEFINE(THREAD_CPENABLE, offsetof(struct thread_info, cpenable));
+       DEFINE(THREAD_CPU, offsetof(struct thread_info, cpu));
+       DEFINE(THREAD_CP_OWNER_CPU, offsetof(struct thread_info, cp_owner_cpu));
 #if XTENSA_HAVE_COPROCESSORS
        DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
        DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
@@ -137,11 +143,22 @@ int main(void)
        DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save));
        DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup));
        DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param));
+#if XTENSA_HAVE_COPROCESSORS
+       DEFINE(EXC_TABLE_COPROCESSOR_OWNER,
+              offsetof(struct exc_table, coprocessor_owner));
+#endif
        DEFINE(EXC_TABLE_FAST_USER,
               offsetof(struct exc_table, fast_user_handler));
        DEFINE(EXC_TABLE_FAST_KERNEL,
               offsetof(struct exc_table, fast_kernel_handler));
        DEFINE(EXC_TABLE_DEFAULT, offsetof(struct exc_table, default_handler));
 
+#ifdef CONFIG_HIBERNATION
+       DEFINE(PBE_ADDRESS, offsetof(struct pbe, address));
+       DEFINE(PBE_ORIG_ADDRESS, offsetof(struct pbe, orig_address));
+       DEFINE(PBE_NEXT, offsetof(struct pbe, next));
+       DEFINE(PBE_SIZE, sizeof(struct pbe));
+#endif
+
        return 0;
 }
index c7b9f12896f20a6870588cd50e9849e11c992dcf..ef33e76e07d835bc527b24f33fabfd5fe4abb03c 100644 (file)
 #include <asm/current.h>
 #include <asm/regs.h>
 
+/*
+ * Rules for coprocessor state manipulation on SMP:
+ *
+ * - a task may have live coprocessors on only one CPU at a time.
+ *
+ * - whether the coprocessor context of task T is live on some CPU is
+ *   denoted by T's thread_info->cpenable.
+ *
+ * - non-zero thread_info->cpenable means that thread_info->cp_owner_cpu
+ *   is valid in T's thread_info. Zero thread_info->cpenable means that
+ *   the coprocessor context saved in T's thread_info is valid.
+ *
+ * - if a coprocessor context of task T is live on CPU X, only CPU X changes
+ *   T's thread_info->cpenable, cp_owner_cpu and coprocessor save area.
+ *   This is done by making sure that for a task T with a live coprocessor
+ *   on CPU X the cpenable SR is 0 whenever T runs on any other CPU Y.
+ *   When the fast_coprocessor exception is taken on CPU Y it goes to the
+ *   C-level do_coprocessor, which uses an IPI to make CPU X flush T's
+ *   coprocessors.
+ */
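
In C-like terms the rules above amount to the following sketch
(load_cp_context is a hypothetical helper; the IPI path matches
coprocessor_flush_release_all added later in this series):

        /* Handling a coprocessor-disabled exception for task T on CPU Y. */
        if (ti->cpenable == 0) {
                /* saved context in T's thread_info is valid: load it here */
                load_cp_context(ti);
        } else if (ti->cp_owner_cpu == smp_processor_id()) {
                /* the live context is already on this CPU: just re-enable it */
        } else {
                /* live on CPU X: make CPU X flush and release T's coprocessors */
                smp_call_function_single(ti->cp_owner_cpu,
                                         local_coprocessor_flush_release_all,
                                         ti, true);
        }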
+
 #if XTENSA_HAVE_COPROCESSORS
 
 /*
                .align 4;                                               \
        .Lsave_cp_regs_cp##x:                                           \
                xchal_cp##x##_store a2 a3 a4 a5 a6;                     \
-               jx      a0;                                             \
+               ret;                                                    \
        .endif
 
-#define SAVE_CP_REGS_TAB(x)                                            \
-       .if XTENSA_HAVE_COPROCESSOR(x);                                 \
-               .long .Lsave_cp_regs_cp##x;                             \
-       .else;                                                          \
-               .long 0;                                                \
-       .endif;                                                         \
-       .long THREAD_XTREGS_CP##x
-
-
 #define LOAD_CP_REGS(x)                                                        \
        .if XTENSA_HAVE_COPROCESSOR(x);                                 \
                .align 4;                                               \
        .Lload_cp_regs_cp##x:                                           \
                xchal_cp##x##_load a2 a3 a4 a5 a6;                      \
-               jx      a0;                                             \
+               ret;                                                    \
        .endif
 
-#define LOAD_CP_REGS_TAB(x)                                            \
+#define CP_REGS_TAB(x)                                                 \
        .if XTENSA_HAVE_COPROCESSOR(x);                                 \
+               .long .Lsave_cp_regs_cp##x;                             \
                .long .Lload_cp_regs_cp##x;                             \
        .else;                                                          \
-               .long 0;                                                \
+               .long 0, 0;                                             \
        .endif;                                                         \
        .long THREAD_XTREGS_CP##x
 
+#define CP_REGS_TAB_SAVE 0
+#define CP_REGS_TAB_LOAD 4
+#define CP_REGS_TAB_OFFSET 8
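+
+/*
+ * Each .Lcp_regs_jump_table entry is 12 bytes: the save routine, the
+ * load routine and the offset of the save area within struct thread_info.
+ * Code below indexes the table with addx8 + addx4, i.e. cp_index * 12.
+ */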
+
        __XTENSA_HANDLER
 
        SAVE_CP_REGS(0)
        LOAD_CP_REGS(7)
 
        .align 4
-.Lsave_cp_regs_jump_table:
-       SAVE_CP_REGS_TAB(0)
-       SAVE_CP_REGS_TAB(1)
-       SAVE_CP_REGS_TAB(2)
-       SAVE_CP_REGS_TAB(3)
-       SAVE_CP_REGS_TAB(4)
-       SAVE_CP_REGS_TAB(5)
-       SAVE_CP_REGS_TAB(6)
-       SAVE_CP_REGS_TAB(7)
-
-.Lload_cp_regs_jump_table:
-       LOAD_CP_REGS_TAB(0)
-       LOAD_CP_REGS_TAB(1)
-       LOAD_CP_REGS_TAB(2)
-       LOAD_CP_REGS_TAB(3)
-       LOAD_CP_REGS_TAB(4)
-       LOAD_CP_REGS_TAB(5)
-       LOAD_CP_REGS_TAB(6)
-       LOAD_CP_REGS_TAB(7)
+.Lcp_regs_jump_table:
+       CP_REGS_TAB(0)
+       CP_REGS_TAB(1)
+       CP_REGS_TAB(2)
+       CP_REGS_TAB(3)
+       CP_REGS_TAB(4)
+       CP_REGS_TAB(5)
+       CP_REGS_TAB(6)
+       CP_REGS_TAB(7)
 
 /*
  * Entry condition:
 
 ENTRY(fast_coprocessor)
 
+       s32i    a3, a2, PT_AREG3
+
+#ifdef CONFIG_SMP
+       /*
+        * Check if any coprocessor context is live on another CPU
+        * and if so go through the C-level coprocessor exception handler
+        * to flush it to memory.
+        */
+       GET_THREAD_INFO (a0, a2)
+       l32i    a3, a0, THREAD_CPENABLE
+       beqz    a3, .Lload_local
+
+       /*
+        * Pairs with smp_wmb in local_coprocessor_release_all
+        * and with both memws below.
+        */
+       memw
+       l32i    a3, a0, THREAD_CPU
+       l32i    a0, a0, THREAD_CP_OWNER_CPU
+       beq     a0, a3, .Lload_local
+
+       rsr     a0, ps
+       l32i    a3, a2, PT_AREG3
+       bbci.l  a0, PS_UM_BIT, 1f
+       call0   user_exception
+1:     call0   kernel_exception
+#endif
+
        /* Save remaining registers a1-a3 and SAR */
 
-       s32i    a3, a2, PT_AREG3
+.Lload_local:
        rsr     a3, sar
        s32i    a1, a2, PT_AREG1
        s32i    a3, a2, PT_SAR
@@ -125,13 +159,15 @@ ENTRY(fast_coprocessor)
        rsr     a2, depc
        s32i    a2, a1, PT_AREG2
 
-       /*
-        * The hal macros require up to 4 temporary registers. We use a3..a6.
-        */
+       /* The hal macros require up to 4 temporary registers. We use a3..a6. */
 
        s32i    a4, a1, PT_AREG4
        s32i    a5, a1, PT_AREG5
        s32i    a6, a1, PT_AREG6
+       s32i    a7, a1, PT_AREG7
+       s32i    a8, a1, PT_AREG8
+       s32i    a9, a1, PT_AREG9
+       s32i    a10, a1, PT_AREG10
 
        /* Find coprocessor number. Subtract first CP EXCCAUSE from EXCCAUSE */
 
@@ -148,58 +184,74 @@ ENTRY(fast_coprocessor)
        wsr     a0, cpenable
        rsync
 
-       /* Retrieve previous owner. (a3 still holds CP number) */
+       /* Get coprocessor save/load table entry (a7). */
 
-       movi    a0, coprocessor_owner   # list of owners
-       addx4   a0, a3, a0              # entry for CP
-       l32i    a4, a0, 0
+       movi    a7, .Lcp_regs_jump_table
+       addx8   a7, a3, a7
+       addx4   a7, a3, a7
 
-       beqz    a4, 1f                  # skip 'save' if no previous owner
+       /* Retrieve previous owner (a8). */
 
-       /* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
+       rsr     a0, excsave1            # exc_table
+       addx4   a0, a3, a0              # entry for CP
+       l32i    a8, a0, EXC_TABLE_COPROCESSOR_OWNER
+
+       /* Set new owner (a9). */
 
-       l32i    a5, a4, THREAD_CPENABLE
-       xor     a5, a5, a2              # (1 << cp-id) still in a2
-       s32i    a5, a4, THREAD_CPENABLE
+       GET_THREAD_INFO (a9, a1)
+       l32i    a4, a9, THREAD_CPU
+       s32i    a9, a0, EXC_TABLE_COPROCESSOR_OWNER
+       s32i    a4, a9, THREAD_CP_OWNER_CPU
 
        /*
-        * Get context save area and 'call' save routine. 
-        * (a4 still holds previous owner (thread_info), a3 CP number)
+        * Enable coprocessor for the new owner. (a2 = 1 << CP number)
+        * This can be done before loading context into the coprocessor.
         */
+       l32i    a4, a9, THREAD_CPENABLE
+       or      a4, a4, a2
 
-       movi    a5, .Lsave_cp_regs_jump_table
-       movi    a0, 2f                  # a0: 'return' address
-       addx8   a3, a3, a5              # a3: coprocessor number
-       l32i    a2, a3, 4               # a2: xtregs offset
-       l32i    a3, a3, 0               # a3: jump address
-       add     a2, a2, a4
-       jx      a3
+       /*
+        * Make sure THREAD_CP_OWNER_CPU is in memory before updating
+        * THREAD_CPENABLE
+        */
+       memw                            # (2)
+       s32i    a4, a9, THREAD_CPENABLE
 
-       /* Note that only a0 and a1 were preserved. */
+       beqz    a8, 1f                  # skip 'save' if no previous owner
 
-2:     rsr     a3, exccause
-       addi    a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
-       movi    a0, coprocessor_owner
-       addx4   a0, a3, a0
+       /* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
 
-       /* Set new 'owner' (a0 points to the CP owner, a3 contains the CP nr) */
+       l32i    a10, a8, THREAD_CPENABLE
+       xor     a10, a10, a2
 
-1:     GET_THREAD_INFO (a4, a1)
-       s32i    a4, a0, 0
+       /* Get context save area and call save routine. */
 
-       /* Get context save area and 'call' load routine. */
+       l32i    a2, a7, CP_REGS_TAB_OFFSET
+       l32i    a3, a7, CP_REGS_TAB_SAVE
+       add     a2, a2, a8
+       callx0  a3
 
-       movi    a5, .Lload_cp_regs_jump_table
-       movi    a0, 1f
-       addx8   a3, a3, a5
-       l32i    a2, a3, 4               # a2: xtregs offset
-       l32i    a3, a3, 0               # a3: jump address
-       add     a2, a2, a4
-       jx      a3
+       /*
+        * Make sure coprocessor context and THREAD_CP_OWNER_CPU are in memory
+        * before updating THREAD_CPENABLE
+        */
+       memw                            # (3)
+       s32i    a10, a8, THREAD_CPENABLE
+1:
+       /* Get context save area and call load routine. */
+
+       l32i    a2, a7, CP_REGS_TAB_OFFSET
+       l32i    a3, a7, CP_REGS_TAB_LOAD
+       add     a2, a2, a9
+       callx0  a3
 
        /* Restore all registers and return from exception handler. */
 
-1:     l32i    a6, a1, PT_AREG6
+       l32i    a10, a1, PT_AREG10
+       l32i    a9, a1, PT_AREG9
+       l32i    a8, a1, PT_AREG8
+       l32i    a7, a1, PT_AREG7
+       l32i    a6, a1, PT_AREG6
        l32i    a5, a1, PT_AREG5
        l32i    a4, a1, PT_AREG4
 
@@ -230,29 +282,21 @@ ENDPROC(fast_coprocessor)
 
 ENTRY(coprocessor_flush)
 
-       /* reserve 4 bytes on stack to save a0 */
-       abi_entry(4)
-
-       s32i    a0, a1, 0
-       movi    a0, .Lsave_cp_regs_jump_table
-       addx8   a3, a3, a0
-       l32i    a4, a3, 4
-       l32i    a3, a3, 0
-       add     a2, a2, a4
-       beqz    a3, 1f
-       callx0  a3
-1:     l32i    a0, a1, 0
-
-       abi_ret(4)
+       abi_entry_default
+
+       movi    a4, .Lcp_regs_jump_table
+       addx8   a4, a3, a4
+       addx4   a3, a3, a4
+       l32i    a4, a3, CP_REGS_TAB_SAVE
+       beqz    a4, 1f
+       l32i    a3, a3, CP_REGS_TAB_OFFSET
+       add     a2, a2, a3
+       mov     a7, a0
+       callx0  a4
+       mov     a0, a7
+1:
+       abi_ret_default
 
 ENDPROC(coprocessor_flush)
 
-       .data
-
-ENTRY(coprocessor_owner)
-
-       .fill XCHAL_CP_MAX, 4, 0
-
-END(coprocessor_owner)
-
 #endif /* XTENSA_HAVE_COPROCESSORS */
index 6b6eff658795c82f179a98d642c5fd449c367920..e3eae648ba2e924f263f4b0feb4ca4ceb4ecbdff 100644 (file)
 #include <asm/tlbflush.h>
 #include <variant/tie-asm.h>
 
-/* Unimplemented features. */
-
-#undef KERNEL_STACK_OVERFLOW_CHECK
-
-/* Not well tested.
- *
- * - fast_coprocessor
- */
-
 /*
  * Macro to find first bit set in WINDOWBASE from the left + 1
  *
@@ -178,28 +169,26 @@ _user_exception:
 
        /* Save only live registers. */
 
-UABI_W _bbsi.l a2, 1, 1f
+UABI_W _bbsi.l a2, 1, .Lsave_window_registers
        s32i    a4, a1, PT_AREG4
        s32i    a5, a1, PT_AREG5
        s32i    a6, a1, PT_AREG6
        s32i    a7, a1, PT_AREG7
-UABI_W _bbsi.l a2, 2, 1f
+UABI_W _bbsi.l a2, 2, .Lsave_window_registers
        s32i    a8, a1, PT_AREG8
        s32i    a9, a1, PT_AREG9
        s32i    a10, a1, PT_AREG10
        s32i    a11, a1, PT_AREG11
-UABI_W _bbsi.l a2, 3, 1f
+UABI_W _bbsi.l a2, 3, .Lsave_window_registers
        s32i    a12, a1, PT_AREG12
        s32i    a13, a1, PT_AREG13
        s32i    a14, a1, PT_AREG14
        s32i    a15, a1, PT_AREG15
 
 #if defined(USER_SUPPORT_WINDOWED)
-       _bnei   a2, 1, 1f               # only one valid frame?
+       /* If only one valid frame, skip saving regs. */
 
-       /* Only one valid frame, skip saving regs. */
-
-       j       2f
+       beqi    a2, 1, common_exception
 
        /* Save the remaining registers.
         * We have to save all registers up to the first '1' from
@@ -208,8 +197,8 @@ UABI_W      _bbsi.l a2, 3, 1f
         * All register frames starting from the top field to the marked '1'
         * must be saved.
         */
-
-1:     addi    a3, a2, -1              # eliminate '1' in bit 0: yyyyxxww0
+.Lsave_window_registers:
+       addi    a3, a2, -1              # eliminate '1' in bit 0: yyyyxxww0
        neg     a3, a3                  # yyyyxxww0 -> YYYYXXWW1+1
        and     a3, a3, a2              # max. only one bit is set
 
@@ -250,7 +239,7 @@ UABI_W      _bbsi.l a2, 3, 1f
 
        /* We are back to the original stack pointer (a1) */
 #endif
-2:     /* Now, jump to the common exception handler. */
+       /* Now, jump to the common exception handler. */
 
        j       common_exception
 
@@ -350,15 +339,6 @@ KABI_W     _bbsi.l a2, 3, 1f
        l32i    a0, a1, PT_AREG0        # restore saved a0
        wsr     a0, depc
 
-#ifdef KERNEL_STACK_OVERFLOW_CHECK
-
-       /*  Stack overflow check, for debugging  */
-       extui   a2, a1, TASK_SIZE_BITS,XX
-       movi    a3, SIZE??
-       _bge    a2, a3, out_of_stack_panic
-
-#endif
-
 /*
  * This is the common exception handler.
  * We get here from the user exception handler or simply by falling through
@@ -442,7 +422,6 @@ KABI_W      or      a3, a3, a0
        moveqz  a3, a0, a2              # a3 = LOCKLEVEL iff interrupt
 KABI_W movi    a2, PS_WOE_MASK
 KABI_W or      a3, a3, a2
-       rsr     a2, exccause
 #endif
 
        /* restore return address (or 0 if return to userspace) */
@@ -469,42 +448,56 @@ KABI_W    or      a3, a3, a2
 
        save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT
        
+#ifdef CONFIG_TRACE_IRQFLAGS
+       rsr             abi_tmp0, ps
+       extui           abi_tmp0, abi_tmp0, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+       beqz            abi_tmp0, 1f
+       abi_call        trace_hardirqs_off
+1:
+#endif
+#ifdef CONFIG_CONTEXT_TRACKING
+       l32i            abi_tmp0, a1, PT_PS
+       bbci.l          abi_tmp0, PS_UM_BIT, 1f
+       abi_call        context_tracking_user_exit
+1:
+#endif
+
        /* Go to second-level dispatcher. Set up parameters to pass to the
         * exception handler and call the exception handler.
         */
 
-       rsr     a4, excsave1
-       addx4   a4, a2, a4
-       l32i    a4, a4, EXC_TABLE_DEFAULT               # load handler
-       mov     abi_arg1, a2                    # pass EXCCAUSE
-       mov     abi_arg0, a1                    # pass stack frame
+       l32i            abi_arg1, a1, PT_EXCCAUSE       # pass EXCCAUSE
+       rsr             abi_tmp0, excsave1
+       addx4           abi_tmp0, abi_arg1, abi_tmp0
+       l32i            abi_tmp0, abi_tmp0, EXC_TABLE_DEFAULT   # load handler
+       mov             abi_arg0, a1                    # pass stack frame
 
        /* Call the second-level handler */
 
-       abi_callx       a4
+       abi_callx       abi_tmp0
 
        /* Jump here for exception exit */
        .global common_exception_return
 common_exception_return:
 
 #if XTENSA_FAKE_NMI
-       l32i    abi_tmp0, a1, PT_EXCCAUSE
-       movi    abi_tmp1, EXCCAUSE_MAPPED_NMI
-       l32i    abi_saved1, a1, PT_PS
-       beq     abi_tmp0, abi_tmp1, .Lrestore_state
+       l32i            abi_tmp0, a1, PT_EXCCAUSE
+       movi            abi_tmp1, EXCCAUSE_MAPPED_NMI
+       l32i            abi_saved1, a1, PT_PS
+       beq             abi_tmp0, abi_tmp1, .Lrestore_state
 #endif
 .Ltif_loop:
-       irq_save a2, a3
+       irq_save        abi_tmp0, abi_tmp1
 #ifdef CONFIG_TRACE_IRQFLAGS
        abi_call        trace_hardirqs_off
 #endif
 
        /* Jump if we are returning from kernel exceptions. */
 
-       l32i    abi_saved1, a1, PT_PS
-       GET_THREAD_INFO(a2, a1)
-       l32i    a4, a2, TI_FLAGS
-       _bbci.l abi_saved1, PS_UM_BIT, .Lexit_tif_loop_kernel
+       l32i            abi_saved1, a1, PT_PS
+       GET_THREAD_INFO(abi_tmp0, a1)
+       l32i            abi_saved0, abi_tmp0, TI_FLAGS
+       _bbci.l         abi_saved1, PS_UM_BIT, .Lexit_tif_loop_kernel
 
        /* Specific to a user exception exit:
         * We need to check some flags for signal handling and rescheduling,
@@ -513,75 +506,80 @@ common_exception_return:
         * Note that we don't disable interrupts here. 
         */
 
-       _bbsi.l a4, TIF_NEED_RESCHED, .Lresched
-       movi    a2, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL
-       bnone   a4, a2, .Lexit_tif_loop_user
+       _bbsi.l         abi_saved0, TIF_NEED_RESCHED, .Lresched
+       movi            abi_tmp0, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL
+       bnone           abi_saved0, abi_tmp0, .Lexit_tif_loop_user
 
-       l32i    a4, a1, PT_DEPC
-       bgeui   a4, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state
+       l32i            abi_tmp0, a1, PT_DEPC
+       bgeui           abi_tmp0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state
 
        /* Call do_signal() */
 
 #ifdef CONFIG_TRACE_IRQFLAGS
        abi_call        trace_hardirqs_on
 #endif
-       rsil    a2, 0
-       mov     abi_arg0, a1
+       rsil            abi_tmp0, 0
+       mov             abi_arg0, a1
        abi_call        do_notify_resume        # int do_notify_resume(struct pt_regs*)
-       j       .Ltif_loop
+       j               .Ltif_loop
 
 .Lresched:
 #ifdef CONFIG_TRACE_IRQFLAGS
        abi_call        trace_hardirqs_on
 #endif
-       rsil    a2, 0
+       rsil            abi_tmp0, 0
        abi_call        schedule        # void schedule (void)
-       j       .Ltif_loop
+       j               .Ltif_loop
 
 .Lexit_tif_loop_kernel:
 #ifdef CONFIG_PREEMPTION
-       _bbci.l a4, TIF_NEED_RESCHED, .Lrestore_state
+       _bbci.l         abi_saved0, TIF_NEED_RESCHED, .Lrestore_state
 
        /* Check current_thread_info->preempt_count */
 
-       l32i    a4, a2, TI_PRE_COUNT
-       bnez    a4, .Lrestore_state
+       l32i            abi_tmp1, abi_tmp0, TI_PRE_COUNT
+       bnez            abi_tmp1, .Lrestore_state
        abi_call        preempt_schedule_irq
 #endif
-       j       .Lrestore_state
+       j               .Lrestore_state
 
 .Lexit_tif_loop_user:
+#ifdef CONFIG_CONTEXT_TRACKING
+       abi_call        context_tracking_user_enter
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-       _bbci.l a4, TIF_DB_DISABLED, 1f
+       _bbci.l         abi_saved0, TIF_DB_DISABLED, 1f
        abi_call        restore_dbreak
 1:
 #endif
 #ifdef CONFIG_DEBUG_TLB_SANITY
-       l32i    a4, a1, PT_DEPC
-       bgeui   a4, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state
+       l32i            abi_tmp0, a1, PT_DEPC
+       bgeui           abi_tmp0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lrestore_state
        abi_call        check_tlb_sanity
 #endif
 
 .Lrestore_state:
 #ifdef CONFIG_TRACE_IRQFLAGS
-       extui   a4, abi_saved1, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
-       bgei    a4, LOCKLEVEL, 1f
+       extui           abi_tmp0, abi_saved1, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+       bgei            abi_tmp0, LOCKLEVEL, 1f
        abi_call        trace_hardirqs_on
 1:
 #endif
-       /* Restore optional registers. */
+       /*
+        * Restore optional registers.
+        * abi_arg* are used as temporary registers here.
+        */
 
-       load_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
+       load_xtregs_opt a1 abi_tmp0 abi_arg0 abi_arg1 abi_arg2 abi_arg3 PT_XTREGS_OPT
 
        /* Restore SCOMPARE1 */
 
 #if XCHAL_HAVE_S32C1I
-       l32i    a2, a1, PT_SCOMPARE1
-       wsr     a2, scompare1
+       l32i            abi_tmp0, a1, PT_SCOMPARE1
+       wsr             abi_tmp0, scompare1
 #endif
-       wsr     abi_saved1, ps          /* disable interrupts */
-
-       _bbci.l abi_saved1, PS_UM_BIT, kernel_exception_exit
+       wsr             abi_saved1, ps          /* disable interrupts */
+       _bbci.l         abi_saved1, PS_UM_BIT, kernel_exception_exit
 
 user_exception_exit:
 
@@ -795,7 +793,7 @@ ENDPROC(kernel_exception)
 ENTRY(debug_exception)
 
        rsr     a0, SREG_EPS + XCHAL_DEBUGLEVEL
-       bbsi.l  a0, PS_EXCM_BIT, 1f     # exception mode
+       bbsi.l  a0, PS_EXCM_BIT, .Ldebug_exception_in_exception # exception mode
 
        /* Set EPC1 and EXCCAUSE */
 
@@ -814,10 +812,10 @@ ENTRY(debug_exception)
 
        /* Switch to kernel/user stack, restore jump vector, and save a0 */
 
-       bbsi.l  a2, PS_UM_BIT, 2f       # jump if user mode
-
+       bbsi.l  a2, PS_UM_BIT, .Ldebug_exception_user   # jump if user mode
        addi    a2, a1, -16 - PT_KERNEL_SIZE    # assume kernel stack
-3:
+
+.Ldebug_exception_continue:
        l32i    a0, a3, DT_DEBUG_SAVE
        s32i    a1, a2, PT_AREG1
        s32i    a0, a2, PT_AREG0
@@ -845,10 +843,12 @@ ENTRY(debug_exception)
        bbsi.l  a2, PS_UM_BIT, _user_exception
        j       _kernel_exception
 
-2:     rsr     a2, excsave1
+.Ldebug_exception_user:
+       rsr     a2, excsave1
        l32i    a2, a2, EXC_TABLE_KSTK  # load kernel stack pointer
-       j       3b
+       j       .Ldebug_exception_continue
 
+.Ldebug_exception_in_exception:
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        /* Debug exception while in exception mode. This may happen when
         * window overflow/underflow handler or fast exception handler hits
@@ -856,8 +856,8 @@ ENTRY(debug_exception)
         * breakpoints, single-step faulting instruction and restore data
         * breakpoints.
         */
-1:
-       bbci.l  a0, PS_UM_BIT, 1b       # jump if kernel mode
+
+       bbci.l  a0, PS_UM_BIT, .Ldebug_exception_in_exception   # jump if kernel mode
 
        rsr     a0, debugcause
        bbsi.l  a0, DEBUGCAUSE_DBREAK_BIT, .Ldebug_save_dbreak
@@ -901,7 +901,7 @@ ENTRY(debug_exception)
        rfi     XCHAL_DEBUGLEVEL
 #else
        /* Debug exception while in exception mode. Should not happen. */
-1:     j       1b      // FIXME!!
+       j       .Ldebug_exception_in_exception  // FIXME!!
 #endif
 
 ENDPROC(debug_exception)
@@ -1056,6 +1056,11 @@ ENTRY(fast_illegal_instruction_user)
        movi    a3, PS_WOE_MASK
        or      a0, a0, a3
        wsr     a0, ps
+#ifdef CONFIG_USER_ABI_CALL0_PROBE
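+       /*
+        * Remember where PS.WOE was forced on so that, if the retried
+        * instruction is still illegal, do_illegal_instruction() can tell
+        * that the fault is unrelated to the windowed option.
+        */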
+       GET_THREAD_INFO(a3, a2)
+       rsr     a0, epc1
+       s32i    a0, a3, TI_PS_WOE_FIX_ADDR
+#endif
        l32i    a3, a2, PT_AREG3
        l32i    a0, a2, PT_AREG0
        rsr     a2, depc
@@ -1630,12 +1635,13 @@ ENTRY(fast_second_level_miss)
 
        GET_CURRENT(a1,a2)
        l32i    a0, a1, TASK_MM         # tsk->mm
-       beqz    a0, 9f
+       beqz    a0, .Lfast_second_level_miss_no_mm
 
-8:     rsr     a3, excvaddr            # fault address
+.Lfast_second_level_miss_continue:
+       rsr     a3, excvaddr            # fault address
        _PGD_OFFSET(a0, a3, a1)
        l32i    a0, a0, 0               # read pmdval
-       beqz    a0, 2f
+       beqz    a0, .Lfast_second_level_miss_no_pmd
 
        /* Read ptevaddr and convert to top of page-table page.
         *
@@ -1678,12 +1684,13 @@ ENTRY(fast_second_level_miss)
        addi    a3, a3, DTLB_WAY_PGD
        add     a1, a1, a3              # ... + way_number
 
-3:     wdtlb   a0, a1
+.Lfast_second_level_miss_wdtlb:
+       wdtlb   a0, a1
        dsync
 
        /* Exit critical section. */
-
-4:     rsr     a3, excsave1
+.Lfast_second_level_miss_skip_wdtlb:
+       rsr     a3, excsave1
        movi    a0, 0
        s32i    a0, a3, EXC_TABLE_FIXUP
 
@@ -1707,19 +1714,21 @@ ENTRY(fast_second_level_miss)
        esync
        rfde
 
-9:     l32i    a0, a1, TASK_ACTIVE_MM  # unlikely case mm == 0
-       bnez    a0, 8b
+.Lfast_second_level_miss_no_mm:
+       l32i    a0, a1, TASK_ACTIVE_MM  # unlikely case mm == 0
+       bnez    a0, .Lfast_second_level_miss_continue
 
        /* Even more unlikely case: active_mm == 0.
         * We can get here with an NMI in the middle of context_switch that
         * touches the vmalloc area.
         */
        movi    a0, init_mm
-       j       8b
+       j       .Lfast_second_level_miss_continue
 
+.Lfast_second_level_miss_no_pmd:
 #if (DCACHE_WAY_SIZE > PAGE_SIZE)
 
-2:     /* Special case for cache aliasing.
+       /* Special case for cache aliasing.
         * We (should) only get here if a clear_user_page, copy_user_page
         * or the aliased cache flush functions got preemptively interrupted 
         * by another task. Re-establish temporary mapping to the 
@@ -1729,24 +1738,24 @@ ENTRY(fast_second_level_miss)
        /* We shouldn't be in a double exception */
 
        l32i    a0, a2, PT_DEPC
-       bgeui   a0, VALID_DOUBLE_EXCEPTION_ADDRESS, 2f
+       bgeui   a0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lfast_second_level_miss_slow
 
        /* Make sure the exception originated in the special functions */
 
        movi    a0, __tlbtemp_mapping_start
        rsr     a3, epc1
-       bltu    a3, a0, 2f
+       bltu    a3, a0, .Lfast_second_level_miss_slow
        movi    a0, __tlbtemp_mapping_end
-       bgeu    a3, a0, 2f
+       bgeu    a3, a0, .Lfast_second_level_miss_slow
 
        /* Check if excvaddr was in one of the TLBTEMP_BASE areas. */
 
        movi    a3, TLBTEMP_BASE_1
        rsr     a0, excvaddr
-       bltu    a0, a3, 2f
+       bltu    a0, a3, .Lfast_second_level_miss_slow
 
        addi    a1, a0, -TLBTEMP_SIZE
-       bgeu    a1, a3, 2f
+       bgeu    a1, a3, .Lfast_second_level_miss_slow
 
        /* Check if we have to restore an ITLB mapping. */
 
@@ -1772,19 +1781,19 @@ ENTRY(fast_second_level_miss)
 
        mov     a0, a6
        movnez  a0, a7, a3
-       j       3b
+       j       .Lfast_second_level_miss_wdtlb
 
        /* ITLB entry. We only use dst in a6. */
 
 1:     witlb   a6, a1
        isync
-       j       4b
+       j       .Lfast_second_level_miss_skip_wdtlb
 
 
 #endif // DCACHE_WAY_SIZE > PAGE_SIZE
 
-
-2:     /* Invalid PGD, default exception handling */
+       /* Invalid PGD, default exception handling */
+.Lfast_second_level_miss_slow:
 
        rsr     a1, depc
        s32i    a1, a2, PT_AREG2
@@ -1824,12 +1833,13 @@ ENTRY(fast_store_prohibited)
 
        GET_CURRENT(a1,a2)
        l32i    a0, a1, TASK_MM         # tsk->mm
-       beqz    a0, 9f
+       beqz    a0, .Lfast_store_no_mm
 
-8:     rsr     a1, excvaddr            # fault address
+.Lfast_store_continue:
+       rsr     a1, excvaddr            # fault address
        _PGD_OFFSET(a0, a1, a3)
        l32i    a0, a0, 0
-       beqz    a0, 2f
+       beqz    a0, .Lfast_store_slow
 
        /*
         * Note that we test _PAGE_WRITABLE_BIT only if PTE is present
@@ -1839,8 +1849,8 @@ ENTRY(fast_store_prohibited)
        _PTE_OFFSET(a0, a1, a3)
        l32i    a3, a0, 0               # read pteval
        movi    a1, _PAGE_CA_INVALID
-       ball    a3, a1, 2f
-       bbci.l  a3, _PAGE_WRITABLE_BIT, 2f
+       ball    a3, a1, .Lfast_store_slow
+       bbci.l  a3, _PAGE_WRITABLE_BIT, .Lfast_store_slow
 
        movi    a1, _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HW_WRITE
        or      a3, a3, a1
@@ -1868,7 +1878,6 @@ ENTRY(fast_store_prohibited)
        l32i    a2, a2, PT_DEPC
 
        bgeui   a2, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-
        rsr     a2, depc
        rfe
 
@@ -1878,11 +1887,17 @@ ENTRY(fast_store_prohibited)
        esync
        rfde
 
-9:     l32i    a0, a1, TASK_ACTIVE_MM  # unlikely case mm == 0
-       j       8b
-
-2:     /* If there was a problem, handle fault in C */
+.Lfast_store_no_mm:
+       l32i    a0, a1, TASK_ACTIVE_MM  # unlikely case mm == 0
+       j       .Lfast_store_continue
 
+       /* If there was a problem, handle fault in C */
+.Lfast_store_slow:
+       rsr     a1, excvaddr
+       pdtlb   a0, a1
+       bbci.l  a0, DTLB_HIT_BIT, 1f
+       idtlb   a0
+1:
        rsr     a3, depc        # still holds a2
        s32i    a3, a2, PT_AREG2
        mov     a1, a2
@@ -2071,8 +2086,16 @@ ENTRY(_switch_to)
 
 #if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
        l32i    a3, a5, THREAD_CPENABLE
-       xsr     a3, cpenable
-       s32i    a3, a4, THREAD_CPENABLE
+#ifdef CONFIG_SMP
+       beqz    a3, 1f
+       memw                    # pairs with memw (2) in fast_coprocessor
+       l32i    a6, a5, THREAD_CP_OWNER_CPU
+       l32i    a7, a5, THREAD_CPU
+       beq     a6, a7, 1f      # load 0 into CPENABLE if current CPU is not the owner
+       movi    a3, 0
+1:
+#endif
+       wsr     a3, cpenable
 #endif
 
 #if XCHAL_HAVE_EXCLUSIVE
@@ -2147,3 +2170,95 @@ ENTRY(ret_from_kernel_thread)
        j               common_exception_return
 
 ENDPROC(ret_from_kernel_thread)
+
+#ifdef CONFIG_HIBERNATION
+
+       .bss
+       .align  4
+.Lsaved_regs:
+#if defined(__XTENSA_WINDOWED_ABI__)
+       .fill   2, 4
+#elif defined(__XTENSA_CALL0_ABI__)
+       .fill   6, 4
+#else
+#error Unsupported Xtensa ABI
+#endif
+       .align  XCHAL_NCP_SA_ALIGN
+.Lsaved_user_regs:
+       .fill   XTREGS_USER_SIZE, 1
+
+       .previous
+
+ENTRY(swsusp_arch_suspend)
+
+       abi_entry_default
+
+       movi            a2, .Lsaved_regs
+       movi            a3, .Lsaved_user_regs
+       s32i            a0, a2, 0
+       s32i            a1, a2, 4
+       save_xtregs_user a3 a4 a5 a6 a7 a8 0
+#if defined(__XTENSA_WINDOWED_ABI__)
+       spill_registers_kernel
+#elif defined(__XTENSA_CALL0_ABI__)
+       s32i            a12, a2, 8
+       s32i            a13, a2, 12
+       s32i            a14, a2, 16
+       s32i            a15, a2, 20
+#else
+#error Unsupported Xtensa ABI
+#endif
+       abi_call        swsusp_save
+       mov             a2, abi_rv
+       abi_ret_default
+
+ENDPROC(swsusp_arch_suspend)
+
+ENTRY(swsusp_arch_resume)
+
+       abi_entry_default
+
+#if defined(__XTENSA_WINDOWED_ABI__)
+       spill_registers_kernel
+#endif
+
+       movi            a2, restore_pblist
+       l32i            a2, a2, 0
+
+.Lcopy_pbe:
+       l32i            a3, a2, PBE_ADDRESS
+       l32i            a4, a2, PBE_ORIG_ADDRESS
+
+       __loopi         a3, a9, PAGE_SIZE, 16
+       l32i            a5, a3, 0
+       l32i            a6, a3, 4
+       l32i            a7, a3, 8
+       l32i            a8, a3, 12
+       addi            a3, a3, 16
+       s32i            a5, a4, 0
+       s32i            a6, a4, 4
+       s32i            a7, a4, 8
+       s32i            a8, a4, 12
+       addi            a4, a4, 16
+       __endl          a3, a9
+
+       l32i            a2, a2, PBE_NEXT
+       bnez            a2, .Lcopy_pbe
+
+       movi            a2, .Lsaved_regs
+       movi            a3, .Lsaved_user_regs
+       l32i            a0, a2, 0
+       l32i            a1, a2, 4
+       load_xtregs_user a3 a4 a5 a6 a7 a8 0
+#if defined(__XTENSA_CALL0_ABI__)
+       l32i            a12, a2, 8
+       l32i            a13, a2, 12
+       l32i            a14, a2, 16
+       l32i            a15, a2, 20
+#endif
+       movi            a2, 0
+       abi_ret_default
+
+ENDPROC(swsusp_arch_resume)
+
+#endif
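
The .Lcopy_pbe loop above is the assembly counterpart of walking the
hibernation restore list in 16-byte chunks; a C sketch of what it does:

        struct pbe *p;

        /* copy each saved image page back to its original page frame */
        for (p = restore_pblist; p != NULL; p = p->next)
                memcpy(p->orig_address, p->address, PAGE_SIZE);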
diff --git a/arch/xtensa/kernel/hibernate.c b/arch/xtensa/kernel/hibernate.c
new file mode 100644 (file)
index 0000000..0698432
--- /dev/null
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <asm/coprocessor.h>
+
+int pfn_is_nosave(unsigned long pfn)
+{
+       unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
+       unsigned long nosave_end_pfn = PFN_UP(__pa(&__nosave_end));
+
+       return  (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+       WARN_ON(num_online_cpus() != 1);
+#if XTENSA_HAVE_COPROCESSORS
+       local_coprocessors_flush_release_all();
+#endif
+}
+
+void notrace restore_processor_state(void)
+{
+}
index e8bfbca5f0014faa0910728aa39707d14fb201af..7e38292dd07abff7d016c7249715c73f827e15c0 100644 (file)
@@ -47,6 +47,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/regs.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/traps.h>
 
 extern void ret_from_fork(void);
 extern void ret_from_kernel_thread(void);
@@ -63,52 +64,114 @@ EXPORT_SYMBOL(__stack_chk_guard);
 
 #if XTENSA_HAVE_COPROCESSORS
 
-void coprocessor_release_all(struct thread_info *ti)
+void local_coprocessors_flush_release_all(void)
 {
-       unsigned long cpenable;
-       int i;
+       struct thread_info **coprocessor_owner;
+       struct thread_info *unique_owner[XCHAL_CP_MAX];
+       int n = 0;
+       int i, j;
 
-       /* Make sure we don't switch tasks during this operation. */
+       coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+       xtensa_set_sr(XCHAL_CP_MASK, cpenable);
 
-       preempt_disable();
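+       /*
+        * A task may own several coprocessors; remember each distinct owner
+        * in unique_owner[] so that its cpenable is cleared exactly once,
+        * after all of its contexts have been flushed.
+        */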
+       for (i = 0; i < XCHAL_CP_MAX; i++) {
+               struct thread_info *ti = coprocessor_owner[i];
 
-       /* Walk through all cp owners and release it for the requested one. */
+               if (ti) {
+                       coprocessor_flush(ti, i);
 
-       cpenable = ti->cpenable;
+                       for (j = 0; j < n; j++)
+                               if (unique_owner[j] == ti)
+                                       break;
+                       if (j == n)
+                               unique_owner[n++] = ti;
 
-       for (i = 0; i < XCHAL_CP_MAX; i++) {
-               if (coprocessor_owner[i] == ti) {
-                       coprocessor_owner[i] = 0;
-                       cpenable &= ~(1 << i);
+                       coprocessor_owner[i] = NULL;
                }
        }
+       for (i = 0; i < n; i++) {
+               /* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+               smp_wmb();
+               unique_owner[i]->cpenable = 0;
+       }
+       xtensa_set_sr(0, cpenable);
+}
 
-       ti->cpenable = cpenable;
+static void local_coprocessor_release_all(void *info)
+{
+       struct thread_info *ti = info;
+       struct thread_info **coprocessor_owner;
+       int i;
+
+       coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+
+       /* Walk through all cp owners and release it for the requested one. */
+
+       for (i = 0; i < XCHAL_CP_MAX; i++) {
+               if (coprocessor_owner[i] == ti)
+                       coprocessor_owner[i] = NULL;
+       }
+       /* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+       smp_wmb();
+       ti->cpenable = 0;
        if (ti == current_thread_info())
                xtensa_set_sr(0, cpenable);
+}
 
-       preempt_enable();
+void coprocessor_release_all(struct thread_info *ti)
+{
+       if (ti->cpenable) {
+               /* pairs with memw (2) in fast_coprocessor */
+               smp_rmb();
+               smp_call_function_single(ti->cp_owner_cpu,
+                                        local_coprocessor_release_all,
+                                        ti, true);
+       }
 }
 
-void coprocessor_flush_all(struct thread_info *ti)
+static void local_coprocessor_flush_all(void *info)
 {
-       unsigned long cpenable, old_cpenable;
+       struct thread_info *ti = info;
+       struct thread_info **coprocessor_owner;
+       unsigned long old_cpenable;
        int i;
 
-       preempt_disable();
-
-       old_cpenable = xtensa_get_sr(cpenable);
-       cpenable = ti->cpenable;
-       xtensa_set_sr(cpenable, cpenable);
+       coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+       old_cpenable = xtensa_xsr(ti->cpenable, cpenable);
 
        for (i = 0; i < XCHAL_CP_MAX; i++) {
-               if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
+               if (coprocessor_owner[i] == ti)
                        coprocessor_flush(ti, i);
-               cpenable >>= 1;
        }
        xtensa_set_sr(old_cpenable, cpenable);
+}
 
-       preempt_enable();
+void coprocessor_flush_all(struct thread_info *ti)
+{
+       if (ti->cpenable) {
+               /* pairs with memw (2) in fast_coprocessor */
+               smp_rmb();
+               smp_call_function_single(ti->cp_owner_cpu,
+                                        local_coprocessor_flush_all,
+                                        ti, true);
+       }
+}
+
+static void local_coprocessor_flush_release_all(void *info)
+{
+       local_coprocessor_flush_all(info);
+       local_coprocessor_release_all(info);
+}
+
+void coprocessor_flush_release_all(struct thread_info *ti)
+{
+       if (ti->cpenable) {
+               /* pairs with memw (2) in fast_coprocessor */
+               smp_rmb();
+               smp_call_function_single(ti->cp_owner_cpu,
+                                        local_coprocessor_flush_release_all,
+                                        ti, true);
+       }
 }
 
 #endif
@@ -140,8 +203,7 @@ void flush_thread(void)
 {
 #if XTENSA_HAVE_COPROCESSORS
        struct thread_info *ti = current_thread_info();
-       coprocessor_flush_all(ti);
-       coprocessor_release_all(ti);
+       coprocessor_flush_release_all(ti);
 #endif
        flush_ptrace_hw_breakpoint(current);
 }
index 323c678a691ff6dc5dfd97bb340daadd0856b6c2..22cdaa6729d3e281815efc3e76c6b93ac671131c 100644 (file)
@@ -171,8 +171,7 @@ static int tie_set(struct task_struct *target,
 
 #if XTENSA_HAVE_COPROCESSORS
        /* Flush all coprocessors before we overwrite them. */
-       coprocessor_flush_all(ti);
-       coprocessor_release_all(ti);
+       coprocessor_flush_release_all(ti);
        ti->xtregs_cp.cp0 = newregs->cp0;
        ti->xtregs_cp.cp1 = newregs->cp1;
        ti->xtregs_cp.cp2 = newregs->cp2;
index 07e56e3a9a8b2526a2190c3c8a79576780f9e312..8362388c8719b4e4f628e821b6c920a7be5de264 100644 (file)
@@ -40,14 +40,13 @@ static inline int probed_compare_swap(int *v, int cmp, int set)
 
 /* Handle probed exception */
 
-static void __init do_probed_exception(struct pt_regs *regs,
-                                      unsigned long exccause)
+static void __init do_probed_exception(struct pt_regs *regs)
 {
        if (regs->pc == rcw_probe_pc) { /* exception on s32c1i ? */
                regs->pc += 3;          /* skip the s32c1i instruction */
-               rcw_exc = exccause;
+               rcw_exc = regs->exccause;
        } else {
-               do_unhandled(regs, exccause);
+               do_unhandled(regs);
        }
 }
 
index 6f68649e86ba5af13bc130a03ba5c79e1b7b561e..c9ffd42db873638bb634619c66dc9711eeaebb23 100644 (file)
@@ -162,8 +162,7 @@ setup_sigcontext(struct rt_sigframe __user *frame, struct pt_regs *regs)
                return err;
 
 #if XTENSA_HAVE_COPROCESSORS
-       coprocessor_flush_all(ti);
-       coprocessor_release_all(ti);
+       coprocessor_flush_release_all(ti);
        err |= __copy_to_user(&frame->xtregs.cp, &ti->xtregs_cp,
                              sizeof (frame->xtregs.cp));
 #endif
index 1254da07ead1f42712918b90c54b45ebf83cb5bb..4dc109dd6214e27c47241ac1fcc081b3923dcfbd 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/thread_info.h>
 
 #include <asm/cacheflush.h>
+#include <asm/coprocessor.h>
 #include <asm/kdebug.h>
 #include <asm/mmu_context.h>
 #include <asm/mxregs.h>
@@ -272,6 +273,12 @@ int __cpu_disable(void)
         */
        set_cpu_online(cpu, false);
 
+#if XTENSA_HAVE_COPROCESSORS
+       /*
+        * Flush coprocessor contexts that are active on the current CPU.
+        */
+       local_coprocessors_flush_release_all();
+#endif
        /*
         * OK - migrate IRQs away from this CPU
         */
index 9345007d474d311429e3b891ab2f1927dc2b03f9..0c25e035ff107b83f40c659e4ee1d7caf419910e 100644 (file)
  * Machine specific interrupt handlers
  */
 
-extern void kernel_exception(void);
-extern void user_exception(void);
-
-extern void fast_illegal_instruction_user(void);
-extern void fast_syscall_user(void);
-extern void fast_alloca(void);
-extern void fast_unaligned(void);
-extern void fast_second_level_miss(void);
-extern void fast_store_prohibited(void);
-extern void fast_coprocessor(void);
-
-extern void do_illegal_instruction (struct pt_regs*);
-extern void do_interrupt (struct pt_regs*);
-extern void do_nmi(struct pt_regs *);
-extern void do_unaligned_user (struct pt_regs*);
-extern void do_multihit (struct pt_regs*, unsigned long);
-extern void do_page_fault (struct pt_regs*, unsigned long);
-extern void do_debug (struct pt_regs*);
-extern void system_call (struct pt_regs*);
+static void do_illegal_instruction(struct pt_regs *regs);
+static void do_div0(struct pt_regs *regs);
+static void do_interrupt(struct pt_regs *regs);
+#if XTENSA_FAKE_NMI
+static void do_nmi(struct pt_regs *regs);
+#endif
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
+static void do_unaligned_user(struct pt_regs *regs);
+#endif
+static void do_multihit(struct pt_regs *regs);
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs);
+#endif
+static void do_debug(struct pt_regs *regs);
 
 /*
  * The vector table must be preceded by a save area (which
@@ -78,7 +73,8 @@ extern void system_call (struct pt_regs*);
 #define USER           0x02
 
 #define COPROCESSOR(x)                                                 \
-{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER, fast_coprocessor }
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor },\
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, 0, do_coprocessor }
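+/*
+ * Each coprocessor gets two dispatch entries: fast_coprocessor for the
+ * user/kernel fast path and do_coprocessor as the default C handler that
+ * the fast path falls back to, e.g. when the context is live on another
+ * CPU.
+ */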
 
 typedef struct {
        int cause;
@@ -100,7 +96,7 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = {
 #ifdef SUPPORT_WINDOWED
 { EXCCAUSE_ALLOCA,             USER|KRNL, fast_alloca },
 #endif
-/* EXCCAUSE_INTEGER_DIVIDE_BY_ZERO unhandled */
+{ EXCCAUSE_INTEGER_DIVIDE_BY_ZERO, 0,     do_div0 },
 /* EXCCAUSE_PRIVILEGED unhandled */
 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
 #ifdef CONFIG_XTENSA_UNALIGNED_USER
@@ -110,21 +106,21 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = {
 { EXCCAUSE_UNALIGNED,          KRNL,      fast_unaligned },
 #endif
 #ifdef CONFIG_MMU
-{ EXCCAUSE_ITLB_MISS,          0,         do_page_fault },
-{ EXCCAUSE_ITLB_MISS,          USER|KRNL, fast_second_level_miss},
+{ EXCCAUSE_ITLB_MISS,                  0,         do_page_fault },
+{ EXCCAUSE_ITLB_MISS,                  USER|KRNL, fast_second_level_miss},
+{ EXCCAUSE_DTLB_MISS,                  USER|KRNL, fast_second_level_miss},
+{ EXCCAUSE_DTLB_MISS,                  0,         do_page_fault },
+{ EXCCAUSE_STORE_CACHE_ATTRIBUTE,      USER|KRNL, fast_store_prohibited },
+#endif /* CONFIG_MMU */
+#ifdef CONFIG_PFAULT
 { EXCCAUSE_ITLB_MULTIHIT,              0,         do_multihit },
-{ EXCCAUSE_ITLB_PRIVILEGE,     0,         do_page_fault },
-/* EXCCAUSE_SIZE_RESTRICTION unhandled */
+{ EXCCAUSE_ITLB_PRIVILEGE,             0,         do_page_fault },
 { EXCCAUSE_FETCH_CACHE_ATTRIBUTE,      0,         do_page_fault },
-{ EXCCAUSE_DTLB_MISS,          USER|KRNL, fast_second_level_miss},
-{ EXCCAUSE_DTLB_MISS,          0,         do_page_fault },
 { EXCCAUSE_DTLB_MULTIHIT,              0,         do_multihit },
-{ EXCCAUSE_DTLB_PRIVILEGE,     0,         do_page_fault },
-/* EXCCAUSE_DTLB_SIZE_RESTRICTION unhandled */
-{ EXCCAUSE_STORE_CACHE_ATTRIBUTE,      USER|KRNL, fast_store_prohibited },
+{ EXCCAUSE_DTLB_PRIVILEGE,             0,         do_page_fault },
 { EXCCAUSE_STORE_CACHE_ATTRIBUTE,      0,         do_page_fault },
 { EXCCAUSE_LOAD_CACHE_ATTRIBUTE,       0,         do_page_fault },
-#endif /* CONFIG_MMU */
+#endif
 /* XCCHAL_EXCCAUSE_FLOATING_POINT unhandled */
 #if XTENSA_HAVE_COPROCESSOR(0)
 COPROCESSOR(0),
@@ -179,7 +175,7 @@ __die_if_kernel(const char *str, struct pt_regs *regs, long err)
  * Unhandled Exceptions. Kill user task or panic if in kernel space.
  */
 
-void do_unhandled(struct pt_regs *regs, unsigned long exccause)
+void do_unhandled(struct pt_regs *regs)
 {
        __die_if_kernel("Caught unhandled exception - should not happen",
                        regs, SIGKILL);
@@ -189,7 +185,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
                            "(pid = %d, pc = %#010lx) - should not happen\n"
                            "\tEXCCAUSE is %ld\n",
                            current->comm, task_pid_nr(current), regs->pc,
-                           exccause);
+                           regs->exccause);
        force_sig(SIGILL);
 }
 
@@ -197,7 +193,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
  * Multi-hit exception. This is fatal!
  */
 
-void do_multihit(struct pt_regs *regs, unsigned long exccause)
+static void do_multihit(struct pt_regs *regs)
 {
        die("Caught multihit exception", regs, SIGKILL);
 }
@@ -206,8 +202,6 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
  * IRQ handler.
  */
 
-extern void do_IRQ(int, struct pt_regs *);
-
 #if XTENSA_FAKE_NMI
 
 #define IS_POW2(v) (((v) & ((v) - 1)) == 0)
@@ -240,14 +234,10 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
 
 DEFINE_PER_CPU(unsigned long, nmi_count);
 
-void do_nmi(struct pt_regs *regs)
+static void do_nmi(struct pt_regs *regs)
 {
-       struct pt_regs *old_regs;
-
-       if ((regs->ps & PS_INTLEVEL_MASK) < LOCKLEVEL)
-               trace_hardirqs_off();
+       struct pt_regs *old_regs = set_irq_regs(regs);
 
-       old_regs = set_irq_regs(regs);
        nmi_enter();
        ++*this_cpu_ptr(&nmi_count);
        check_valid_nmi();
@@ -257,7 +247,7 @@ void do_nmi(struct pt_regs *regs)
 }
 #endif
 
-void do_interrupt(struct pt_regs *regs)
+static void do_interrupt(struct pt_regs *regs)
 {
        static const unsigned int_level_mask[] = {
                0,
@@ -269,12 +259,9 @@ void do_interrupt(struct pt_regs *regs)
                XCHAL_INTLEVEL6_MASK,
                XCHAL_INTLEVEL7_MASK,
        };
-       struct pt_regs *old_regs;
+       struct pt_regs *old_regs = set_irq_regs(regs);
        unsigned unhandled = ~0u;
 
-       trace_hardirqs_off();
-
-       old_regs = set_irq_regs(regs);
        irq_enter();
 
        for (;;) {
@@ -306,13 +293,47 @@ void do_interrupt(struct pt_regs *regs)
        set_irq_regs(old_regs);
 }
 
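+/*
+ * The software division helpers signal division by zero by executing an
+ * illegal instruction immediately followed by the ASCII marker "DIV0"
+ * (presumably emitted by the division routines in lib-y; the marker lands
+ * at pc + 2 or pc + 3 depending on whether the 2-byte ill.n or the 3-byte
+ * ill opcode was used, hence the two memcmp probes below).
+ */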
+static bool check_div0(struct pt_regs *regs)
+{
+       static const u8 pattern[] = {'D', 'I', 'V', '0'};
+       const u8 *p;
+       u8 buf[5];
+
+       if (user_mode(regs)) {
+               if (copy_from_user(buf, (void __user *)regs->pc + 2, 5))
+                       return false;
+               p = buf;
+       } else {
+               p = (const u8 *)regs->pc + 2;
+       }
+
+       return memcmp(p, pattern, sizeof(pattern)) == 0 ||
+               memcmp(p + 1, pattern, sizeof(pattern)) == 0;
+}
+
 /*
  * Illegal instruction. Fatal if in kernel space.
  */
 
-void
-do_illegal_instruction(struct pt_regs *regs)
+static void do_illegal_instruction(struct pt_regs *regs)
 {
+#ifdef CONFIG_USER_ABI_CALL0_PROBE
+       /*
+        * When a call0 application encounters an illegal instruction, the
+        * fast exception handler will attempt to set PS.WOE and retry the
+        * failing instruction.
+        * If we get here we know that the instruction is also illegal
+        * with PS.WOE set, so it's not related to the windowed option and
+        * hence PS.WOE may be cleared.
+        */
+       if (regs->pc == current_thread_info()->ps_woe_fix_addr)
+               regs->ps &= ~PS_WOE_MASK;
+#endif
+       if (check_div0(regs)) {
+               do_div0(regs);
+               return;
+       }
+
        __die_if_kernel("Illegal instruction in kernel", regs, SIGKILL);
 
        /* If in user mode, send SIGILL signal to current process. */
@@ -322,6 +343,11 @@ do_illegal_instruction(struct pt_regs *regs)
        force_sig(SIGILL);
 }
 
+static void do_div0(struct pt_regs *regs)
+{
+       __die_if_kernel("Unhandled division by 0 in kernel", regs, SIGKILL);
+       force_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)regs->pc);
+}
 
 /*
  * Handle unaligned memory accesses from user space. Kill task.
@@ -331,8 +357,7 @@ do_illegal_instruction(struct pt_regs *regs)
  */
 
 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-void
-do_unaligned_user (struct pt_regs *regs)
+static void do_unaligned_user(struct pt_regs *regs)
 {
        __die_if_kernel("Unhandled unaligned exception in kernel",
                        regs, SIGKILL);
@@ -347,14 +372,20 @@ do_unaligned_user (struct pt_regs *regs)
 }
 #endif
 
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs)
+{
+       coprocessor_flush_release_all(current_thread_info());
+}
+#endif
+
 /* Handle debug events.
  * When CONFIG_HAVE_HW_BREAKPOINT is on this handler is called with
  * preemption disabled to avoid rescheduling and keep mapping of hardware
  * breakpoint structures to debug registers intact, so that
  * DEBUGCAUSE.DBNUM could be used in case of data breakpoint hit.
  */
-void
-do_debug(struct pt_regs *regs)
+static void do_debug(struct pt_regs *regs)
 {
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        int ret = check_hw_breakpoint(regs);
@@ -381,7 +412,8 @@ do_debug(struct pt_regs *regs)
 
 /* Set exception C handler - for temporary use when probing exceptions */
 
-void * __init trap_set_handler(int cause, void *handler)
+xtensa_exception_handler *
+__init trap_set_handler(int cause, xtensa_exception_handler *handler)
 {
        void *previous = per_cpu(exc_table, 0).default_handler[cause];
 
@@ -392,8 +424,7 @@ void * __init trap_set_handler(int cause, void *handler)
 
 static void trap_init_excsave(void)
 {
-       unsigned long excsave1 = (unsigned long)this_cpu_ptr(&exc_table);
-       __asm__ __volatile__("wsr  %0, excsave1\n" : : "a" (excsave1));
+       xtensa_set_sr(this_cpu_ptr(&exc_table), excsave1);
 }
 
 static void trap_init_debug(void)
index 5848c133f7ea871967c308060316a0482334734e..d4e9c397e3fdefb35fcd4a57953b04b2d7149007 100644 (file)
@@ -8,3 +8,5 @@ lib-y   += memcopy.o memset.o checksum.o \
           divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o \
           usercopy.o strncpy_user.o strnlen_user.o
 lib-$(CONFIG_PCI) += pci-auto.o
+lib-$(CONFIG_KCSAN) += kcsan-stubs.o
+KCSAN_SANITIZE_kcsan-stubs.o := n
diff --git a/arch/xtensa/lib/kcsan-stubs.c b/arch/xtensa/lib/kcsan-stubs.c
new file mode 100644 (file)
index 0000000..2b08faa
--- /dev/null
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bug.h>
+#include <linux/types.h>
+
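+/*
+ * KCSAN instrumentation may cause the compiler to emit calls to the
+ * 64-bit __atomic_* builtins, which 32-bit Xtensa cannot provide.
+ * These stubs only satisfy the linker; they must never be reached.
+ */
+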
+void __atomic_store_8(volatile void *p, u64 v, int i)
+{
+       BUG();
+}
+
+u64 __atomic_load_8(const volatile void *p, int i)
+{
+       BUG();
+}
+
+u64 __atomic_exchange_8(volatile void *p, u64 v, int i)
+{
+       BUG();
+}
+
+bool __atomic_compare_exchange_8(volatile void *p1, void *p2, u64 v, bool b, int i1, int i2)
+{
+       BUG();
+}
+
+u64 __atomic_fetch_add_8(volatile void *p, u64 v, int i)
+{
+       BUG();
+}
+
+u64 __atomic_fetch_sub_8(volatile void *p, u64 v, int i)
+{
+       BUG();
+}
+
+u64 __atomic_fetch_and_8(volatile void *p, u64 v, int i)
+{
+       BUG();
+}
+
+u64 __atomic_fetch_or_8(volatile void *p, u64 v, int i)
+{
+       BUG();
+}
+
+u64 __atomic_fetch_xor_8(volatile void *p, u64 v, int i)
+{
+       BUG();
+}
+
+u64 __atomic_fetch_nand_8(volatile void *p, u64 v, int i)
+{
+       BUG();
+}
index 582d817979ed51c79219014864869da96bb5a294..b20d206bcb719878bad55842a5f8aa932fe10f8d 100644 (file)
@@ -402,13 +402,13 @@ WEAK(memmove)
         */
        # copy 16 bytes per iteration for word-aligned dst and word-aligned src
 #if XCHAL_HAVE_LOOPS
-       loopnez a7, .backLoop1done
+       loopnez a7, .LbackLoop1done
 #else /* !XCHAL_HAVE_LOOPS */
-       beqz    a7, .backLoop1done
+       beqz    a7, .LbackLoop1done
        slli    a8, a7, 4
        sub     a8, a3, a8      # a8 = start of first 16B source chunk
 #endif /* !XCHAL_HAVE_LOOPS */
-.backLoop1:
+.LbackLoop1:
        addi    a3, a3, -16
        l32i    a7, a3, 12
        l32i    a6, a3,  8
@@ -420,9 +420,9 @@ WEAK(memmove)
        s32i    a7, a5,  4
        s32i    a6, a5,  0
 #if !XCHAL_HAVE_LOOPS
-       bne     a3, a8, .backLoop1  # continue loop if a3:src != a8:src_start
+       bne     a3, a8, .LbackLoop1  # continue loop if a3:src != a8:src_start
 #endif /* !XCHAL_HAVE_LOOPS */
-.backLoop1done:
+.LbackLoop1done:
        bbci.l  a4, 3, .Lback2
        # copy 8 bytes
        addi    a3, a3, -8
@@ -479,13 +479,13 @@ WEAK(memmove)
 #endif
        l32i    a6, a3, 0       # load first word
 #if XCHAL_HAVE_LOOPS
-       loopnez a7, .backLoop2done
+       loopnez a7, .LbackLoop2done
 #else /* !XCHAL_HAVE_LOOPS */
-       beqz    a7, .backLoop2done
+       beqz    a7, .LbackLoop2done
        slli    a10, a7, 4
        sub     a10, a3, a10    # a10 = start of first 16B source chunk
 #endif /* !XCHAL_HAVE_LOOPS */
-.backLoop2:
+.LbackLoop2:
        addi    a3, a3, -16
        l32i    a7, a3, 12
        l32i    a8, a3,  8
@@ -501,9 +501,9 @@ WEAK(memmove)
        __src_b a9, a6, a9
        s32i    a9, a5,  0
 #if !XCHAL_HAVE_LOOPS
-       bne     a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start
+       bne     a3, a10, .LbackLoop2 # continue loop if a3:src != a10:src_start
 #endif /* !XCHAL_HAVE_LOOPS */
-.backLoop2done:
+.LbackLoop2done:
        bbci.l  a4, 3, .Lback12
        # copy 8 bytes
        addi    a3, a3, -8
index f7fb08ae768f2ebc1811f88d76cf2e920cb662f5..44153a335951afc7529190741b965476f744e4d7 100644 (file)
@@ -4,7 +4,8 @@
 #
 
 obj-y                  := init.o misc.o
-obj-$(CONFIG_MMU)      += cache.o fault.o ioremap.o mmu.o tlb.o
+obj-$(CONFIG_PFAULT)   += fault.o
+obj-$(CONFIG_MMU)      += cache.o ioremap.o mmu.o tlb.o
 obj-$(CONFIG_HIGHMEM)  += highmem.o
 obj-$(CONFIG_KASAN)    += kasan_init.o
 
index 06d0973a0d7472e4b7777231fa775b17a2ac9200..16f0a5ff57991c1135382594de7bb2ce5197cd70 100644 (file)
 #include <asm/cacheflush.h>
 #include <asm/hardirq.h>
 
-DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
 void bad_page_fault(struct pt_regs*, unsigned long, int);
 
+static void vmalloc_fault(struct pt_regs *regs, unsigned int address)
+{
+#ifdef CONFIG_MMU
+       /* Synchronize this task's top level page-table
+        * with the 'reference' page table.
+        */
+       struct mm_struct *act_mm = current->active_mm;
+       int index = pgd_index(address);
+       pgd_t *pgd, *pgd_k;
+       p4d_t *p4d, *p4d_k;
+       pud_t *pud, *pud_k;
+       pmd_t *pmd, *pmd_k;
+       pte_t *pte_k;
+
+       if (act_mm == NULL)
+               goto bad_page_fault;
+
+       pgd = act_mm->pgd + index;
+       pgd_k = init_mm.pgd + index;
+
+       if (!pgd_present(*pgd_k))
+               goto bad_page_fault;
+
+       pgd_val(*pgd) = pgd_val(*pgd_k);
+
+       p4d = p4d_offset(pgd, address);
+       p4d_k = p4d_offset(pgd_k, address);
+       if (!p4d_present(*p4d) || !p4d_present(*p4d_k))
+               goto bad_page_fault;
+
+       pud = pud_offset(p4d, address);
+       pud_k = pud_offset(p4d_k, address);
+       if (!pud_present(*pud) || !pud_present(*pud_k))
+               goto bad_page_fault;
+
+       pmd = pmd_offset(pud, address);
+       pmd_k = pmd_offset(pud_k, address);
+       if (!pmd_present(*pmd) || !pmd_present(*pmd_k))
+               goto bad_page_fault;
+
+       pmd_val(*pmd) = pmd_val(*pmd_k);
+       pte_k = pte_offset_kernel(pmd_k, address);
+
+       if (!pte_present(*pte_k))
+               goto bad_page_fault;
+       return;
+
+bad_page_fault:
+       bad_page_fault(regs, address, SIGKILL);
+#else
+       WARN_ONCE(1, "%s in noMMU configuration\n", __func__);
+#endif
+}
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -49,8 +101,10 @@ void do_page_fault(struct pt_regs *regs)
        /* We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
         */
-       if (address >= TASK_SIZE && !user_mode(regs))
-               goto vmalloc_fault;
+       if (address >= TASK_SIZE && !user_mode(regs)) {
+               vmalloc_fault(regs, address);
+               return;
+       }
 
        /* If we're in an interrupt or have no user
         * context, we must not take the fault..
@@ -114,7 +168,7 @@ good_area:
 
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
-                       goto bad_page_fault;
+                       bad_page_fault(regs, address, SIGKILL);
                return;
        }
 
@@ -181,56 +235,6 @@ do_sigbus:
        if (!user_mode(regs))
                bad_page_fault(regs, address, SIGBUS);
        return;
-
-vmalloc_fault:
-       {
-               /* Synchronize this task's top level page-table
-                * with the 'reference' page table.
-                */
-               struct mm_struct *act_mm = current->active_mm;
-               int index = pgd_index(address);
-               pgd_t *pgd, *pgd_k;
-               p4d_t *p4d, *p4d_k;
-               pud_t *pud, *pud_k;
-               pmd_t *pmd, *pmd_k;
-               pte_t *pte_k;
-
-               if (act_mm == NULL)
-                       goto bad_page_fault;
-
-               pgd = act_mm->pgd + index;
-               pgd_k = init_mm.pgd + index;
-
-               if (!pgd_present(*pgd_k))
-                       goto bad_page_fault;
-
-               pgd_val(*pgd) = pgd_val(*pgd_k);
-
-               p4d = p4d_offset(pgd, address);
-               p4d_k = p4d_offset(pgd_k, address);
-               if (!p4d_present(*p4d) || !p4d_present(*p4d_k))
-                       goto bad_page_fault;
-
-               pud = pud_offset(p4d, address);
-               pud_k = pud_offset(p4d_k, address);
-               if (!pud_present(*pud) || !pud_present(*pud_k))
-                       goto bad_page_fault;
-
-               pmd = pmd_offset(pud, address);
-               pmd_k = pmd_offset(pud_k, address);
-               if (!pmd_present(*pmd) || !pmd_present(*pmd_k))
-                       goto bad_page_fault;
-
-               pmd_val(*pmd) = pmd_val(*pmd_k);
-               pte_k = pte_offset_kernel(pmd_k, address);
-
-               if (!pte_present(*pte_k))
-                       goto bad_page_fault;
-               return;
-       }
-bad_page_fault:
-       bad_page_fault(regs, address, SIGKILL);
-       return;
 }
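
A hedged sketch of the path that exercises the extracted vmalloc_fault() helper; the function name below is illustrative, not from the patch. A fresh vmalloc mapping is recorded only in init_mm.pgd, so the first kernel-mode access from a task whose pgd lacks the top-level entry faults, and do_page_fault() now hands that off to vmalloc_fault() instead of the old goto block:

    #include <linux/vmalloc.h>
    #include <linux/string.h>

    static void touch_vmalloc_area(void)
    {
            void *buf = vmalloc(PAGE_SIZE); /* entry lands in init_mm.pgd */

            if (!buf)
                    return;
            /* The first access from this task may fault and take the
             * vmalloc_fault() sync path factored out above. */
            memset(buf, 0, PAGE_SIZE);
            vfree(buf);
    }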
 
 
index 38acda4f04e85d5db3d91bb0a0f97819bf6dc7a9..92e158c69c103353e1d9a17105876cbc6e67f8f3 100644 (file)
@@ -18,6 +18,8 @@
 #include <asm/initialize_mmu.h>
 #include <asm/io.h>
 
+DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
+
 #if defined(CONFIG_HIGHMEM)
 static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages)
 {
index be3aaaad8bee0ee61c8756aa7c25dae043e09f28..fd84d489175897a07f1f09ee7bc261989bc45f02 100644 (file)
@@ -38,9 +38,6 @@
 #define ISS_NET_TIMER_VALUE (HZ / 10)
 
 
-static DEFINE_SPINLOCK(opened_lock);
-static LIST_HEAD(opened);
-
 static DEFINE_SPINLOCK(devices_lock);
 static LIST_HEAD(devices);
 
@@ -59,17 +56,27 @@ struct tuntap_info {
 /* ------------------------------------------------------------------------- */
 
 
+struct iss_net_private;
+
+struct iss_net_ops {
+       int (*open)(struct iss_net_private *lp);
+       void (*close)(struct iss_net_private *lp);
+       int (*read)(struct iss_net_private *lp, struct sk_buff **skb);
+       int (*write)(struct iss_net_private *lp, struct sk_buff **skb);
+       unsigned short (*protocol)(struct sk_buff *skb);
+       int (*poll)(struct iss_net_private *lp);
+};
+
 /* This structure contains our private information for the driver. */
 
 struct iss_net_private {
        struct list_head device_list;
-       struct list_head opened_list;
 
        spinlock_t lock;
        struct net_device *dev;
        struct platform_device pdev;
        struct timer_list tl;
-       struct net_device_stats stats;
+       struct rtnl_link_stats64 stats;
 
        struct timer_list timer;
        unsigned int timer_val;
@@ -82,12 +89,7 @@ struct iss_net_private {
                        struct tuntap_info tuntap;
                } info;
 
-               int (*open)(struct iss_net_private *lp);
-               void (*close)(struct iss_net_private *lp);
-               int (*read)(struct iss_net_private *lp, struct sk_buff **skb);
-               int (*write)(struct iss_net_private *lp, struct sk_buff **skb);
-               unsigned short (*protocol)(struct sk_buff *skb);
-               int (*poll)(struct iss_net_private *lp);
+               const struct iss_net_ops *net_ops;
        } tp;
 
 };
@@ -215,6 +217,15 @@ static int tuntap_poll(struct iss_net_private *lp)
        return simc_poll(lp->tp.info.tuntap.fd);
 }
 
+static const struct iss_net_ops tuntap_ops = {
+       .open           = tuntap_open,
+       .close          = tuntap_close,
+       .read           = tuntap_read,
+       .write          = tuntap_write,
+       .protocol       = tuntap_protocol,
+       .poll           = tuntap_poll,
+};
+
 /*
  * ethX=tuntap,[mac address],device name
  */
@@ -257,13 +268,7 @@ static int tuntap_probe(struct iss_net_private *lp, int index, char *init)
        lp->mtu = TRANSPORT_TUNTAP_MTU;
 
        lp->tp.info.tuntap.fd = -1;
-
-       lp->tp.open = tuntap_open;
-       lp->tp.close = tuntap_close;
-       lp->tp.read = tuntap_read;
-       lp->tp.write = tuntap_write;
-       lp->tp.protocol = tuntap_protocol;
-       lp->tp.poll = tuntap_poll;
+       lp->tp.net_ops = &tuntap_ops;
 
        return 1;
 }
@@ -278,14 +283,16 @@ static int iss_net_rx(struct net_device *dev)
 
        /* Check if there is any new data. */
 
-       if (lp->tp.poll(lp) == 0)
+       if (lp->tp.net_ops->poll(lp) == 0)
                return 0;
 
        /* Try to allocate memory; if it fails, try again next round. */
 
        skb = dev_alloc_skb(dev->mtu + 2 + ETH_HEADER_OTHER);
        if (skb == NULL) {
+               spin_lock_bh(&lp->lock);
                lp->stats.rx_dropped++;
+               spin_unlock_bh(&lp->lock);
                return 0;
        }
 
@@ -295,15 +302,17 @@ static int iss_net_rx(struct net_device *dev)
 
        skb->dev = dev;
        skb_reset_mac_header(skb);
-       pkt_len = lp->tp.read(lp, &skb);
+       pkt_len = lp->tp.net_ops->read(lp, &skb);
        skb_put(skb, pkt_len);
 
        if (pkt_len > 0) {
                skb_trim(skb, pkt_len);
-               skb->protocol = lp->tp.protocol(skb);
+               skb->protocol = lp->tp.net_ops->protocol(skb);
 
+               spin_lock_bh(&lp->lock);
                lp->stats.rx_bytes += skb->len;
                lp->stats.rx_packets++;
+               spin_unlock_bh(&lp->lock);
                netif_rx(skb);
                return pkt_len;
        }
@@ -311,38 +320,24 @@ static int iss_net_rx(struct net_device *dev)
        return pkt_len;
 }
 
-static int iss_net_poll(void)
+static int iss_net_poll(struct iss_net_private *lp)
 {
-       struct list_head *ele;
        int err, ret = 0;
 
-       spin_lock(&opened_lock);
-
-       list_for_each(ele, &opened) {
-               struct iss_net_private *lp;
-
-               lp = list_entry(ele, struct iss_net_private, opened_list);
-
-               if (!netif_running(lp->dev))
-                       break;
-
-               spin_lock(&lp->lock);
-
-               while ((err = iss_net_rx(lp->dev)) > 0)
-                       ret++;
+       if (!netif_running(lp->dev))
+               return 0;
 
-               spin_unlock(&lp->lock);
+       while ((err = iss_net_rx(lp->dev)) > 0)
+               ret++;
 
-               if (err < 0) {
-                       pr_err("Device '%s' read returned %d, shutting it down\n",
-                              lp->dev->name, err);
-                       dev_close(lp->dev);
-               } else {
-                       /* FIXME reactivate_fd(lp->fd, ISS_ETH_IRQ); */
-               }
+       if (err < 0) {
+               pr_err("Device '%s' read returned %d, shutting it down\n",
+                      lp->dev->name, err);
+               dev_close(lp->dev);
+       } else {
+               /* FIXME reactivate_fd(lp->fd, ISS_ETH_IRQ); */
        }
 
-       spin_unlock(&opened_lock);
        return ret;
 }
 
@@ -351,10 +346,8 @@ static void iss_net_timer(struct timer_list *t)
 {
        struct iss_net_private *lp = from_timer(lp, t, timer);
 
-       iss_net_poll();
-       spin_lock(&lp->lock);
+       iss_net_poll(lp);
        mod_timer(&lp->timer, jiffies + lp->timer_val);
-       spin_unlock(&lp->lock);
 }
 
 
@@ -363,11 +356,9 @@ static int iss_net_open(struct net_device *dev)
        struct iss_net_private *lp = netdev_priv(dev);
        int err;
 
-       spin_lock_bh(&lp->lock);
-
-       err = lp->tp.open(lp);
+       err = lp->tp.net_ops->open(lp);
        if (err < 0)
-               goto out;
+               return err;
 
        netif_start_queue(dev);
 
@@ -378,36 +369,21 @@ static int iss_net_open(struct net_device *dev)
        while ((err = iss_net_rx(dev)) > 0)
                ;
 
-       spin_unlock_bh(&lp->lock);
-       spin_lock_bh(&opened_lock);
-       list_add(&lp->opened_list, &opened);
-       spin_unlock_bh(&opened_lock);
-       spin_lock_bh(&lp->lock);
-
        timer_setup(&lp->timer, iss_net_timer, 0);
        lp->timer_val = ISS_NET_TIMER_VALUE;
        mod_timer(&lp->timer, jiffies + lp->timer_val);
 
-out:
-       spin_unlock_bh(&lp->lock);
        return err;
 }
 
 static int iss_net_close(struct net_device *dev)
 {
        struct iss_net_private *lp = netdev_priv(dev);
-       netif_stop_queue(dev);
-       spin_lock_bh(&lp->lock);
-
-       spin_lock(&opened_lock);
-       list_del(&opened);
-       spin_unlock(&opened_lock);
 
+       netif_stop_queue(dev);
        del_timer_sync(&lp->timer);
+       lp->tp.net_ops->close(lp);
 
-       lp->tp.close(lp);
-
-       spin_unlock_bh(&lp->lock);
        return 0;
 }
 
@@ -417,13 +393,14 @@ static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
        int len;
 
        netif_stop_queue(dev);
-       spin_lock_bh(&lp->lock);
 
-       len = lp->tp.write(lp, &skb);
+       len = lp->tp.net_ops->write(lp, &skb);
 
        if (len == skb->len) {
+               spin_lock_bh(&lp->lock);
                lp->stats.tx_packets++;
                lp->stats.tx_bytes += skb->len;
+               spin_unlock_bh(&lp->lock);
                netif_trans_update(dev);
                netif_start_queue(dev);
 
@@ -432,24 +409,29 @@ static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        } else if (len == 0) {
                netif_start_queue(dev);
+               spin_lock_bh(&lp->lock);
                lp->stats.tx_dropped++;
+               spin_unlock_bh(&lp->lock);
 
        } else {
                netif_start_queue(dev);
                pr_err("%s: %s failed(%d)\n", dev->name, __func__, len);
        }
 
-       spin_unlock_bh(&lp->lock);
 
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
 }
 
 
-static struct net_device_stats *iss_net_get_stats(struct net_device *dev)
+static void iss_net_get_stats64(struct net_device *dev,
+                               struct rtnl_link_stats64 *stats)
 {
        struct iss_net_private *lp = netdev_priv(dev);
-       return &lp->stats;
+
+       spin_lock_bh(&lp->lock);
+       *stats = lp->stats;
+       spin_unlock_bh(&lp->lock);
 }
 
 static void iss_net_set_multicast_list(struct net_device *dev)
@@ -460,19 +442,6 @@ static void iss_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 }
 
-static int iss_net_set_mac(struct net_device *dev, void *addr)
-{
-       struct iss_net_private *lp = netdev_priv(dev);
-       struct sockaddr *hwaddr = addr;
-
-       if (!is_valid_ether_addr(hwaddr->sa_data))
-               return -EADDRNOTAVAIL;
-       spin_lock_bh(&lp->lock);
-       eth_hw_addr_set(dev, hwaddr->sa_data);
-       spin_unlock_bh(&lp->lock);
-       return 0;
-}
-
 static int iss_net_change_mtu(struct net_device *dev, int new_mtu)
 {
        return -EINVAL;
@@ -494,11 +463,11 @@ static int driver_registered;
 static const struct net_device_ops iss_netdev_ops = {
        .ndo_open               = iss_net_open,
        .ndo_stop               = iss_net_close,
-       .ndo_get_stats          = iss_net_get_stats,
+       .ndo_get_stats64        = iss_net_get_stats64,
        .ndo_start_xmit         = iss_net_start_xmit,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_change_mtu         = iss_net_change_mtu,
-       .ndo_set_mac_address    = iss_net_set_mac,
+       .ndo_set_mac_address    = eth_mac_addr,
        .ndo_tx_timeout         = iss_net_tx_timeout,
        .ndo_set_rx_mode        = iss_net_set_multicast_list,
 };
@@ -520,7 +489,6 @@ static int iss_net_configure(int index, char *init)
        lp = netdev_priv(dev);
        *lp = (struct iss_net_private) {
                .device_list            = LIST_HEAD_INIT(lp->device_list),
-               .opened_list            = LIST_HEAD_INIT(lp->opened_list),
                .dev                    = dev,
                .index                  = index,
        };
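
The conversion above replaces six per-instance function pointers with one shared, read-only struct iss_net_ops table. A minimal sketch of the same pattern, with purely illustrative names:

    struct widget;

    struct widget_ops {
            int (*open)(struct widget *w);
            void (*close)(struct widget *w);
    };

    struct widget {
            const struct widget_ops *ops;   /* one pointer, not N copies */
    };

    static int demo_open(struct widget *w) { return 0; }
    static void demo_close(struct widget *w) { }

    static const struct widget_ops demo_ops = {
            .open   = demo_open,
            .close  = demo_close,
    };

A const table lives in read-only data, is shared by every instance, and cannot be scribbled on at run time, which is why the driver now stores only lp->tp.net_ops.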
index 0f0e0724397f4224ac8cce3ceceae7a52b217c28..4255b92fa3eb0b9e8dd03be87476f96624601771 100644 (file)
@@ -211,12 +211,18 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
        struct simdisk *dev = pde_data(file_inode(file));
        const char *s = dev->filename;
        if (s) {
-               ssize_t n = simple_read_from_buffer(buf, size, ppos,
-                                                       s, strlen(s));
-               if (n < 0)
-                       return n;
-               buf += n;
-               size -= n;
+               ssize_t len = strlen(s);
+               char *temp = kmalloc(len + 2, GFP_KERNEL);
+
+               if (!temp)
+                       return -ENOMEM;
+
+               len = scnprintf(temp, len + 2, "%s\n", s);
+               len = simple_read_from_buffer(buf, size, ppos,
+                                             temp, len);
+
+               kfree(temp);
+               return len;
        }
        return simple_read_from_buffer(buf, size, ppos, "\n", 1);
 }
index 145d129be76fa9b54a5d22198ea6f5f41e4fddb0..0dc22c3716148713596d8c2595256dc7642719f4 100644 (file)
@@ -78,7 +78,7 @@ void __init platform_init(bp_tag_t *first)
 
 void platform_heartbeat(void)
 {
-       static int i=0, t = 0;
+       static int i, t;
 
        if (--t < 0)
        {
index 3950ecbc5c263b0bc93575383d0c388ef6d2edde..4e01bb71ad6e07b7176caf277724a35b54fa4056 100644 (file)
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)   += bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)       += blk-cgroup.o
 obj-$(CONFIG_BLK_CGROUP_RWSTAT)        += blk-cgroup-rwstat.o
+obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)       += blk-throttle.o
 obj-$(CONFIG_BLK_CGROUP_IOPRIO)        += blk-ioprio.o
 obj-$(CONFIG_BLK_CGROUP_IOLATENCY)     += blk-iolatency.o
index d39056630d9c1de07d3923daeb8b80e6ab6a086e..3afb550c0f7b782ad9bc95be4a50e111154994ac 100644 (file)
@@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
                s >>= bb->shift;
                target += (1<<bb->shift) - 1;
                target >>= bb->shift;
-               sectors = target - s;
        }
        /* 'target' is now the first block after the bad range */
 
@@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
                s += (1<<bb->shift) - 1;
                s >>= bb->shift;
                target >>= bb->shift;
-               sectors = target - s;
        }
 
        write_seqlock_irq(&bb->lock);
index 13de871fa816993a946360e1f97610652e0ceac1..7bf88e591aaf3ba40d60fa9d68443785f2e95730 100644 (file)
@@ -673,17 +673,17 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
                }
        }
 
-       if (!bdev->bd_openers)
+       if (!atomic_read(&bdev->bd_openers))
                set_init_blocksize(bdev);
        if (test_bit(GD_NEED_PART_SCAN, &disk->state))
                bdev_disk_changed(disk, false);
-       bdev->bd_openers++;
+       atomic_inc(&bdev->bd_openers);
        return 0;
 }
 
 static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
 {
-       if (!--bdev->bd_openers)
+       if (atomic_dec_and_test(&bdev->bd_openers))
                blkdev_flush_mapping(bdev);
        if (bdev->bd_disk->fops->release)
                bdev->bd_disk->fops->release(bdev->bd_disk, mode);
@@ -694,7 +694,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
        struct gendisk *disk = part->bd_disk;
        int ret;
 
-       if (part->bd_openers)
+       if (atomic_read(&part->bd_openers))
                goto done;
 
        ret = blkdev_get_whole(bdev_whole(part), mode);
@@ -708,7 +708,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
        disk->open_partitions++;
        set_init_blocksize(part);
 done:
-       part->bd_openers++;
+       atomic_inc(&part->bd_openers);
        return 0;
 
 out_blkdev_put:
@@ -720,7 +720,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
 {
        struct block_device *whole = bdev_whole(part);
 
-       if (--part->bd_openers)
+       if (!atomic_dec_and_test(&part->bd_openers))
                return;
        blkdev_flush_mapping(part);
        whole->bd_disk->open_partitions--;
@@ -899,7 +899,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
         * of the world and we want to avoid long (possibly several-minute)
         * syncs while holding the mutex.
         */
-       if (bdev->bd_openers == 1)
+       if (atomic_read(&bdev->bd_openers) == 1)
                sync_blockdev(bdev);
 
        mutex_lock(&disk->open_mutex);
@@ -1044,7 +1044,7 @@ void sync_bdevs(bool wait)
                bdev = I_BDEV(inode);
 
                mutex_lock(&bdev->bd_disk->open_mutex);
-               if (!bdev->bd_openers) {
+               if (!atomic_read(&bdev->bd_openers)) {
                        ; /* skip */
                } else if (wait) {
                        /*
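
bd_openers is now read without open_mutex held (in blkdev_put() and sync_bdevs() above), so the plain integer becomes an atomic_t. A minimal sketch of the counting pattern, with illustrative names:

    #include <linux/atomic.h>
    #include <linux/printk.h>

    static atomic_t openers = ATOMIC_INIT(0);

    static void demo_open(void)             /* callers serialize opens */
    {
            if (!atomic_read(&openers))
                    pr_debug("first opener: one-time setup\n");
            atomic_inc(&openers);
    }

    static void demo_release(void)
    {
            if (atomic_dec_and_test(&openers))
                    pr_debug("last closer: flush and tear down\n");
    }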
index 420eda2589c0ec99ea4647233fdd85a32b6b63e8..09574af835662ff1bf998fbd6dcf0df6b53c5b68 100644 (file)
@@ -557,6 +557,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
                                   */
        bfqg->bfqd = bfqd;
        bfqg->active_entities = 0;
+       bfqg->online = true;
        bfqg->rq_pos_tree = RB_ROOT;
 }
 
@@ -585,28 +586,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg,
        entity->sched_data = &parent->sched_data;
 }
 
-static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
-                                        struct blkcg *blkcg)
+static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
 {
-       struct blkcg_gq *blkg;
-
-       blkg = blkg_lookup(blkcg, bfqd->queue);
-       if (likely(blkg))
-               return blkg_to_bfqg(blkg);
-       return NULL;
-}
-
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
-                                    struct blkcg *blkcg)
-{
-       struct bfq_group *bfqg, *parent;
+       struct bfq_group *parent;
        struct bfq_entity *entity;
 
-       bfqg = bfq_lookup_bfqg(bfqd, blkcg);
-
-       if (unlikely(!bfqg))
-               return NULL;
-
        /*
         * Update chain of bfq_groups as we might be handling a leaf group
         * which, along with some of its relatives, has not been hooked yet
@@ -623,8 +607,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
                        bfq_group_set_parent(curr_bfqg, parent);
                }
        }
+}
 
-       return bfqg;
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
+{
+       struct blkcg_gq *blkg = bio->bi_blkg;
+       struct bfq_group *bfqg;
+
+       while (blkg) {
+               bfqg = blkg_to_bfqg(blkg);
+               if (bfqg->online) {
+                       bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
+                       return bfqg;
+               }
+               blkg = blkg->parent;
+       }
+       bio_associate_blkg_from_css(bio,
+                               &bfqg_to_blkg(bfqd->root_group)->blkcg->css);
+       return bfqd->root_group;
 }
 
 /**
@@ -714,25 +714,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  * Move bic to blkcg, assuming that bfqd->lock is held, which makes
  * sure that the reference to cgroup is valid across the call (see
  * comments in bfq_bic_update_cgroup on this issue)
- *
- * NOTE: an alternative approach might have been to store the current
- * cgroup in bfqq and getting a reference to it, reducing the lookup
- * time here, at the price of slightly more complex code.
  */
-static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
-                                               struct bfq_io_cq *bic,
-                                               struct blkcg *blkcg)
+static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+                                    struct bfq_io_cq *bic,
+                                    struct bfq_group *bfqg)
 {
        struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
        struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
-       struct bfq_group *bfqg;
        struct bfq_entity *entity;
 
-       bfqg = bfq_find_set_group(bfqd, blkcg);
-
-       if (unlikely(!bfqg))
-               bfqg = bfqd->root_group;
-
        if (async_bfqq) {
                entity = &async_bfqq->entity;
 
@@ -743,9 +733,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
        }
 
        if (sync_bfqq) {
-               entity = &sync_bfqq->entity;
-               if (entity->sched_data != &bfqg->sched_data)
-                       bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+               if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+                       /* We are the only user of this bfqq, just move it */
+                       if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+                               bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+               } else {
+                       struct bfq_queue *bfqq;
+
+                       /*
+                        * The queue was merged to a different queue. Check
+                        * that the merge chain still belongs to the same
+                        * cgroup.
+                        */
+                       for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+                               if (bfqq->entity.sched_data !=
+                                   &bfqg->sched_data)
+                                       break;
+                       if (bfqq) {
+                               /*
+                                * Some queue changed cgroup, so the merge is
+                                * not valid anymore. We cannot easily just
+                                * cancel the merge (by clearing new_bfqq) as
+                                * there may be other processes using this
+                                * queue and holding refs to all queues below
+                                * sync_bfqq->new_bfqq. Similarly, if the merge
+                                * already happened, we need to detach from
+                                * bfqq now so that we cannot merge a bio into
+                                * a request from the old cgroup.
+                                */
+                               bfq_put_cooperator(sync_bfqq);
+                               bfq_release_process_ref(bfqd, sync_bfqq);
+                               bic_set_bfqq(bic, NULL, 1);
+                       }
+               }
        }
 
        return bfqg;
@@ -754,20 +774,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
 void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 {
        struct bfq_data *bfqd = bic_to_bfqd(bic);
-       struct bfq_group *bfqg = NULL;
+       struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
        uint64_t serial_nr;
 
-       rcu_read_lock();
-       serial_nr = __bio_blkcg(bio)->css.serial_nr;
+       serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;
 
        /*
         * Check whether blkcg has changed.  The condition may trigger
         * spuriously on a newly created cic but there's no harm.
         */
        if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
-               goto out;
+               return;
 
-       bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+       /*
+        * New cgroup for this process. Make sure it is linked to bfq internal
+        * cgroup hierarchy.
+        */
+       bfq_link_bfqg(bfqd, bfqg);
+       __bfq_bic_change_cgroup(bfqd, bic, bfqg);
        /*
         * Update blkg_path for bfq_log_* functions. We cache this
         * path, and update it here, for the following
@@ -820,8 +844,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
         */
        blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
        bic->blkcg_serial_nr = serial_nr;
-out:
-       rcu_read_unlock();
 }
 
 /**
@@ -949,6 +971,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 
 put_async_queues:
        bfq_put_async_queues(bfqd, bfqg);
+       bfqg->online = false;
 
        spin_unlock_irqrestore(&bfqd->lock, flags);
        /*
@@ -1438,7 +1461,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
        bfq_end_wr_async_queues(bfqd, bfqd->root_group);
 }
 
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
 {
        return bfqd->root_group;
 }
index 1f62dbdc521ff4b3c7e389ddc945f66348c13b77..0d46cb728bbfab177eaf505b2760637ec2e27d97 100644 (file)
@@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600;
  */
 static const unsigned long bfq_late_stable_merging = 600;
 
-#define RQ_BIC(rq)             icq_to_bic((rq)->elv.priv[0])
+#define RQ_BIC(rq)             ((struct bfq_io_cq *)((rq)->elv.priv[0]))
 #define RQ_BFQQ(rq)            ((rq)->elv.priv[1])
 
 struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
  */
 void bfq_schedule_dispatch(struct bfq_data *bfqd)
 {
+       lockdep_assert_held(&bfqd->lock);
+
        if (bfqd->queued != 0) {
                bfq_log(bfqd, "schedule dispatch");
                blk_mq_run_hw_queues(bfqd->queue, true);
@@ -2133,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
        if (!bfqd->last_completed_rq_bfqq ||
            bfqd->last_completed_rq_bfqq == bfqq ||
            bfq_bfqq_has_short_ttime(bfqq) ||
-           bfqq->dispatched > 0 ||
-           now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
-           bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
+           now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
                return;
 
        /*
@@ -2208,9 +2208,13 @@ static void bfq_add_request(struct request *rq)
 
        bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
        bfqq->queued[rq_is_sync(rq)]++;
-       bfqd->queued++;
+       /*
+        * Updating of 'bfqd->queued' is protected by 'bfqd->lock'; however, it
+        * may be read without holding the lock in bfq_has_work().
+        */
+       WRITE_ONCE(bfqd->queued, bfqd->queued + 1);
 
-       if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
+       if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
                bfq_check_waker(bfqd, bfqq, now_ns);
 
                /*
@@ -2400,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q,
        if (rq->queuelist.prev != &rq->queuelist)
                list_del_init(&rq->queuelist);
        bfqq->queued[sync]--;
-       bfqd->queued--;
+       /*
+        * Updating of 'bfqd->queued' is protected by 'bfqd->lock'; however, it
+        * may be read without holding the lock in bfq_has_work().
+        */
+       WRITE_ONCE(bfqd->queued, bfqd->queued - 1);
        elv_rb_del(&bfqq->sort_list, rq);
 
        elv_rqhash_del(q, rq);
@@ -2463,10 +2471,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
 
        spin_lock_irq(&bfqd->lock);
 
-       if (bic)
+       if (bic) {
+               /*
+                * Make sure cgroup info is uptodate for current process before
+                * considering the merge.
+                */
+               bfq_bic_update_cgroup(bic, bio);
+
                bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
-       else
+       } else {
                bfqd->bio_bfqq = NULL;
+       }
        bfqd->bio_bic = bic;
 
        ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
@@ -2496,8 +2511,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
        return ELEVATOR_NO_MERGE;
 }
 
-static struct bfq_queue *bfq_init_rq(struct request *rq);
-
 static void bfq_request_merged(struct request_queue *q, struct request *req,
                               enum elv_merge type)
 {
@@ -2506,7 +2519,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
            blk_rq_pos(req) <
            blk_rq_pos(container_of(rb_prev(&req->rb_node),
                                    struct request, rb_node))) {
-               struct bfq_queue *bfqq = bfq_init_rq(req);
+               struct bfq_queue *bfqq = RQ_BFQQ(req);
                struct bfq_data *bfqd;
                struct request *prev, *next_rq;
 
@@ -2558,8 +2571,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
 static void bfq_requests_merged(struct request_queue *q, struct request *rq,
                                struct request *next)
 {
-       struct bfq_queue *bfqq = bfq_init_rq(rq),
-               *next_bfqq = bfq_init_rq(next);
+       struct bfq_queue *bfqq = RQ_BFQQ(rq),
+               *next_bfqq = RQ_BFQQ(next);
 
        if (!bfqq)
                goto remove;
@@ -2764,6 +2777,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
        if (process_refs == 0 || new_process_refs == 0)
                return NULL;
 
+       /*
+        * Make sure merged queues belong to the same parent. Parents could
+        * have changed since the time we decided the two queues are suitable
+        * for merging.
+        */
+       if (new_bfqq->entity.parent != bfqq->entity.parent)
+               return NULL;
+
        bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
                new_bfqq->pid);
 
@@ -2901,9 +2922,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
                                struct bfq_queue *new_bfqq =
                                        bfq_setup_merge(bfqq, stable_merge_bfqq);
 
-                               bic->stably_merged = true;
-                               if (new_bfqq && new_bfqq->bic)
-                                       new_bfqq->bic->stably_merged = true;
+                               if (new_bfqq) {
+                                       bic->stably_merged = true;
+                                       if (new_bfqq->bic)
+                                               new_bfqq->bic->stably_merged =
+                                                                       true;
+                               }
                                return new_bfqq;
                        } else
                                return NULL;
@@ -5045,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
        struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
 
        /*
-        * Avoiding lock: a race on bfqd->busy_queues should cause at
+        * Avoiding lock: a race on bfqd->queued should cause at
         * most a call to dispatch for nothing
         */
        return !list_empty_careful(&bfqd->dispatch) ||
-               bfq_tot_busy_queues(bfqd) > 0;
+               READ_ONCE(bfqd->queued);
 }
 
 static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
@@ -5310,7 +5334,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
        bfq_put_queue(bfqq);
 }
 
-static void bfq_put_cooperator(struct bfq_queue *bfqq)
+void bfq_put_cooperator(struct bfq_queue *bfqq)
 {
        struct bfq_queue *__bfqq, *next;
 
@@ -5716,14 +5740,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
        struct bfq_queue *bfqq;
        struct bfq_group *bfqg;
 
-       rcu_read_lock();
-
-       bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
-       if (!bfqg) {
-               bfqq = &bfqd->oom_bfqq;
-               goto out;
-       }
-
+       bfqg = bfq_bio_bfqg(bfqd, bio);
        if (!is_sync) {
                async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
                                                  ioprio);
@@ -5769,8 +5786,6 @@ out:
 
        if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn)
                bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic);
-
-       rcu_read_unlock();
        return bfqq;
 }
 
@@ -6117,6 +6132,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
                                           unsigned int cmd_flags) {}
 #endif /* CONFIG_BFQ_CGROUP_DEBUG */
 
+static struct bfq_queue *bfq_init_rq(struct request *rq);
+
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                               bool at_head)
 {
@@ -6132,18 +6149,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                bfqg_stats_update_legacy_io(q, rq);
 #endif
        spin_lock_irq(&bfqd->lock);
+       bfqq = bfq_init_rq(rq);
        if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
                spin_unlock_irq(&bfqd->lock);
                blk_mq_free_requests(&free);
                return;
        }
 
-       spin_unlock_irq(&bfqd->lock);
-
        trace_block_rq_insert(rq);
 
-       spin_lock_irq(&bfqd->lock);
-       bfqq = bfq_init_rq(rq);
        if (!bfqq || at_head) {
                if (at_head)
                        list_add(&rq->queuelist, &bfqd->dispatch);
@@ -6360,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
                bfq_schedule_dispatch(bfqd);
 }
 
-static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
-{
-       bfqq_request_freed(bfqq);
-       bfq_put_queue(bfqq);
-}
-
 /*
  * The processes associated with bfqq may happen to generate their
  * cumulative I/O at a lower rate than the rate at which the device
@@ -6562,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq)
 
                bfq_completed_request(bfqq, bfqd);
        }
-       bfq_finish_requeue_request_body(bfqq);
+       bfqq_request_freed(bfqq);
+       bfq_put_queue(bfqq);
+       RQ_BIC(rq)->requests--;
        spin_unlock_irqrestore(&bfqd->lock, flags);
 
        /*
@@ -6796,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
 
        bfqq_request_allocated(bfqq);
        bfqq->ref++;
+       bic->requests++;
        bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
                     rq, bfqq, bfqq->ref);
 
@@ -6892,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
        bfq_bfqq_expire(bfqd, bfqq, true, reason);
 
 schedule_dispatch:
-       spin_unlock_irqrestore(&bfqd->lock, flags);
        bfq_schedule_dispatch(bfqd);
+       spin_unlock_irqrestore(&bfqd->lock, flags);
 }
 
 /*
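
The WRITE_ONCE()/READ_ONCE() annotations on bfqd->queued above follow the usual locked-writer, lockless-reader discipline the comments describe. A minimal sketch, with illustrative names:

    #include <linux/compiler.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    static DEFINE_SPINLOCK(demo_lock);
    static int demo_queued;

    static void demo_enqueue(void)
    {
            spin_lock(&demo_lock);
            WRITE_ONCE(demo_queued, demo_queued + 1);   /* tear-free store */
            spin_unlock(&demo_lock);
    }

    static bool demo_has_work(void)
    {
            /* Lockless peek, as in bfq_has_work(): at worst a spurious
             * dispatch, never a torn or compiler-cached value. */
            return READ_ONCE(demo_queued) != 0;
    }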
index 3b83e3d1c2e5879af814292ba3d095f1ef3f926b..ca8177d7bf7c685c16b6aa3314d6c4d2155b2e46 100644 (file)
@@ -468,6 +468,7 @@ struct bfq_io_cq {
        struct bfq_queue *stable_merge_bfqq;
 
        bool stably_merged;     /* non splittable if true */
+       unsigned int requests;  /* Number of requests this process has in flight */
 };
 
 /**
@@ -928,6 +929,8 @@ struct bfq_group {
 
        /* reference counter (see comments in bfq_bic_update_cgroup) */
        int ref;
+       /* Is bfq_group still online? */
+       bool online;
 
        struct bfq_entity entity;
        struct bfq_sched_data sched_data;
@@ -979,6 +982,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
 void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
                     bool compensate, enum bfqq_expiration reason);
 void bfq_put_queue(struct bfq_queue *bfqq);
+void bfq_put_cooperator(struct bfq_queue *bfqq);
 void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
 void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
 void bfq_schedule_dispatch(struct bfq_data *bfqd);
@@ -1006,8 +1010,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
 void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
 void bfq_end_wr_async(struct bfq_data *bfqd);
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
-                                    struct blkcg *blkcg);
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio);
 struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
 struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
@@ -1100,13 +1103,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
                break;                                                  \
        bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH);           \
        blk_add_cgroup_trace_msg((bfqd)->queue,                         \
-                       bfqg_to_blkg(bfqq_group(bfqq))->blkcg,          \
+                       &bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css,    \
                        "%s " fmt, pid_str, ##args);                    \
 } while (0)
 
 #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {                    \
        blk_add_cgroup_trace_msg((bfqd)->queue,                         \
-               bfqg_to_blkg(bfqg)->blkcg, fmt, ##args);                \
+               &bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args);          \
 } while (0)
 
 #else /* CONFIG_BFQ_GROUP_IOSCHED */
index 4259125e16ab24075d902f989f424f8b046c9bee..a3893d80dccc9a45443df45eebf707759d070e1e 100644 (file)
@@ -224,24 +224,13 @@ EXPORT_SYMBOL(bio_uninit);
 static void bio_free(struct bio *bio)
 {
        struct bio_set *bs = bio->bi_pool;
-       void *p;
-
-       bio_uninit(bio);
+       void *p = bio;
 
-       if (bs) {
-               bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
+       WARN_ON_ONCE(!bs);
 
-               /*
-                * If we have front padding, adjust the bio pointer before freeing
-                */
-               p = bio;
-               p -= bs->front_pad;
-
-               mempool_free(p, &bs->bio_pool);
-       } else {
-               /* Bio was allocated by bio_kmalloc() */
-               kfree(bio);
-       }
+       bio_uninit(bio);
+       bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
+       mempool_free(p - bs->front_pad, &bs->bio_pool);
 }
 
 /*
@@ -419,6 +408,28 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
        queue_work(bs->rescue_workqueue, &bs->rescue_work);
 }
 
+static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
+               unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
+               struct bio_set *bs)
+{
+       struct bio_alloc_cache *cache;
+       struct bio *bio;
+
+       cache = per_cpu_ptr(bs->cache, get_cpu());
+       if (!cache->free_list) {
+               put_cpu();
+               return NULL;
+       }
+       bio = cache->free_list;
+       cache->free_list = bio->bi_next;
+       cache->nr--;
+       put_cpu();
+
+       bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs, opf);
+       bio->bi_pool = bs;
+       return bio;
+}
+
 /**
  * bio_alloc_bioset - allocate a bio for I/O
  * @bdev:      block device to allocate the bio for (can be %NULL)
@@ -451,6 +462,9 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
  * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
  * for per bio allocations.
  *
+ * If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process
+ * context, not hard/soft IRQ.
+ *
  * Returns: Pointer to new bio on success, NULL on failure.
  */
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
@@ -465,6 +479,21 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
        if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
                return NULL;
 
+       if (opf & REQ_ALLOC_CACHE) {
+               if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
+                       bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
+                                                    gfp_mask, bs);
+                       if (bio)
+                               return bio;
+                       /*
+                        * No cached bio available; the bio returned below is
+                        * marked with REQ_ALLOC_CACHE to participate in the
+                        * per-cpu alloc cache.
+                        */
+               } else {
+                       opf &= ~REQ_ALLOC_CACHE;
+               }
+       }
+
        /*
         * submit_bio_noacct() converts recursion to iteration; this means if
         * we're running beneath it, any bios we allocate and submit will not be
@@ -528,28 +557,28 @@ err_free:
 EXPORT_SYMBOL(bio_alloc_bioset);
 
 /**
- * bio_kmalloc - kmalloc a bio for I/O
+ * bio_kmalloc - kmalloc a bio
+ * @nr_vecs:   number of bio_vecs to allocate
  * @gfp_mask:   the GFP_* mask given to the slab allocator
- * @nr_iovecs: number of iovecs to pre-allocate
  *
- * Use kmalloc to allocate and initialize a bio.
+ * Use kmalloc to allocate a bio (including bvecs).  The bio must be initialized
+ * using bio_init() before use.  To free a bio returned from this function use
+ * kfree() after calling bio_uninit().  A bio returned from this function can
+ * be reused by calling bio_uninit() before calling bio_init() again.
+ *
+ * Note that unlike bio_alloc() or bio_alloc_bioset(), allocations from this
+ * function are not backed by a mempool and can fail.  Do not use this function
+ * for allocations in the file system I/O path.
  *
  * Returns: Pointer to new bio on success, NULL on failure.
  */
-struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
+struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
 {
        struct bio *bio;
 
-       if (nr_iovecs > UIO_MAXIOV)
-               return NULL;
-
-       bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
-       if (unlikely(!bio))
+       if (nr_vecs > UIO_MAXIOV)
                return NULL;
-       bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs,
-                0);
-       bio->bi_pool = NULL;
-       return bio;
+       return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask);
 }
 EXPORT_SYMBOL(bio_kmalloc);
 
@@ -711,7 +740,7 @@ void bio_put(struct bio *bio)
                        return;
        }
 
-       if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
+       if (bio->bi_opf & REQ_ALLOC_CACHE) {
                struct bio_alloc_cache *cache;
 
                bio_uninit(bio);
@@ -732,14 +761,15 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
        bio_set_flag(bio, BIO_CLONED);
        if (bio_flagged(bio_src, BIO_THROTTLED))
                bio_set_flag(bio, BIO_THROTTLED);
-       if (bio->bi_bdev == bio_src->bi_bdev &&
-           bio_flagged(bio_src, BIO_REMAPPED))
-               bio_set_flag(bio, BIO_REMAPPED);
        bio->bi_ioprio = bio_src->bi_ioprio;
        bio->bi_iter = bio_src->bi_iter;
 
-       bio_clone_blkg_association(bio, bio_src);
-       blkcg_bio_issue_init(bio);
+       if (bio->bi_bdev) {
+               if (bio->bi_bdev == bio_src->bi_bdev &&
+                   bio_flagged(bio_src, BIO_REMAPPED))
+                       bio_set_flag(bio, BIO_REMAPPED);
+               bio_clone_blkg_association(bio, bio_src);
+       }
 
        if (bio_crypt_clone(bio, bio_src, gfp) < 0)
                return -ENOMEM;
@@ -1727,55 +1757,13 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
                flags |= BIOSET_NEED_BVECS;
        if (src->rescue_workqueue)
                flags |= BIOSET_NEED_RESCUER;
+       if (src->cache)
+               flags |= BIOSET_PERCPU_CACHE;
 
        return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
 }
 EXPORT_SYMBOL(bioset_init_from_src);
 
-/**
- * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
- * @kiocb:     kiocb describing the IO
- * @bdev:      block device to allocate the bio for (can be %NULL)
- * @nr_vecs:   number of iovecs to pre-allocate
- * @opf:       operation and flags for bio
- * @bs:                bio_set to allocate from
- *
- * Description:
- *    Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only
- *    used to check if we should dip into the per-cpu bio_set allocation
- *    cache. The allocation uses GFP_KERNEL internally. On return, the
- *    bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio
- *    MUST be done from process context, not hard/soft IRQ.
- *
- */
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
-               unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
-{
-       struct bio_alloc_cache *cache;
-       struct bio *bio;
-
-       if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
-               return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
-
-       cache = per_cpu_ptr(bs->cache, get_cpu());
-       if (cache->free_list) {
-               bio = cache->free_list;
-               cache->free_list = bio->bi_next;
-               cache->nr--;
-               put_cpu();
-               bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
-                        nr_vecs, opf);
-               bio->bi_pool = bs;
-               bio_set_flag(bio, BIO_PERCPU_CACHE);
-               return bio;
-       }
-       put_cpu();
-       bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
-       bio_set_flag(bio, BIO_PERCPU_CACHE);
-       return bio;
-}
-EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
-
 static int __init init_bio(void)
 {
        int i;
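
A hedged usage sketch of the reworked bio_kmalloc() contract documented above: the caller now initializes explicitly with bio_init() and pairs bio_uninit() with kfree(). The wrapper names are illustrative:

    #include <linux/bio.h>
    #include <linux/slab.h>

    static struct bio *demo_bio_alloc(struct block_device *bdev,
                                      unsigned short nr_vecs)
    {
            struct bio *bio = bio_kmalloc(nr_vecs, GFP_KERNEL);

            if (!bio)
                    return NULL;    /* not mempool-backed: may fail */
            bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
                     nr_vecs, REQ_OP_READ);
            return bio;
    }

    static void demo_bio_free(struct bio *bio)
    {
            bio_uninit(bio);
            kfree(bio);
    }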
diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c
new file mode 100644 (file)
index 0000000..760a2e1
--- /dev/null
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "blk-cgroup.h"
+
+/**
+ * blkcg_set_fc_appid - set the fc_app_id field associated with a blkcg
+ * @app_id: application identifier
+ * @cgrp_id: cgroup id
+ * @app_id_len: size of application identifier
+ */
+int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
+{
+       struct cgroup *cgrp;
+       struct cgroup_subsys_state *css;
+       struct blkcg *blkcg;
+       int ret  = 0;
+
+       if (app_id_len > FC_APPID_LEN)
+               return -EINVAL;
+
+       cgrp = cgroup_get_from_id(cgrp_id);
+       if (!cgrp)
+               return -ENOENT;
+       css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
+       if (!css) {
+               ret = -ENOENT;
+               goto out_cgrp_put;
+       }
+       blkcg = css_to_blkcg(css);
+       /*
+        * There is a slight race condition on setting the appid.
+        * Worst case an I/O may not find the right id.
+        * This is no different from the I/O we let pass while obtaining
+        * the vmid from the fabric.
+        * Adding the overhead of a lock is not necessary.
+        */
+       strlcpy(blkcg->fc_app_id, app_id, app_id_len);
+       css_put(css);
+out_cgrp_put:
+       cgroup_put(cgrp);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(blkcg_set_fc_appid);
+
+/**
+ * blkcg_get_fc_appid - get the fc app identifier associated with a bio
+ * @bio: target bio
+ *
+ * On success return the fc_app_id; on failure return NULL.
+ */
+char *blkcg_get_fc_appid(struct bio *bio)
+{
+       if (!bio->bi_blkg || bio->bi_blkg->blkcg->fc_app_id[0] == '\0')
+               return NULL;
+       return bio->bi_blkg->blkcg->fc_app_id;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_fc_appid);
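
A hedged sketch of a consumer for the new helper, roughly how a Fibre Channel LLD might tag outgoing I/O; the function name and logging are illustrative, not from the patch:

    #include <linux/bio.h>
    #include <linux/blk-cgroup.h>

    static void demo_tag_fc_io(struct bio *bio)
    {
            char *app_id = blkcg_get_fc_appid(bio);

            if (app_id)
                    pr_debug("tagging I/O with appid %s\n", app_id);
            /* NULL means no blkg, or no appid set for this cgroup. */
    }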
index 8dfe62786cd5fa5eae77afed283fcff238070835..40161a3f68d04abbed2facae6099108d93290603 100644 (file)
@@ -59,6 +59,23 @@ static struct workqueue_struct *blkcg_punt_bio_wq;
 
 #define BLKG_DESTROY_BATCH_SIZE  64
 
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static struct cgroup_subsys_state *blkcg_css(void)
+{
+       struct cgroup_subsys_state *css;
+
+       css = kthread_blkcg();
+       if (css)
+               return css;
+       return task_css(current, io_cgrp_id);
+}
+
 static bool blkcg_policy_enabled(struct request_queue *q,
                                 const struct blkcg_policy *pol)
 {
@@ -155,6 +172,33 @@ static void blkg_async_bio_workfn(struct work_struct *work)
                blk_finish_plug(&plug);
 }
 
+/**
+ * bio_blkcg_css - return the blkcg CSS associated with a bio
+ * @bio: target bio
+ *
+ * This returns the CSS for the blkcg associated with a bio, or %NULL if not
+ * associated. Callers are expected to either handle %NULL or know association
+ * has been done prior to calling this.
+ */
+struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
+{
+       if (!bio || !bio->bi_blkg)
+               return NULL;
+       return &bio->bi_blkg->blkcg->css;
+}
+EXPORT_SYMBOL_GPL(bio_blkcg_css);
+
+/**
+ * blkcg_parent - get the parent of a blkcg
+ * @blkcg: blkcg of interest
+ *
+ * Return the parent blkcg of @blkcg.  Can be called anytime.
+ */
+static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
+{
+       return css_to_blkcg(blkcg->css.parent);
+}
+
 /**
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
@@ -254,7 +298,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
        struct blkcg_gq *blkg;
        int i, ret;
 
-       WARN_ON_ONCE(!rcu_read_lock_held());
        lockdep_assert_held(&q->queue_lock);
 
        /* request_queue is dying, do not create/recreate a blkg */
@@ -905,7 +948,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
 {
        struct blkg_iostat_set *bis = &blkg->iostat;
        u64 rbytes, wbytes, rios, wios, dbytes, dios;
-       bool has_stats = false;
        const char *dname;
        unsigned seq;
        int i;
@@ -931,14 +973,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
        } while (u64_stats_fetch_retry(&bis->sync, seq));
 
        if (rbytes || wbytes || rios || wios) {
-               has_stats = true;
                seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
                        rbytes, wbytes, rios, wios,
                        dbytes, dios);
        }
 
        if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
-               has_stats = true;
                seq_printf(s, " use_delay=%d delay_nsec=%llu",
                        atomic_read(&blkg->use_delay),
                        atomic64_read(&blkg->delay_nsec));
@@ -950,12 +990,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
                if (!blkg->pd[i] || !pol->pd_stat_fn)
                        continue;
 
-               if (pol->pd_stat_fn(blkg->pd[i], s))
-                       has_stats = true;
+               pol->pd_stat_fn(blkg->pd[i], s);
        }
 
-       if (has_stats)
-               seq_printf(s, "\n");
+       seq_puts(s, "\n");
 }
 
 static int blkcg_print_stat(struct seq_file *sf, void *v)
@@ -994,6 +1032,13 @@ static struct cftype blkcg_legacy_files[] = {
        { }     /* terminate */
 };
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
+{
+       return &css_to_blkcg(css)->cgwb_list;
+}
+#endif
+
 /*
  * blkcg destruction is a three-stage process.
  *
@@ -1015,25 +1060,6 @@ static struct cftype blkcg_legacy_files[] = {
  *    This finally frees the blkcg.
  */
 
-/**
- * blkcg_css_offline - cgroup css_offline callback
- * @css: css of interest
- *
- * This function is called when @css is about to go away.  Here the cgwbs are
- * offlined first and only once writeback associated with the blkcg has
- * finished do we start step 2 (see above).
- */
-static void blkcg_css_offline(struct cgroup_subsys_state *css)
-{
-       struct blkcg *blkcg = css_to_blkcg(css);
-
-       /* this prevents anyone from attaching or migrating to this blkcg */
-       wb_blkcg_offline(blkcg);
-
-       /* put the base online pin allowing step 2 to be triggered */
-       blkcg_unpin_online(blkcg);
-}
-
 /**
  * blkcg_destroy_blkgs - responsible for shooting down blkgs
  * @blkcg: blkcg of interest
@@ -1045,7 +1071,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
  *
  * This is the blkcg counterpart of ioc_release_fn().
  */
-void blkcg_destroy_blkgs(struct blkcg *blkcg)
+static void blkcg_destroy_blkgs(struct blkcg *blkcg)
 {
        might_sleep();
 
@@ -1075,6 +1101,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
        spin_unlock_irq(&blkcg->lock);
 }
 
+/**
+ * blkcg_pin_online - pin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * While pinned, a blkcg is kept online.  This is primarily used to
+ * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
+ * while an associated cgwb is still active.
+ */
+void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css)
+{
+       refcount_inc(&css_to_blkcg(blkcg_css)->online_pin);
+}
+
+/**
+ * blkcg_unpin_online - unpin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * This is primarily used to impedance-match blkg and cgwb lifetimes so
+ * that blkg doesn't go offline while an associated cgwb is still active.
+ * When this count goes to zero, all active cgwbs have finished so the
+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
+ */
+void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css)
+{
+       struct blkcg *blkcg = css_to_blkcg(blkcg_css);
+
+       do {
+               if (!refcount_dec_and_test(&blkcg->online_pin))
+                       break;
+               blkcg_destroy_blkgs(blkcg);
+               blkcg = blkcg_parent(blkcg);
+       } while (blkcg);
+}
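Taken together, the pin/unpin pair brackets the window in which a cgwb keeps its blkcg online. Roughly (a sketch, assuming the caller already holds a css reference):

        blkcg_pin_online(css);          /* blkcg stays online while the cgwb lives */
        /* ... writeback runs against the cgwb ... */
        blkcg_unpin_online(css);        /* the final unpin calls blkcg_destroy_blkgs()
                                         * and walks up via blkcg_parent() */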
+
+/**
+ * blkcg_css_offline - cgroup css_offline callback
+ * @css: css of interest
+ *
+ * This function is called when @css is about to go away.  Here the cgwbs are
+ * offlined first and only once writeback associated with the blkcg has
+ * finished do we start step 2 (see above).
+ */
+static void blkcg_css_offline(struct cgroup_subsys_state *css)
+{
+       /* this prevents anyone from attaching or migrating to this blkcg */
+       wb_blkcg_offline(css);
+
+       /* put the base online pin allowing step 2 to be triggered */
+       blkcg_unpin_online(css);
+}
+
 static void blkcg_css_free(struct cgroup_subsys_state *css)
 {
        struct blkcg *blkcg = css_to_blkcg(css);
@@ -1163,8 +1240,7 @@ unlock:
 
 static int blkcg_css_online(struct cgroup_subsys_state *css)
 {
-       struct blkcg *blkcg = css_to_blkcg(css);
-       struct blkcg *parent = blkcg_parent(blkcg);
+       struct blkcg *parent = blkcg_parent(css_to_blkcg(css));
 
        /*
         * blkcg_pin_online() is used to delay blkcg offline so that blkgs
@@ -1172,7 +1248,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
         * parent so that offline always happens towards the root.
         */
        if (parent)
-               blkcg_pin_online(parent);
+               blkcg_pin_online(css);
        return 0;
 }
 
@@ -1201,14 +1277,13 @@ int blkcg_init_queue(struct request_queue *q)
        preloaded = !radix_tree_preload(GFP_KERNEL);
 
        /* Make sure the root blkg exists. */
-       rcu_read_lock();
+       /* spin_lock_irq() can serve as an RCU read-side critical section. */
        spin_lock_irq(&q->queue_lock);
        blkg = blkg_create(&blkcg_root, q, new_blkg);
        if (IS_ERR(blkg))
                goto err_unlock;
        q->root_blkg = blkg;
        spin_unlock_irq(&q->queue_lock);
-       rcu_read_unlock();
 
        if (preloaded)
                radix_tree_preload_end();
@@ -1234,7 +1309,6 @@ err_destroy_all:
        return ret;
 err_unlock:
        spin_unlock_irq(&q->queue_lock);
-       rcu_read_unlock();
        if (preloaded)
                radix_tree_preload_end();
        return PTR_ERR(blkg);
@@ -1726,7 +1800,6 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
 void blkcg_maybe_throttle_current(void)
 {
        struct request_queue *q = current->throttle_queue;
-       struct cgroup_subsys_state *css;
        struct blkcg *blkcg;
        struct blkcg_gq *blkg;
        bool use_memdelay = current->use_memdelay;
@@ -1738,12 +1811,7 @@ void blkcg_maybe_throttle_current(void)
        current->use_memdelay = false;
 
        rcu_read_lock();
-       css = kthread_blkcg();
-       if (css)
-               blkcg = css_to_blkcg(css);
-       else
-               blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
-
+       blkcg = css_to_blkcg(blkcg_css());
        if (!blkcg)
                goto out;
        blkg = blkg_lookup(blkcg, q);
@@ -1889,7 +1957,7 @@ void bio_associate_blkg(struct bio *bio)
        rcu_read_lock();
 
        if (bio->bi_blkg)
-               css = &bio_blkcg(bio)->css;
+               css = bio_blkcg_css(bio);
        else
                css = blkcg_css();
 
@@ -1950,6 +2018,22 @@ void blk_cgroup_bio_start(struct bio *bio)
        put_cpu();
 }
 
+bool blk_cgroup_congested(void)
+{
+       struct cgroup_subsys_state *css;
+       bool ret = false;
+
+       rcu_read_lock();
+       for (css = blkcg_css(); css; css = css->parent) {
+               if (atomic_read(&css->cgroup->congestion_count)) {
+                       ret = true;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return ret;
+}
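A hypothetical caller would poll this before queueing more work, e.g. (sketch only; the back-off interval is made up):

        if (blk_cgroup_congested())
                io_schedule_timeout(HZ / 10);   /* back off while any ancestor is congested */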
+
 static int __init blkcg_init(void)
 {
        blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
index 47e1e38390c965a5b4d796616418b7cdbdae09f3..d4de0a35e0660aff2b0358499e807022a65cd07b 100644 (file)
  */
 
 #include <linux/blk-cgroup.h>
+#include <linux/cgroup.h>
+#include <linux/kthread.h>
 #include <linux/blk-mq.h>
 
+struct blkcg_gq;
+struct blkg_policy_data;
+
 /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
 #define BLKG_STAT_CPU_BATCH    (INT_MAX / 2)
 
 #ifdef CONFIG_BLK_CGROUP
 
+enum blkg_iostat_type {
+       BLKG_IOSTAT_READ,
+       BLKG_IOSTAT_WRITE,
+       BLKG_IOSTAT_DISCARD,
+
+       BLKG_IOSTAT_NR,
+};
+
+struct blkg_iostat {
+       u64                             bytes[BLKG_IOSTAT_NR];
+       u64                             ios[BLKG_IOSTAT_NR];
+};
+
+struct blkg_iostat_set {
+       struct u64_stats_sync           sync;
+       struct blkg_iostat              cur;
+       struct blkg_iostat              last;
+};
+
+/* association between a blk cgroup and a request queue */
+struct blkcg_gq {
+       /* Pointer to the associated request_queue */
+       struct request_queue            *q;
+       struct list_head                q_node;
+       struct hlist_node               blkcg_node;
+       struct blkcg                    *blkcg;
+
+       /* all non-root blkcg_gq's are guaranteed to have access to parent */
+       struct blkcg_gq                 *parent;
+
+       /* reference count */
+       struct percpu_ref               refcnt;
+
+       /* is this blkg online? protected by both blkcg and q locks */
+       bool                            online;
+
+       struct blkg_iostat_set __percpu *iostat_cpu;
+       struct blkg_iostat_set          iostat;
+
+       struct blkg_policy_data         *pd[BLKCG_MAX_POLS];
+
+       spinlock_t                      async_bio_lock;
+       struct bio_list                 async_bios;
+       union {
+               struct work_struct      async_bio_work;
+               struct work_struct      free_work;
+       };
+
+       atomic_t                        use_delay;
+       atomic64_t                      delay_nsec;
+       atomic64_t                      delay_start;
+       u64                             last_delay;
+       int                             last_use;
+
+       struct rcu_head                 rcu_head;
+};
+
+struct blkcg {
+       struct cgroup_subsys_state      css;
+       spinlock_t                      lock;
+       refcount_t                      online_pin;
+
+       struct radix_tree_root          blkg_tree;
+       struct blkcg_gq __rcu           *blkg_hint;
+       struct hlist_head               blkg_list;
+
+       struct blkcg_policy_data        *cpd[BLKCG_MAX_POLS];
+
+       struct list_head                all_blkcgs_node;
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
+       char                            fc_app_id[FC_APPID_LEN];
+#endif
+#ifdef CONFIG_CGROUP_WRITEBACK
+       struct list_head                cgwb_list;
+#endif
+};
+
+static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
+{
+       return css ? container_of(css, struct blkcg, css) : NULL;
+}
+
 /*
  * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
  * request_queue (q).  This is used by blkcg policies which need to track
@@ -63,7 +151,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
                                struct seq_file *s);
 
 struct blkcg_policy {
@@ -122,53 +210,15 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
                   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
-       struct cgroup_subsys_state *css;
-
-       css = kthread_blkcg();
-       if (css)
-               return css;
-       return task_css(current, io_cgrp_id);
-}
-
-/**
- * __bio_blkcg - internal, inconsistent version to get blkcg
- *
- * DO NOT USE.
- * This function is inconsistent and consequently is dangerous to use.  The
- * first part of the function returns a blkcg where a reference is owned by the
- * bio.  This means it does not need to be rcu protected as it cannot go away
- * with the bio owning a reference to it.  However, the latter potentially gets
- * it from task_css().  This can race against task migration and the cgroup
- * dying.  It is also semantically different as it must be called rcu protected
- * and is susceptible to failure when trying to get a reference to it.
- * Therefore, it is not ok to assume that *_get() will always succeed on the
- * blkcg returned here.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
-       if (bio && bio->bi_blkg)
-               return bio->bi_blkg->blkcg;
-       return css_to_blkcg(blkcg_css());
-}
-
 /**
  * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
  * @return: true if this bio needs to be submitted with the root blkg context.
  *
  * In order to avoid priority inversions we sometimes need to issue a bio as if
  * it were attached to the root blkg, and then backcharge to the actual owning
- * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
- * bio and attach the appropriate blkg to the bio.  Then we call this helper and
- * if it is true run with the root blkg for that queue and then do any
+ * blkg.  The idea is we do bio_blkcg_css() to look up the actual context for
+ * the bio and attach the appropriate blkg to the bio.  Then we call this helper
+ * and if it is true run with the root blkg for that queue and then do any
  * backcharging to the originating cgroup once the io is complete.
  */
 static inline bool bio_issue_as_root_blkg(struct bio *bio)
@@ -457,7 +507,8 @@ struct blkcg_policy_data {
 struct blkcg_policy {
 };
 
-#ifdef CONFIG_BLOCK
+struct blkcg {
+};
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
 static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
@@ -471,8 +522,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
 static inline void blkcg_deactivate_policy(struct request_queue *q,
                                           const struct blkcg_policy *pol) { }
 
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
-
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
                                                  struct blkcg_policy *pol) { return NULL; }
 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -488,7 +537,6 @@ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { r
 #define blk_queue_for_each_rl(rl, q)   \
        for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
 
-#endif /* CONFIG_BLOCK */
 #endif /* CONFIG_BLK_CGROUP */
 
 #endif /* _BLK_CGROUP_PRIVATE_H */
index bc050677215235d072bfb3d2f01c2514f76662e6..80fa73c419a99646de341a64dcc0c49d1b9ec89f 100644 (file)
@@ -588,10 +588,9 @@ static inline int bio_check_eod(struct bio *bio)
            (nr_sectors > maxsector ||
             bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
                pr_info_ratelimited("%s: attempt to access beyond end of device\n"
-                                   "%pg: rw=%d, want=%llu, limit=%llu\n",
-                                   current->comm,
-                                   bio->bi_bdev, bio->bi_opf,
-                                   bio_end_sector(bio), maxsector);
+                                   "%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n",
+                                   current->comm, bio->bi_bdev, bio->bi_opf,
+                                   bio->bi_iter.bi_sector, nr_sectors, maxsector);
                return -EIO;
        }
        return 0;
@@ -816,11 +815,11 @@ void submit_bio_noacct(struct bio *bio)
 
        switch (bio_op(bio)) {
        case REQ_OP_DISCARD:
-               if (!blk_queue_discard(q))
+               if (!bdev_max_discard_sectors(bdev))
                        goto not_supported;
                break;
        case REQ_OP_SECURE_ERASE:
-               if (!blk_queue_secure_erase(q))
+               if (!bdev_max_secure_erase_sectors(bdev))
                        goto not_supported;
                break;
        case REQ_OP_ZONE_APPEND:
@@ -889,19 +888,11 @@ void submit_bio(struct bio *bio)
        if (blkcg_punt_bio_submit(bio))
                return;
 
-       /*
-        * If it's a regular read/write or a barrier with data attached,
-        * go through the normal accounting stuff before submission.
-        */
-       if (bio_has_data(bio)) {
-               unsigned int count = bio_sectors(bio);
-
-               if (op_is_write(bio_op(bio))) {
-                       count_vm_events(PGPGOUT, count);
-               } else {
-                       task_io_account_read(bio->bi_iter.bi_size);
-                       count_vm_events(PGPGIN, count);
-               }
+       if (bio_op(bio) == REQ_OP_READ) {
+               task_io_account_read(bio->bi_iter.bi_size);
+               count_vm_events(PGPGIN, bio_sectors(bio));
+       } else if (bio_op(bio) == REQ_OP_WRITE) {
+               count_vm_events(PGPGOUT, bio_sectors(bio));
        }
 
        /*
@@ -1018,21 +1009,22 @@ again:
        }
 }
 
-static unsigned long __part_start_io_acct(struct block_device *part,
-                                         unsigned int sectors, unsigned int op,
-                                         unsigned long start_time)
+unsigned long bdev_start_io_acct(struct block_device *bdev,
+                                unsigned int sectors, unsigned int op,
+                                unsigned long start_time)
 {
        const int sgrp = op_stat_group(op);
 
        part_stat_lock();
-       update_io_ticks(part, start_time, false);
-       part_stat_inc(part, ios[sgrp]);
-       part_stat_add(part, sectors[sgrp], sectors);
-       part_stat_local_inc(part, in_flight[op_is_write(op)]);
+       update_io_ticks(bdev, start_time, false);
+       part_stat_inc(bdev, ios[sgrp]);
+       part_stat_add(bdev, sectors[sgrp], sectors);
+       part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
        part_stat_unlock();
 
        return start_time;
 }
+EXPORT_SYMBOL(bdev_start_io_acct);
 
 /**
  * bio_start_io_acct_time - start I/O accounting for bio based drivers
@@ -1041,8 +1033,8 @@ static unsigned long __part_start_io_acct(struct block_device *part,
  */
 void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
 {
-       __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
-                            bio_op(bio), start_time);
+       bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+                          bio_op(bio), start_time);
 }
 EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
 
@@ -1054,46 +1046,33 @@ EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
  */
 unsigned long bio_start_io_acct(struct bio *bio)
 {
-       return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
-                                   bio_op(bio), jiffies);
+       return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+                                 bio_op(bio), jiffies);
 }
 EXPORT_SYMBOL_GPL(bio_start_io_acct);
 
-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
-                                unsigned int op)
-{
-       return __part_start_io_acct(disk->part0, sectors, op, jiffies);
-}
-EXPORT_SYMBOL(disk_start_io_acct);
-
-static void __part_end_io_acct(struct block_device *part, unsigned int op,
-                              unsigned long start_time)
+void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
+                     unsigned long start_time)
 {
        const int sgrp = op_stat_group(op);
        unsigned long now = READ_ONCE(jiffies);
        unsigned long duration = now - start_time;
 
        part_stat_lock();
-       update_io_ticks(part, now, true);
-       part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
-       part_stat_local_dec(part, in_flight[op_is_write(op)]);
+       update_io_ticks(bdev, now, true);
+       part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
+       part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
        part_stat_unlock();
 }
+EXPORT_SYMBOL(bdev_end_io_acct);
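With both halves exported, a driver that accounts I/O against an arbitrary block_device, rather than a bio, pairs them like this (a sketch; dev->bdev and the write direction are assumptions):

        unsigned long start;

        start = bdev_start_io_acct(dev->bdev, nr_sectors, REQ_OP_WRITE, jiffies);
        /* ... carry out the transfer ... */
        bdev_end_io_acct(dev->bdev, REQ_OP_WRITE, start);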
 
 void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
-               struct block_device *orig_bdev)
+                             struct block_device *orig_bdev)
 {
-       __part_end_io_acct(orig_bdev, bio_op(bio), start_time);
+       bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
 }
 EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
 
-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
-                     unsigned long start_time)
-{
-       __part_end_io_acct(disk->part0, op, start_time);
-}
-EXPORT_SYMBOL(disk_end_io_acct);
-
 /**
  * blk_lld_busy - Check if underlying low-level drivers of a device are busy
  * @q : the queue of the device being checked
index 7c854584b52b506fece323d6275948a10f9a7849..621abd1b0e4d329655b2ea276bed7005e93522ef 100644 (file)
@@ -152,23 +152,25 @@ static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio)
 
        src_bio->bi_status = enc_bio->bi_status;
 
-       bio_put(enc_bio);
+       bio_uninit(enc_bio);
+       kfree(enc_bio);
        bio_endio(src_bio);
 }
 
 static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
 {
+       unsigned int nr_segs = bio_segments(bio_src);
        struct bvec_iter iter;
        struct bio_vec bv;
        struct bio *bio;
 
-       bio = bio_kmalloc(GFP_NOIO, bio_segments(bio_src));
+       bio = bio_kmalloc(nr_segs, GFP_NOIO);
        if (!bio)
                return NULL;
-       bio->bi_bdev            = bio_src->bi_bdev;
+       bio_init(bio, bio_src->bi_bdev, bio->bi_inline_vecs, nr_segs,
+                bio_src->bi_opf);
        if (bio_flagged(bio_src, BIO_REMAPPED))
                bio_set_flag(bio, BIO_REMAPPED);
-       bio->bi_opf             = bio_src->bi_opf;
        bio->bi_ioprio          = bio_src->bi_ioprio;
        bio->bi_iter.bi_sector  = bio_src->bi_iter.bi_sector;
        bio->bi_iter.bi_size    = bio_src->bi_iter.bi_size;
@@ -177,7 +179,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
                bio->bi_io_vec[bio->bi_vcnt++] = bv;
 
        bio_clone_blkg_association(bio, bio_src);
-       blkcg_bio_issue_init(bio);
 
        return bio;
 }
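The pattern above reflects the reworked bio_kmalloc() convention used throughout this series: allocation, initialization, and teardown are now three explicit steps (sketch under that assumption; nr_vecs, bdev, and opf stand in for caller state):

        bio = bio_kmalloc(nr_vecs, GFP_NOIO);
        if (!bio)
                return NULL;
        bio_init(bio, bdev, bio->bi_inline_vecs, nr_vecs, opf);
        /* ... fill and submit the bio ... */
        bio_uninit(bio);        /* replaces bio_put() for kmalloc-ed bios */
        kfree(bio);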
@@ -363,8 +364,8 @@ out_release_keyslot:
        blk_crypto_put_keyslot(slot);
 out_put_enc_bio:
        if (enc_bio)
-               bio_put(enc_bio);
-
+               bio_uninit(enc_bio);
+       kfree(enc_bio);
        return ret;
 }
 
index 9bd670999d0af467deb0f32e5986bfc09873cd31..33a11ba971eafeb0ac9e022ffb5032ee06f133be 100644 (file)
@@ -533,8 +533,7 @@ struct ioc_gq {
 
        /* statistics */
        struct iocg_pcpu_stat __percpu  *pcpu_stat;
-       struct iocg_stat                local_stat;
-       struct iocg_stat                desc_stat;
+       struct iocg_stat                stat;
        struct iocg_stat                last_stat;
        u64                             last_stat_abs_vusage;
        u64                             usage_delta_us;
@@ -1371,7 +1370,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
                return true;
        } else {
                if (iocg->indelay_since) {
-                       iocg->local_stat.indelay_us += now->now - iocg->indelay_since;
+                       iocg->stat.indelay_us += now->now - iocg->indelay_since;
                        iocg->indelay_since = 0;
                }
                iocg->delay = 0;
@@ -1419,7 +1418,7 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
 
        /* if debt is paid in full, restore inuse */
        if (!iocg->abs_vdebt) {
-               iocg->local_stat.indebt_us += now->now - iocg->indebt_since;
+               iocg->stat.indebt_us += now->now - iocg->indebt_since;
                iocg->indebt_since = 0;
 
                propagate_weights(iocg, iocg->active, iocg->last_inuse,
@@ -1513,7 +1512,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
 
        if (!waitqueue_active(&iocg->waitq)) {
                if (iocg->wait_since) {
-                       iocg->local_stat.wait_us += now->now - iocg->wait_since;
+                       iocg->stat.wait_us += now->now - iocg->wait_since;
                        iocg->wait_since = 0;
                }
                return;
@@ -1641,11 +1640,30 @@ static void iocg_build_inner_walk(struct ioc_gq *iocg,
        }
 }
 
+/* propagate the deltas to the parent */
+static void iocg_flush_stat_upward(struct ioc_gq *iocg)
+{
+       if (iocg->level > 0) {
+               struct iocg_stat *parent_stat =
+                       &iocg->ancestors[iocg->level - 1]->stat;
+
+               parent_stat->usage_us +=
+                       iocg->stat.usage_us - iocg->last_stat.usage_us;
+               parent_stat->wait_us +=
+                       iocg->stat.wait_us - iocg->last_stat.wait_us;
+               parent_stat->indebt_us +=
+                       iocg->stat.indebt_us - iocg->last_stat.indebt_us;
+               parent_stat->indelay_us +=
+                       iocg->stat.indelay_us - iocg->last_stat.indelay_us;
+       }
+
+       iocg->last_stat = iocg->stat;
+}
+
 /* collect per-cpu counters and propagate the deltas to the parent */
-static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
+static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now)
 {
        struct ioc *ioc = iocg->ioc;
-       struct iocg_stat new_stat;
        u64 abs_vusage = 0;
        u64 vusage_delta;
        int cpu;
@@ -1661,34 +1679,9 @@ static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
        iocg->last_stat_abs_vusage = abs_vusage;
 
        iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate);
-       iocg->local_stat.usage_us += iocg->usage_delta_us;
-
-       /* propagate upwards */
-       new_stat.usage_us =
-               iocg->local_stat.usage_us + iocg->desc_stat.usage_us;
-       new_stat.wait_us =
-               iocg->local_stat.wait_us + iocg->desc_stat.wait_us;
-       new_stat.indebt_us =
-               iocg->local_stat.indebt_us + iocg->desc_stat.indebt_us;
-       new_stat.indelay_us =
-               iocg->local_stat.indelay_us + iocg->desc_stat.indelay_us;
-
-       /* propagate the deltas to the parent */
-       if (iocg->level > 0) {
-               struct iocg_stat *parent_stat =
-                       &iocg->ancestors[iocg->level - 1]->desc_stat;
+       iocg->stat.usage_us += iocg->usage_delta_us;
 
-               parent_stat->usage_us +=
-                       new_stat.usage_us - iocg->last_stat.usage_us;
-               parent_stat->wait_us +=
-                       new_stat.wait_us - iocg->last_stat.wait_us;
-               parent_stat->indebt_us +=
-                       new_stat.indebt_us - iocg->last_stat.indebt_us;
-               parent_stat->indelay_us +=
-                       new_stat.indelay_us - iocg->last_stat.indelay_us;
-       }
-
-       iocg->last_stat = new_stat;
+       iocg_flush_stat_upward(iocg);
 }
 
 /* get stat counters ready for reading on all active iocgs */
@@ -1699,13 +1692,13 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now)
 
        /* flush leaves and build inner node walk list */
        list_for_each_entry(iocg, target_iocgs, active_list) {
-               iocg_flush_stat_one(iocg, now);
+               iocg_flush_stat_leaf(iocg, now);
                iocg_build_inner_walk(iocg, &inner_walk);
        }
 
        /* keep flushing upwards by walking the inner list backwards */
        list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) {
-               iocg_flush_stat_one(iocg, now);
+               iocg_flush_stat_upward(iocg);
                list_del_init(&iocg->walk_list);
        }
 }
@@ -2152,16 +2145,16 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now)
 
                /* flush wait and indebt stat deltas */
                if (iocg->wait_since) {
-                       iocg->local_stat.wait_us += now->now - iocg->wait_since;
+                       iocg->stat.wait_us += now->now - iocg->wait_since;
                        iocg->wait_since = now->now;
                }
                if (iocg->indebt_since) {
-                       iocg->local_stat.indebt_us +=
+                       iocg->stat.indebt_us +=
                                now->now - iocg->indebt_since;
                        iocg->indebt_since = now->now;
                }
                if (iocg->indelay_since) {
-                       iocg->local_stat.indelay_us +=
+                       iocg->stat.indelay_us +=
                                now->now - iocg->indelay_since;
                        iocg->indelay_since = now->now;
                }
@@ -3005,13 +2998,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
        kfree(iocg);
 }
 
-static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
 {
        struct ioc_gq *iocg = pd_to_iocg(pd);
        struct ioc *ioc = iocg->ioc;
 
        if (!ioc->enabled)
-               return false;
+               return;
 
        if (iocg->level == 0) {
                unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
@@ -3027,7 +3020,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
                        iocg->last_stat.wait_us,
                        iocg->last_stat.indebt_us,
                        iocg->last_stat.indelay_us);
-       return true;
 }
 
 static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
index 2f33932e72e368124331e74ee87df5180b4b15a1..5b676c7cf2b634d038f3f0ba0f6f5bb6230802f4 100644 (file)
@@ -891,7 +891,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
        return 0;
 }
 
-static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
+static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
 {
        struct latency_stat stat;
        int cpu;
@@ -914,17 +914,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
                        (unsigned long long)stat.ps.missed,
                        (unsigned long long)stat.ps.total,
                        iolat->rq_depth.max_depth);
-       return true;
 }
 
-static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
 {
        struct iolatency_grp *iolat = pd_to_lat(pd);
        unsigned long long avg_lat;
        unsigned long long cur_win;
 
        if (!blkcg_debug_stats)
-               return false;
+               return;
 
        if (iolat->ssd)
                return iolatency_ssd_stat(iolat, s);
@@ -937,7 +936,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
        else
                seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
                        iolat->rq_depth.max_depth, avg_lat, cur_win);
-       return true;
 }
 
 static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
index 237d60d8b585799916fcab8415e020796ed01cce..09b7e1200c0f40fb0d539ba889db9b8aa553b668 100644 (file)
 
 #include "blk.h"
 
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
+{
+       unsigned int discard_granularity = bdev_discard_granularity(bdev);
+       sector_t granularity_aligned_sector;
+
+       if (bdev_is_partition(bdev))
+               sector += bdev->bd_start_sect;
+
+       granularity_aligned_sector =
+               round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+       /*
+        * Make sure subsequent bios start aligned to the discard granularity
+        * if this bio needs to be split.
+        */
+       if (granularity_aligned_sector != sector)
+               return granularity_aligned_sector - sector;
+
+       /*
+        * Align the bio size to the discard granularity so that, if the driver
+        * needs to split the bio, the fragments stay granularity-aligned.
+        */
+       return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
+}
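A worked example with hypothetical numbers: with a 1 MiB discard granularity (2048 sectors) and sector == 1000, round_up() yields 2048, so the helper returns 1048 sectors and the next bio starts granularity-aligned; once a bio starts aligned, the helper caps it at round_down(UINT_MAX, 1 MiB) >> SECTOR_SHIFT sectors.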
+
 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, int flags,
-               struct bio **biop)
+               sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
        struct bio *bio = *biop;
-       unsigned int op;
-       sector_t bs_mask, part_offset = 0;
+       sector_t bs_mask;
 
        if (bdev_read_only(bdev))
                return -EPERM;
-
-       if (flags & BLKDEV_DISCARD_SECURE) {
-               if (!blk_queue_secure_erase(q))
-                       return -EOPNOTSUPP;
-               op = REQ_OP_SECURE_ERASE;
-       } else {
-               if (!blk_queue_discard(q))
-                       return -EOPNOTSUPP;
-               op = REQ_OP_DISCARD;
-       }
+       if (!bdev_max_discard_sectors(bdev))
+               return -EOPNOTSUPP;
 
        /* In case the discard granularity isn't set by buggy device driver */
-       if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
+       if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) {
                char dev_name[BDEVNAME_SIZE];
 
                bdevname(bdev, dev_name);
@@ -48,38 +62,11 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        if (!nr_sects)
                return -EINVAL;
 
-       /* In case the discard request is in a partition */
-       if (bdev_is_partition(bdev))
-               part_offset = bdev->bd_start_sect;
-
        while (nr_sects) {
-               sector_t granularity_aligned_lba, req_sects;
-               sector_t sector_mapped = sector + part_offset;
-
-               granularity_aligned_lba = round_up(sector_mapped,
-                               q->limits.discard_granularity >> SECTOR_SHIFT);
-
-               /*
-                * Check whether the discard bio starts at a discard_granularity
-                * aligned LBA,
-                * - If no: set (granularity_aligned_lba - sector_mapped) to
-                *   bi_size of the first split bio, then the second bio will
-                *   start at a discard_granularity aligned LBA on the device.
-                * - If yes: use bio_aligned_discard_max_sectors() as the max
-                *   possible bi_size of the first split bio. Then when this bio
-                *   is split in device drive, the split ones are very probably
-                *   to be aligned to discard_granularity of the device's queue.
-                */
-               if (granularity_aligned_lba == sector_mapped)
-                       req_sects = min_t(sector_t, nr_sects,
-                                         bio_aligned_discard_max_sectors(q));
-               else
-                       req_sects = min_t(sector_t, nr_sects,
-                                         granularity_aligned_lba - sector_mapped);
-
-               WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
+               sector_t req_sects =
+                       min(nr_sects, bio_discard_limit(bdev, sector));
 
-               bio = blk_next_bio(bio, bdev, 0, op, gfp_mask);
+               bio = blk_next_bio(bio, bdev, 0, REQ_OP_DISCARD, gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio->bi_iter.bi_size = req_sects << 9;
                sector += req_sects;
@@ -105,21 +92,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
  * @sector:    start sector
  * @nr_sects:  number of sectors to discard
  * @gfp_mask:  memory allocation flags (for bio_alloc)
- * @flags:     BLKDEV_DISCARD_* flags to control behaviour
  *
  * Description:
  *    Issue a discard request for the sectors in question.
  */
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+               sector_t nr_sects, gfp_t gfp_mask)
 {
        struct bio *bio = NULL;
        struct blk_plug plug;
        int ret;
 
        blk_start_plug(&plug);
-       ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
-                       &bio);
+       ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
        if (!ret && bio) {
                ret = submit_bio_wait(bio);
                if (ret == -EOPNOTSUPP)
@@ -316,3 +301,42 @@ retry:
        return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp)
+{
+       sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+       unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
+       struct bio *bio = NULL;
+       struct blk_plug plug;
+       int ret = 0;
+
+       if (max_sectors == 0)
+               return -EOPNOTSUPP;
+       if ((sector | nr_sects) & bs_mask)
+               return -EINVAL;
+       if (bdev_read_only(bdev))
+               return -EPERM;
+
+       blk_start_plug(&plug);
+       for (;;) {
+               unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+               bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+               bio->bi_iter.bi_sector = sector;
+               /* len is in sectors; bi_size is in bytes */
+               bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+
+               sector += len;
+               nr_sects -= len;
+               if (!nr_sects) {
+                       ret = submit_bio_wait(bio);
+                       bio_put(bio);
+                       break;
+               }
+               cond_resched();
+       }
+       blk_finish_plug(&plug);
+
+       return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
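The reworked ioctl path below becomes the first in-tree user; any other caller would look the same (sketch; start and len as byte offsets are assumptions):

        err = blkdev_issue_secure_erase(bdev, start >> SECTOR_SHIFT,
                                        len >> SECTOR_SHIFT, GFP_KERNEL);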
index c7f71d83eff18924898fb75c41326f98065d7971..df8b066cd548913ca6455d81fee594d2feeed69c 100644 (file)
@@ -152,10 +152,10 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
        nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
 
        ret = -ENOMEM;
-       bio = bio_kmalloc(gfp_mask, nr_pages);
+       bio = bio_kmalloc(nr_pages, gfp_mask);
        if (!bio)
                goto out_bmd;
-       bio->bi_opf |= req_op(rq);
+       bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq));
 
        if (map_data) {
                nr_pages = 1 << map_data->page_order;
@@ -224,7 +224,8 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 cleanup:
        if (!map_data)
                bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
 out_bmd:
        kfree(bmd);
        return ret;
@@ -234,6 +235,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
                gfp_t gfp_mask)
 {
        unsigned int max_sectors = queue_max_hw_sectors(rq->q);
+       unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
        struct bio *bio;
        int ret;
        int j;
@@ -241,10 +243,10 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
        if (!iov_iter_count(iter))
                return -EINVAL;
 
-       bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS));
+       bio = bio_kmalloc(nr_vecs, gfp_mask);
        if (!bio)
                return -ENOMEM;
-       bio->bi_opf |= req_op(rq);
+       bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
 
        while (iov_iter_count(iter)) {
                struct page **pages;
@@ -260,10 +262,9 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 
                npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
 
-               if (unlikely(offs & queue_dma_alignment(rq->q))) {
-                       ret = -EINVAL;
+               if (unlikely(offs & queue_dma_alignment(rq->q)))
                        j = 0;
-               } else {
+               else {
                        for (j = 0; j < npages; j++) {
                                struct page *page = pages[j];
                                unsigned int n = PAGE_SIZE - offs;
@@ -303,7 +304,8 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 
  out_unmap:
        bio_release_pages(bio, false);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
        return ret;
 }
 
@@ -323,7 +325,8 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
 static void bio_map_kern_endio(struct bio *bio)
 {
        bio_invalidate_vmalloc_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
 }
 
 /**
@@ -348,9 +351,10 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
        int offset, i;
        struct bio *bio;
 
-       bio = bio_kmalloc(gfp_mask, nr_pages);
+       bio = bio_kmalloc(nr_pages, gfp_mask);
        if (!bio)
                return ERR_PTR(-ENOMEM);
+       bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
 
        if (is_vmalloc) {
                flush_kernel_vmap_range(data, len);
@@ -374,7 +378,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
                if (bio_add_pc_page(q, bio, page, bytes,
                                    offset) < bytes) {
                        /* we don't support partial mappings */
-                       bio_put(bio);
+                       bio_uninit(bio);
+                       kfree(bio);
                        return ERR_PTR(-EINVAL);
                }
 
@@ -390,7 +395,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
 static void bio_copy_kern_endio(struct bio *bio)
 {
        bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
 }
 
 static void bio_copy_kern_endio_read(struct bio *bio)
@@ -435,9 +441,10 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
                return ERR_PTR(-EINVAL);
 
        nr_pages = end - start;
-       bio = bio_kmalloc(gfp_mask, nr_pages);
+       bio = bio_kmalloc(nr_pages, gfp_mask);
        if (!bio)
                return ERR_PTR(-ENOMEM);
+       bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
 
        while (len) {
                struct page *page;
@@ -471,7 +478,8 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
 
 cleanup:
        bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
        return ERR_PTR(-ENOMEM);
 }
 
@@ -602,7 +610,8 @@ int blk_rq_unmap_user(struct bio *bio)
 
                next_bio = bio;
                bio = bio->bi_next;
-               bio_put(next_bio);
+               bio_uninit(next_bio);
+               kfree(next_bio);
        }
 
        return ret;
@@ -648,8 +657,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        bio->bi_opf |= req_op(rq);
 
        ret = blk_rq_append_bio(rq, bio);
-       if (unlikely(ret))
-               bio_put(bio);
+       if (unlikely(ret)) {
+               bio_uninit(bio);
+               kfree(bio);
+       }
        return ret;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
index aa0349e9f083b9495fd3ec17973e42636810146d..7e4136a60e1cc69d1fddf9fb6f9b8342c5d329f7 100644 (file)
@@ -113,10 +113,8 @@ static const char *const blk_queue_flag_name[] = {
        QUEUE_FLAG_NAME(FAIL_IO),
        QUEUE_FLAG_NAME(NONROT),
        QUEUE_FLAG_NAME(IO_STAT),
-       QUEUE_FLAG_NAME(DISCARD),
        QUEUE_FLAG_NAME(NOXMERGES),
        QUEUE_FLAG_NAME(ADD_RANDOM),
-       QUEUE_FLAG_NAME(SECERASE),
        QUEUE_FLAG_NAME(SAME_FORCE),
        QUEUE_FLAG_NAME(DEAD),
        QUEUE_FLAG_NAME(INIT_DONE),
index 84d749511f5516add12060ac888764446b0235b5..ae116b7556482a43de920fcee24955b4fdb24bf9 100644 (file)
@@ -1083,7 +1083,7 @@ bool blk_mq_complete_request_remote(struct request *rq)
        WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
 
        /*
-        * For a polled request, always complete locallly, it's pointless
+        * For a polled request, always complete locally, it's pointless
         * to redirect the completion.
         */
        if (rq->cmd_flags & REQ_POLLED)
@@ -1169,6 +1169,62 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
        complete(waiting);
 }
 
+/*
+ * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * queues. This is important for md arrays to benefit from merging
+ * requests.
+ */
+static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
+{
+       if (plug->multiple_queues)
+               return BLK_MAX_REQUEST_COUNT * 2;
+       return BLK_MAX_REQUEST_COUNT;
+}
+
+static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
+{
+       struct request *last = rq_list_peek(&plug->mq_list);
+
+       if (!plug->rq_count) {
+               trace_block_plug(rq->q);
+       } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
+                  (!blk_queue_nomerges(rq->q) &&
+                   blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
+               blk_mq_flush_plug_list(plug, false);
+               trace_block_plug(rq->q);
+       }
+
+       if (!plug->multiple_queues && last && last->q != rq->q)
+               plug->multiple_queues = true;
+       if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+               plug->has_elevator = true;
+       rq->rq_next = NULL;
+       rq_list_add(&plug->mq_list, rq);
+       plug->rq_count++;
+}
+
+static void __blk_execute_rq_nowait(struct request *rq, bool at_head,
+               rq_end_io_fn *done, bool use_plug)
+{
+       WARN_ON(irqs_disabled());
+       WARN_ON(!blk_rq_is_passthrough(rq));
+
+       rq->end_io = done;
+
+       blk_account_io_start(rq);
+
+       if (use_plug && current->plug) {
+               blk_add_rq_to_plug(current->plug, rq);
+               return;
+       }
+       /*
+        * Don't check the dying flag for MQ because the request won't
+        * be reused after the dying flag is set.
+        */
+       blk_mq_sched_insert_request(rq, at_head, true, false);
+}
+
 /**
  * blk_execute_rq_nowait - insert a request to I/O scheduler for execution
  * @rq:                request to insert
@@ -1184,18 +1240,8 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
  */
 void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done)
 {
-       WARN_ON(irqs_disabled());
-       WARN_ON(!blk_rq_is_passthrough(rq));
+       __blk_execute_rq_nowait(rq, at_head, done, true);
 
-       rq->end_io = done;
-
-       blk_account_io_start(rq);
-
-       /*
-        * don't check dying flag for MQ because the request won't
-        * be reused after dying flag is set
-        */
-       blk_mq_sched_insert_request(rq, at_head, true, false);
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
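With plugging wired up, a submitter that issues several passthrough requests back to back now gets them dispatched in one batch (illustrative sketch; prep_rq() is hypothetical):

        struct blk_plug plug;

        blk_start_plug(&plug);
        for (i = 0; i < n; i++)
                blk_execute_rq_nowait(prep_rq(i), false, done); /* queued on the plug */
        blk_finish_plug(&plug);         /* flushed to the driver in one batch */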
 
@@ -1233,8 +1279,13 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
        DECLARE_COMPLETION_ONSTACK(wait);
        unsigned long hang_check;
 
+       /*
+        * Polled requests must reach the driver at submission time, so
+        * they cannot go through the plug.
+        */
        rq->end_io_data = &wait;
-       blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq);
+       __blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq,
+                       !blk_rq_is_poll(rq));
 
        /* Prevent hang_check timer from firing at us during very long I/O */
        hang_check = sysctl_hung_task_timeout_secs;
@@ -2676,40 +2727,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
                hctx->queue->mq_ops->commit_rqs(hctx);
 }
 
-/*
- * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
- * queues. This is important for md arrays to benefit from merging
- * requests.
- */
-static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
-{
-       if (plug->multiple_queues)
-               return BLK_MAX_REQUEST_COUNT * 2;
-       return BLK_MAX_REQUEST_COUNT;
-}
-
-static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
-{
-       struct request *last = rq_list_peek(&plug->mq_list);
-
-       if (!plug->rq_count) {
-               trace_block_plug(rq->q);
-       } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
-                  (!blk_queue_nomerges(rq->q) &&
-                   blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
-               blk_mq_flush_plug_list(plug, false);
-               trace_block_plug(rq->q);
-       }
-
-       if (!plug->multiple_queues && last && last->q != rq->q)
-               plug->multiple_queues = true;
-       if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
-               plug->has_elevator = true;
-       rq->rq_next = NULL;
-       rq_list_add(&plug->mq_list, rq);
-       plug->rq_count++;
-}
-
 static bool blk_mq_attempt_bio_merge(struct request_queue *q,
                                     struct bio *bio, unsigned int nr_segs)
 {
index b83df3d2eebcaac76dccdbc6427c9f172012f28c..6ccceb421ed2f7503b96c5e9cf58633400510d3f 100644 (file)
@@ -46,6 +46,7 @@ void blk_set_default_limits(struct queue_limits *lim)
        lim->max_zone_append_sectors = 0;
        lim->max_discard_sectors = 0;
        lim->max_hw_discard_sectors = 0;
+       lim->max_secure_erase_sectors = 0;
        lim->discard_granularity = 0;
        lim->discard_alignment = 0;
        lim->discard_misaligned = 0;
@@ -176,6 +177,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
 
+/**
+ * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
+ * @q:  the request queue for the device
+ * @max_sectors: maximum number of sectors to secure_erase
+ **/
+void blk_queue_max_secure_erase_sectors(struct request_queue *q,
+               unsigned int max_sectors)
+{
+       q->limits.max_secure_erase_sectors = max_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors);
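A driver advertising secure erase support would call this at queue setup time, e.g. (sketch; the limit value is an assumption standing in for hypothetical hardware state):

        blk_queue_max_secure_erase_sectors(q, card->erase_max_sectors);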
+
 /**
  * blk_queue_max_write_zeroes_sectors - set max sectors for a single
  *                                      write zeroes
@@ -468,6 +481,40 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
 
+static int queue_limit_alignment_offset(struct queue_limits *lim,
+               sector_t sector)
+{
+       unsigned int granularity = max(lim->physical_block_size, lim->io_min);
+       unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
+               << SECTOR_SHIFT;
+
+       return (granularity + lim->alignment_offset - alignment) % granularity;
+}
+
+static unsigned int queue_limit_discard_alignment(struct queue_limits *lim,
+               sector_t sector)
+{
+       unsigned int alignment, granularity, offset;
+
+       if (!lim->max_discard_sectors)
+               return 0;
+
+       /* Why are these in bytes, not sectors? */
+       alignment = lim->discard_alignment >> SECTOR_SHIFT;
+       granularity = lim->discard_granularity >> SECTOR_SHIFT;
+       if (!granularity)
+               return 0;
+
+       /* Offset of the partition start in 'granularity' sectors */
+       offset = sector_div(sector, granularity);
+
+       /* And why do we do this modulus *again* in blkdev_issue_discard()? */
+       offset = (granularity + alignment - offset) % granularity;
+
+       /* Turn it back into bytes, gaah */
+       return offset << SECTOR_SHIFT;
+}
+
 static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
 {
        sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
@@ -627,7 +674,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
                        t->discard_granularity;
        }
-
+       t->max_secure_erase_sectors = min_not_zero(t->max_secure_erase_sectors,
+                                                  b->max_secure_erase_sectors);
        t->zone_write_granularity = max(t->zone_write_granularity,
                                        b->zone_write_granularity);
        t->zoned = max(t->zoned, b->zoned);
@@ -901,3 +949,27 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
        }
 }
 EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
+
+int bdev_alignment_offset(struct block_device *bdev)
+{
+       struct request_queue *q = bdev_get_queue(bdev);
+
+       if (q->limits.misaligned)
+               return -1;
+       if (bdev_is_partition(bdev))
+               return queue_limit_alignment_offset(&q->limits,
+                               bdev->bd_start_sect);
+       return q->limits.alignment_offset;
+}
+EXPORT_SYMBOL_GPL(bdev_alignment_offset);
+
+unsigned int bdev_discard_alignment(struct block_device *bdev)
+{
+       struct request_queue *q = bdev_get_queue(bdev);
+
+       if (bdev_is_partition(bdev))
+               return queue_limit_discard_alignment(&q->limits,
+                               bdev->bd_start_sect);
+       return q->limits.discard_alignment;
+}
+EXPORT_SYMBOL_GPL(bdev_discard_alignment);
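Callers that previously reached into q->limits now just pass the block_device and let the helper fold in the partition offset (sketch):

        unsigned int alignment = bdev_discard_alignment(bdev);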
index 469c483719bea8309a92a14a6c2c39cb9cc5bd15..139b2d7a99e2fae6a4d40fd1e23783d096064330 100644 (file)
@@ -227,7 +227,7 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
                break;                                                  \
        if ((__tg)) {                                                   \
                blk_add_cgroup_trace_msg(__td->queue,                   \
-                       tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
+                       &tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\
        } else {                                                        \
                blk_add_trace_msg(__td->queue, "throtl " fmt, ##args);  \
        }                                                               \
@@ -2189,13 +2189,14 @@ again:
        }
 
 out_unlock:
-       spin_unlock_irq(&q->queue_lock);
        bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
        if (throttled || !td->track_bio_latency)
                bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
 #endif
+       spin_unlock_irq(&q->queue_lock);
+
        rcu_read_unlock();
        return throttled;
 }
index 8ccbc6e076369b753f21f56b62f8baf4e4e4ef61..434017701403fb699668ec4db2a6a467b2b6ae9f 100644 (file)
@@ -346,20 +346,6 @@ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
        return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
 }
 
-/*
- * The max bio size which is aligned to q->limits.discard_granularity. This
- * is a hint to split large discard bio in generic block layer, then if device
- * driver needs to split the discard bio into smaller ones, their bi_size can
- * be very probably and easily aligned to discard_granularity of the device's
- * queue.
- */
-static inline unsigned int bio_aligned_discard_max_sectors(
-                                       struct request_queue *q)
-{
-       return round_down(UINT_MAX, q->limits.discard_granularity) >>
-                       SECTOR_SHIFT;
-}
-
 /*
  * Internal io_context interface
  */
@@ -450,13 +436,6 @@ extern struct device_attribute dev_attr_events;
 extern struct device_attribute dev_attr_events_async;
 extern struct device_attribute dev_attr_events_poll_msecs;
 
-static inline void bio_clear_polled(struct bio *bio)
-{
-       /* can't support alloc cache if we turn off polling */
-       bio_clear_flag(bio, BIO_PERCPU_CACHE);
-       bio->bi_opf &= ~REQ_POLLED;
-}
-
 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
index 467be46d0e65620c08067579ff4fd30f1ce666a6..8f7b6fe3b4db5f621e5efce566ab3e9697b44fd9 100644 (file)
@@ -191,7 +191,6 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
                goto err_put;
 
        bio_clone_blkg_association(bio, bio_src);
-       blkcg_bio_issue_init(bio);
 
        return bio;
 
index 9f2ecec406b04e51f305e997c090176db70ce90b..b9b83030e0dfa4a77ef9df1f05bcb07737ef0aa2 100644 (file)
@@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
 
 #define DIO_INLINE_BIO_VECS 4
 
-static void blkdev_bio_end_io_simple(struct bio *bio)
-{
-       struct task_struct *waiter = bio->bi_private;
-
-       WRITE_ONCE(bio->bi_private, NULL);
-       blk_wake_io_task(waiter);
-}
-
 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
                struct iov_iter *iter, unsigned int nr_pages)
 {
@@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
                bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
        }
        bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
-       bio.bi_private = current;
-       bio.bi_end_io = blkdev_bio_end_io_simple;
        bio.bi_ioprio = iocb->ki_ioprio;
 
        ret = bio_iov_iter_get_pages(&bio, iter);
@@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 
        if (iocb->ki_flags & IOCB_NOWAIT)
                bio.bi_opf |= REQ_NOWAIT;
-       if (iocb->ki_flags & IOCB_HIPRI)
-               bio_set_polled(&bio, iocb);
 
-       submit_bio(&bio);
-       for (;;) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               if (!READ_ONCE(bio.bi_private))
-                       break;
-               if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0))
-                       blk_io_schedule();
-       }
-       __set_current_state(TASK_RUNNING);
+       submit_bio_wait(&bio);
 
        bio_release_pages(&bio, should_dirty);
        if (unlikely(bio.bi_status))
@@ -197,8 +177,10 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
            (bdev_logical_block_size(bdev) - 1))
                return -EINVAL;
 
-       bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
-
+       if (iocb->ki_flags & IOCB_ALLOC_CACHE)
+               opf |= REQ_ALLOC_CACHE;
+       bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
+                              &blkdev_dio_pool);
        dio = container_of(bio, struct blkdev_dio, bio);
        atomic_set(&dio->ref, 1);
        /*
@@ -320,7 +302,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
            (bdev_logical_block_size(bdev) - 1))
                return -EINVAL;
 
-       bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
+       if (iocb->ki_flags & IOCB_ALLOC_CACHE)
+               opf |= REQ_ALLOC_CACHE;
+       bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
+                              &blkdev_dio_pool);
        dio = container_of(bio, struct blkdev_dio, bio);
        dio->flags = 0;
        dio->iocb = iocb;
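
Both hunks open-code what the removed bio_alloc_kiocb() used to bundle: translate the iocb's per-task allocation-cache flag into REQ_ALLOC_CACHE, then allocate from the shared dio bioset. The repeated pattern, expressed as a hypothetical helper (blkdev_dio_bio_alloc is not part of the patch; blkdev_dio_pool mirrors the name used in the hunks):

```c
#include <linux/bio.h>
#include <linux/fs.h>

/* Hypothetical helper capturing the pattern open-coded in both hunks. */
static struct bio *blkdev_dio_bio_alloc(struct kiocb *iocb,
					struct block_device *bdev,
					unsigned short nr_vecs,
					unsigned int opf)
{
	/* Honor the per-task bio allocation cache requested by the iocb. */
	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
		opf |= REQ_ALLOC_CACHE;
	return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL,
				&blkdev_dio_pool);
}
```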
@@ -672,7 +657,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
                break;
        case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
                error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
-                                            len >> SECTOR_SHIFT, GFP_KERNEL, 0);
+                                            len >> SECTOR_SHIFT, GFP_KERNEL);
                break;
        default:
                error = -EOPNOTSUPP;
index b8b6759d670f01dc3584b8c4fda150a0c03579f1..36532b93184191e08f5ab8048ecb6ce99cfc33ee 100644 (file)
@@ -1010,7 +1010,7 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
 {
        struct gendisk *disk = dev_to_disk(dev);
 
-       return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
+       return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
 }
 
 static ssize_t disk_discard_alignment_show(struct device *dev,
@@ -1019,7 +1019,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
 {
        struct gendisk *disk = dev_to_disk(dev);
 
-       return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
+       return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
 }
 
 static ssize_t diskseq_show(struct device *dev,
index f8703db99c734a7d6724e08113d6a09f43837b9f..46949f1b0dba50b99ca5924a34b2b891ada2233d 100644 (file)
@@ -83,18 +83,17 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
 #endif
 
 static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
-               unsigned long arg, unsigned long flags)
+               unsigned long arg)
 {
        uint64_t range[2];
        uint64_t start, len;
-       struct request_queue *q = bdev_get_queue(bdev);
        struct inode *inode = bdev->bd_inode;
        int err;
 
        if (!(mode & FMODE_WRITE))
                return -EBADF;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(bdev))
                return -EOPNOTSUPP;
 
        if (copy_from_user(range, (void __user *)arg, sizeof(range)))
@@ -115,15 +114,42 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
        err = truncate_bdev_range(bdev, mode, start, start + len - 1);
        if (err)
                goto fail;
-
-       err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
-                                  GFP_KERNEL, flags);
-
+       err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
 fail:
        filemap_invalidate_unlock(inode->i_mapping);
        return err;
 }
 
+static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode,
+               void __user *argp)
+{
+       uint64_t start, len;
+       uint64_t range[2];
+       int err;
+
+       if (!(mode & FMODE_WRITE))
+               return -EBADF;
+       if (!bdev_max_secure_erase_sectors(bdev))
+               return -EOPNOTSUPP;
+       if (copy_from_user(range, argp, sizeof(range)))
+               return -EFAULT;
+
+       start = range[0];
+       len = range[1];
+       if ((start & 511) || (len & 511))
+               return -EINVAL;
+       if (start + len > bdev_nr_bytes(bdev))
+               return -EINVAL;
+
+       filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+       err = truncate_bdev_range(bdev, mode, start, start + len - 1);
+       if (!err)
+               err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
+                                               GFP_KERNEL);
+       filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+       return err;
+}
+
 static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
                unsigned long arg)
 {
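
User space reaches the new helper through the existing BLKSECDISCARD ioctl, passing a {start, length} byte range that must be 512-byte aligned. A minimal sketch (the device path is hypothetical):

```c
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* BLKSECDISCARD */

int main(void)
{
	/* {offset, length} in bytes; both must be 512-byte aligned. */
	uint64_t range[2] = { 0, 1ULL << 20 };
	int fd = open("/dev/sdX", O_WRONLY);	/* hypothetical device */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Fails with EOPNOTSUPP when the device reports no secure-erase
	 * capability (bdev_max_secure_erase_sectors() == 0). */
	if (ioctl(fd, BLKSECDISCARD, &range) < 0)
		perror("BLKSECDISCARD");
	return 0;
}
```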
@@ -451,10 +478,9 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
        case BLKROSET:
                return blkdev_roset(bdev, mode, cmd, arg);
        case BLKDISCARD:
-               return blk_ioctl_discard(bdev, mode, arg, 0);
+               return blk_ioctl_discard(bdev, mode, arg);
        case BLKSECDISCARD:
-               return blk_ioctl_discard(bdev, mode, arg,
-                               BLKDEV_DISCARD_SECURE);
+               return blk_ioctl_secure_erase(bdev, mode, argp);
        case BLKZEROOUT:
                return blk_ioctl_zeroout(bdev, mode, arg);
        case BLKGETDISKSEQ:
@@ -489,7 +515,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
                                    queue_max_sectors(bdev_get_queue(bdev)));
                return put_ushort(argp, max_sectors);
        case BLKROTATIONAL:
-               return put_ushort(argp, !blk_queue_nonrot(bdev_get_queue(bdev)));
+               return put_ushort(argp, !bdev_nonrot(bdev));
        case BLKRASET:
        case BLKFRASET:
                if(!capable(CAP_SYS_ADMIN))
index 3ed5eaf3446a2791f18df91820827409a3e28fe7..6ed602b2f80a5904892717bd2bbe19203045260a 100644 (file)
@@ -742,6 +742,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
        if (at_head) {
                list_add(&rq->queuelist, &per_prio->dispatch);
+               rq->fifo_time = jiffies;
        } else {
                deadline_add_rq_rb(per_prio, rq);
 
index 2c381c694c574c0b6875f52ee40eef6ab782e389..d2fc122d74262de1e9bd29cbe6d96e57c239fbec 100644 (file)
@@ -282,13 +282,13 @@ int adfspart_check_ADFS(struct parsed_partitions *state)
 #ifdef CONFIG_ACORN_PARTITION_RISCIX
                case PARTITION_RISCIX_SCSI:
                case PARTITION_RISCIX_MFM:
-                       slot = riscix_partition(state, start_sect, slot,
+                       riscix_partition(state, start_sect, slot,
                                                nr_sects);
                        break;
 #endif
 
                case PARTITION_LINUX:
-                       slot = linux_partition(state, start_sect, slot,
+                       linux_partition(state, start_sect, slot,
                                               nr_sects);
                        break;
                }
index da59941754163dcaa72796ded13cb21e942a3f13..9655c728262a4d5a639b38a891790c491bc08db6 100644 (file)
@@ -140,7 +140,6 @@ int atari_partition(struct parsed_partitions *state)
                                /* accept only GEM,BGM,RAW,LNX,SWP partitions */
                                if (!((pi->flg & 1) && OK_id(pi->id)))
                                        continue;
-                               part_fmt = 2;
                                put_partition (state, slot,
                                                be32_to_cpu(pi->st),
                                                be32_to_cpu(pi->siz));
index 2ef8dfa1e5c85f35d4310a2cf7c7a441bf7225ee..8a0ec929023bcd2ded0579194d99a1044bc7cb6d 100644 (file)
@@ -200,21 +200,13 @@ static ssize_t part_ro_show(struct device *dev,
 static ssize_t part_alignment_offset_show(struct device *dev,
                                          struct device_attribute *attr, char *buf)
 {
-       struct block_device *bdev = dev_to_bdev(dev);
-
-       return sprintf(buf, "%u\n",
-               queue_limit_alignment_offset(&bdev_get_queue(bdev)->limits,
-                               bdev->bd_start_sect));
+       return sprintf(buf, "%u\n", bdev_alignment_offset(dev_to_bdev(dev)));
 }
 
 static ssize_t part_discard_alignment_show(struct device *dev,
                                           struct device_attribute *attr, char *buf)
 {
-       struct block_device *bdev = dev_to_bdev(dev);
-
-       return sprintf(buf, "%u\n",
-               queue_limit_discard_alignment(&bdev_get_queue(bdev)->limits,
-                               bdev->bd_start_sect));
+       return sprintf(buf, "%u\n", bdev_discard_alignment(dev_to_bdev(dev)));
 }
 
 static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
@@ -486,7 +478,7 @@ int bdev_del_partition(struct gendisk *disk, int partno)
                goto out_unlock;
 
        ret = -EBUSY;
-       if (part->bd_openers)
+       if (atomic_read(&part->bd_openers))
                goto out_unlock;
 
        delete_partition(part);
index 27f6c7d9c776de7ae53f85b962aa327b7f3f5e75..38e58960ae036a4e4df2fdaefca2c7842340db42 100644 (file)
@@ -736,7 +736,6 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
                len = r_cols;
        } else {
                r_stripe = 0;
-               r_cols   = 0;
                len = r_parent;
        }
        if (len < 0)
@@ -783,11 +782,8 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
                r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid);
                r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1);
                len = r_id2;
-       } else {
-               r_id1 = 0;
-               r_id2 = 0;
+       } else
                len = r_diskid;
-       }
        if (len < 0)
                return false;
 
@@ -826,11 +822,8 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
                r_id1 = ldm_relative (buffer, buflen, 0x44, r_name);
                r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1);
                len = r_id2;
-       } else {
-               r_id1 = 0;
-               r_id2 = 0;
+       } else
                len = r_name;
-       }
        if (len < 0)
                return false;
 
@@ -963,10 +956,8 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
                        return false;
                }
                len = r_index;
-       } else {
-               r_index = 0;
+       } else
                len = r_diskid;
-       }
        if (len < 0) {
                ldm_error("len %d < 0", len);
                return false;
index 9e42fe3e02f569ca1ad05c70d87f42918542a76d..56637aceaf81c62cfe63cfe6a10fe7e82835266b 100644 (file)
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+/blacklist_hashes_checked
 /extract-cert
 /x509_certificate_list
 /x509_revocation_list
index 73d1350c223a8c757dff74a3ce1dbc5d6a9e9a11..476755703cf8b44c9c12425f7b347409baef7170 100644 (file)
@@ -104,8 +104,11 @@ config SYSTEM_BLACKLIST_HASH_LIST
        help
          If set, this option should be the filename of a list of hashes in the
          form "<hash>", "<hash>", ... .  This will be included into a C
-         wrapper to incorporate the list into the kernel.  Each <hash> should
-         be a string of hex digits.
+         wrapper to incorporate the list into the kernel.  Each <hash> must be a
+         string starting with a prefix ("tbs" or "bin"), then a colon (":"), and
+         finally an even number of hexadecimal lowercase characters (up to 128).
+         Certificate hashes can be generated with
+         tools/certs/print-cert-tbs-hash.sh .
 
 config SYSTEM_REVOCATION_LIST
        bool "Provide system-wide ring of revocation certificates"
@@ -124,4 +127,14 @@ config SYSTEM_REVOCATION_KEYS
          containing X.509 certificates to be included in the default blacklist
          keyring.
 
+config SYSTEM_BLACKLIST_AUTH_UPDATE
+       bool "Allow root to add signed blacklist keys"
+       depends on SYSTEM_BLACKLIST_KEYRING
+       depends on SYSTEM_DATA_VERIFICATION
+       help
+         If set, provide the ability to load new blacklist keys at run time if
+         they are signed and vouched for by a certificate from the builtin trusted
+         keyring.  The PKCS#7 signature of the description is set in the key
+         payload.  Blacklist keys cannot be removed.
+
 endmenu
index d8443cfb1c401ae51bec3a34b681ea4090149c5f..1d26ae36af20d5621be62b73a15929d078018e93 100644 (file)
@@ -7,6 +7,18 @@ obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o c
 obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o
 obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o
 ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),)
+quiet_cmd_check_blacklist_hashes = CHECK   $(patsubst "%",%,$(2))
+      cmd_check_blacklist_hashes = $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(2); touch $@
+
+$(eval $(call config_filename,SYSTEM_BLACKLIST_HASH_LIST))
+
+$(obj)/blacklist_hashes.o: $(obj)/blacklist_hashes_checked
+
+CFLAGS_blacklist_hashes.o += -I$(srctree)
+
+targets += blacklist_hashes_checked
+$(obj)/blacklist_hashes_checked: $(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(SYSTEM_BLACKLIST_HASH_LIST_FILENAME) scripts/check-blacklist-hashes.awk FORCE
+       $(call if_changed,check_blacklist_hashes,$(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(CONFIG_SYSTEM_BLACKLIST_HASH_LIST))
 obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o
 else
 obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_nohashes.o
@@ -21,7 +33,7 @@ $(obj)/system_certificates.o: $(obj)/x509_certificate_list
 $(obj)/x509_certificate_list: $(CONFIG_SYSTEM_TRUSTED_KEYS) $(obj)/extract-cert FORCE
        $(call if_changed,extract_certs)
 
-targets += x509_certificate_list
+targets += x509_certificate_list blacklist_hashes_checked
 
 # If module signing is requested, say by allyesconfig, but a key has not been
 # supplied, then one will need to be generated to make sure the build does not
index c9a435b15af40679513d596ad845e31359a7c4df..25094ea7360078946a27ddf6073ff1e87088d569 100644 (file)
 #include <linux/err.h>
 #include <linux/seq_file.h>
 #include <linux/uidgid.h>
+#include <linux/verification.h>
 #include <keys/system_keyring.h>
 #include "blacklist.h"
 #include "common.h"
 
+/*
+ * According to crypto/asymmetric_keys/x509_cert_parser.c:x509_note_pkey_algo(),
+ * the size of the currently longest supported hash algorithm is 512 bits,
+ * which translates into 128 hex characters.
+ */
+#define MAX_HASH_LEN   128
+
+#define BLACKLIST_KEY_PERM (KEY_POS_SEARCH | KEY_POS_VIEW | \
+                           KEY_USR_SEARCH | KEY_USR_VIEW)
+
+static const char tbs_prefix[] = "tbs";
+static const char bin_prefix[] = "bin";
+
 static struct key *blacklist_keyring;
 
 #ifdef CONFIG_SYSTEM_REVOCATION_LIST
@@ -32,41 +46,89 @@ extern __initconst const unsigned long revocation_certificate_list_size;
  */
 static int blacklist_vet_description(const char *desc)
 {
-       int n = 0;
-
-       if (*desc == ':')
-               return -EINVAL;
-       for (; *desc; desc++)
-               if (*desc == ':')
-                       goto found_colon;
+       int i, prefix_len, tbs_step = 0, bin_step = 0;
+
+       /* The following algorithm only works if prefix lengths match. */
+       BUILD_BUG_ON(sizeof(tbs_prefix) != sizeof(bin_prefix));
+       prefix_len = sizeof(tbs_prefix) - 1;
+       for (i = 0; *desc; desc++, i++) {
+               if (*desc == ':') {
+                       if (tbs_step == prefix_len)
+                               goto found_colon;
+                       if (bin_step == prefix_len)
+                               goto found_colon;
+                       return -EINVAL;
+               }
+               if (i >= prefix_len)
+                       return -EINVAL;
+               if (*desc == tbs_prefix[i])
+                       tbs_step++;
+               if (*desc == bin_prefix[i])
+                       bin_step++;
+       }
        return -EINVAL;
 
 found_colon:
        desc++;
-       for (; *desc; desc++) {
+       for (i = 0; *desc && i < MAX_HASH_LEN; desc++, i++) {
                if (!isxdigit(*desc) || isupper(*desc))
                        return -EINVAL;
-               n++;
        }
+       if (*desc)
+               /* The hash is longer than MAX_HASH_LEN. */
+               return -ENOPKG;
 
-       if (n == 0 || n & 1)
+       /* Check for a non-empty, even number of hexadecimal characters. */
+       if (i == 0 || i & 1)
                return -EINVAL;
        return 0;
 }
 
-/*
- * The hash to be blacklisted is expected to be in the description.  There will
- * be no payload.
- */
-static int blacklist_preparse(struct key_preparsed_payload *prep)
+static int blacklist_key_instantiate(struct key *key,
+               struct key_preparsed_payload *prep)
 {
-       if (prep->datalen > 0)
-               return -EINVAL;
-       return 0;
+#ifdef CONFIG_SYSTEM_BLACKLIST_AUTH_UPDATE
+       int err;
+#endif
+
+       /* Sets safe default permissions for keys loaded by user space. */
+       key->perm = BLACKLIST_KEY_PERM;
+
+       /*
+        * Skips the authentication step for builtin hashes, they are not
+        * signed but still trusted.
+        */
+       if (key->flags & (1 << KEY_FLAG_BUILTIN))
+               goto out;
+
+#ifdef CONFIG_SYSTEM_BLACKLIST_AUTH_UPDATE
+       /*
+        * Verifies the description's PKCS#7 signature against the builtin
+        * trusted keyring.
+        */
+       err = verify_pkcs7_signature(key->description,
+                       strlen(key->description), prep->data, prep->datalen,
+                       NULL, VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL);
+       if (err)
+               return err;
+#else
+       /*
+        * It should not be possible to come here because the keyring doesn't
+        * have KEY_USR_WRITE and the only other way to call this function is
+        * for builtin hashes.
+        */
+       WARN_ON_ONCE(1);
+       return -EPERM;
+#endif
+
+out:
+       return generic_key_instantiate(key, prep);
 }
 
-static void blacklist_free_preparse(struct key_preparsed_payload *prep)
+static int blacklist_key_update(struct key *key,
+               struct key_preparsed_payload *prep)
 {
+       return -EPERM;
 }
 
 static void blacklist_describe(const struct key *key, struct seq_file *m)
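
The rewritten vet routine accepts only "tbs:" or "bin:" followed by a non-empty, even-length run of lowercase hex digits, at most MAX_HASH_LEN of them. A stand-alone user-space sketch of the same grammar (an illustration, not the kernel routine itself):

```c
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

#define MAX_HASH_LEN 128

/* User-space sketch of the grammar blacklist_vet_description() enforces. */
static int vet(const char *desc)
{
	size_t i;

	/* Exactly "tbs" or "bin" before the colon. */
	if (strncmp(desc, "tbs:", 4) && strncmp(desc, "bin:", 4))
		return -EINVAL;
	desc += 4;
	for (i = 0; desc[i]; i++) {
		if (i >= MAX_HASH_LEN)
			return -ENOPKG;	/* hash longer than MAX_HASH_LEN */
		if (!isxdigit((unsigned char)desc[i]) ||
		    isupper((unsigned char)desc[i]))
			return -EINVAL;	/* not lowercase hex */
	}
	return (i == 0 || i & 1) ? -EINVAL : 0;	/* non-empty, even length */
}

int main(void)
{
	printf("%d\n", vet("tbs:23aa42"));	/* 0: accepted */
	printf("%d\n", vet("bin:DEAD"));	/* -EINVAL: uppercase hex */
	printf("%d\n", vet("md5:abcd"));	/* -EINVAL: unknown prefix */
	return 0;
}
```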
@@ -77,17 +139,48 @@ static void blacklist_describe(const struct key *key, struct seq_file *m)
 static struct key_type key_type_blacklist = {
        .name                   = "blacklist",
        .vet_description        = blacklist_vet_description,
-       .preparse               = blacklist_preparse,
-       .free_preparse          = blacklist_free_preparse,
-       .instantiate            = generic_key_instantiate,
+       .instantiate            = blacklist_key_instantiate,
+       .update                 = blacklist_key_update,
        .describe               = blacklist_describe,
 };
 
+static char *get_raw_hash(const u8 *hash, size_t hash_len,
+               enum blacklist_hash_type hash_type)
+{
+       size_t type_len;
+       const char *type_prefix;
+       char *buffer, *p;
+
+       switch (hash_type) {
+       case BLACKLIST_HASH_X509_TBS:
+               type_len = sizeof(tbs_prefix) - 1;
+               type_prefix = tbs_prefix;
+               break;
+       case BLACKLIST_HASH_BINARY:
+               type_len = sizeof(bin_prefix) - 1;
+               type_prefix = bin_prefix;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               return ERR_PTR(-EINVAL);
+       }
+       buffer = kmalloc(type_len + 1 + hash_len * 2 + 1, GFP_KERNEL);
+       if (!buffer)
+               return ERR_PTR(-ENOMEM);
+       p = memcpy(buffer, type_prefix, type_len);
+       p += type_len;
+       *p++ = ':';
+       bin2hex(p, hash, hash_len);
+       p += hash_len * 2;
+       *p = '\0';
+       return buffer;
+}
+
 /**
- * mark_hash_blacklisted - Add a hash to the system blacklist
+ * mark_raw_hash_blacklisted - Add a hash to the system blacklist
  * @hash: The hash as a hex string with a type prefix (eg. "tbs:23aa429783")
  */
-int mark_hash_blacklisted(const char *hash)
+static int mark_raw_hash_blacklisted(const char *hash)
 {
        key_ref_t key;
 
@@ -96,8 +189,7 @@ int mark_hash_blacklisted(const char *hash)
                                   hash,
                                   NULL,
                                   0,
-                                  ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                                   KEY_USR_VIEW),
+                                  BLACKLIST_KEY_PERM,
                                   KEY_ALLOC_NOT_IN_QUOTA |
                                   KEY_ALLOC_BUILT_IN);
        if (IS_ERR(key)) {
@@ -107,29 +199,36 @@ int mark_hash_blacklisted(const char *hash)
        return 0;
 }
 
+int mark_hash_blacklisted(const u8 *hash, size_t hash_len,
+               enum blacklist_hash_type hash_type)
+{
+       const char *buffer;
+       int err;
+
+       buffer = get_raw_hash(hash, hash_len, hash_type);
+       if (IS_ERR(buffer))
+               return PTR_ERR(buffer);
+       err = mark_raw_hash_blacklisted(buffer);
+       kfree(buffer);
+       return err;
+}
+
 /**
  * is_hash_blacklisted - Determine if a hash is blacklisted
  * @hash: The hash to be checked as a binary blob
  * @hash_len: The length of the binary hash
- * @type: Type of hash
+ * @hash_type: Type of hash
  */
-int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type)
+int is_hash_blacklisted(const u8 *hash, size_t hash_len,
+               enum blacklist_hash_type hash_type)
 {
        key_ref_t kref;
-       size_t type_len = strlen(type);
-       char *buffer, *p;
+       const char *buffer;
        int ret = 0;
 
-       buffer = kmalloc(type_len + 1 + hash_len * 2 + 1, GFP_KERNEL);
-       if (!buffer)
-               return -ENOMEM;
-       p = memcpy(buffer, type, type_len);
-       p += type_len;
-       *p++ = ':';
-       bin2hex(p, hash, hash_len);
-       p += hash_len * 2;
-       *p = 0;
-
+       buffer = get_raw_hash(hash, hash_len, hash_type);
+       if (IS_ERR(buffer))
+               return PTR_ERR(buffer);
        kref = keyring_search(make_key_ref(blacklist_keyring, true),
                              &key_type_blacklist, buffer, false);
        if (!IS_ERR(kref)) {
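
Both insertion and lookup now derive the key description from get_raw_hash(), which renders a binary digest as "<prefix>:<lowercase hex>". A user-space illustration with a made-up three-byte digest:

```c
#include <stdio.h>

int main(void)
{
	const unsigned char hash[] = { 0x23, 0xaa, 0x42 };	/* made up */
	char buf[4 + 2 * sizeof(hash) + 1];	/* "tbs:" + hex + NUL */
	char *p = buf;
	size_t i;

	p += sprintf(p, "tbs:");	/* BLACKLIST_HASH_X509_TBS prefix */
	for (i = 0; i < sizeof(hash); i++)
		p += sprintf(p, "%02x", hash[i]);	/* like bin2hex() */
	printf("%s\n", buf);		/* prints "tbs:23aa42" */
	return 0;
}
```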
@@ -144,7 +243,8 @@ EXPORT_SYMBOL_GPL(is_hash_blacklisted);
 
 int is_binary_blacklisted(const u8 *hash, size_t hash_len)
 {
-       if (is_hash_blacklisted(hash, hash_len, "bin") == -EKEYREJECTED)
+       if (is_hash_blacklisted(hash, hash_len, BLACKLIST_HASH_BINARY) ==
+                       -EKEYREJECTED)
                return -EPERM;
 
        return 0;
@@ -166,8 +266,10 @@ int add_key_to_revocation_list(const char *data, size_t size)
                                   NULL,
                                   data,
                                   size,
-                                  ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW),
-                                  KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN);
+                                  KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH
+                                  | KEY_USR_VIEW,
+                                  KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN
+                                  | KEY_ALLOC_BYPASS_RESTRICTION);
 
        if (IS_ERR(key)) {
                pr_err("Problem with revocation key (%ld)\n", PTR_ERR(key));
@@ -194,30 +296,57 @@ int is_key_on_revocation_list(struct pkcs7_message *pkcs7)
 }
 #endif
 
+static int restrict_link_for_blacklist(struct key *dest_keyring,
+               const struct key_type *type, const union key_payload *payload,
+               struct key *restrict_key)
+{
+       if (type == &key_type_blacklist)
+               return 0;
+       return -EOPNOTSUPP;
+}
+
 /*
  * Initialise the blacklist
+ *
+ * The blacklist_init() function is registered as an initcall via
+ * device_initcall().  As a result, if blacklist_init() fails for any
+ * reason, the kernel continues to execute.  While cleanly returning -ENODEV
+ * could be acceptable for some non-critical kernel parts, if the blacklist
+ * keyring fails to load, it defeats the certificate/key based deny list for
+ * signed modules.  If a critical piece of security functionality that users
+ * expect to be present fails to initialize, panic()ing is likely the right
+ * thing to do.
  */
 static int __init blacklist_init(void)
 {
        const char *const *bl;
+       struct key_restriction *restriction;
 
        if (register_key_type(&key_type_blacklist) < 0)
                panic("Can't allocate system blacklist key type\n");
 
+       restriction = kzalloc(sizeof(*restriction), GFP_KERNEL);
+       if (!restriction)
+               panic("Can't allocate blacklist keyring restriction\n");
+       restriction->check = restrict_link_for_blacklist;
+
        blacklist_keyring =
                keyring_alloc(".blacklist",
                              GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
-                             (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                             KEY_USR_VIEW | KEY_USR_READ |
-                             KEY_USR_SEARCH,
-                             KEY_ALLOC_NOT_IN_QUOTA |
+                             KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
+                             KEY_POS_WRITE |
+                             KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH
+#ifdef CONFIG_SYSTEM_BLACKLIST_AUTH_UPDATE
+                             | KEY_USR_WRITE
+#endif
+                             , KEY_ALLOC_NOT_IN_QUOTA |
                              KEY_ALLOC_SET_KEEP,
-                             NULL, NULL);
+                             restriction, NULL);
        if (IS_ERR(blacklist_keyring))
                panic("Can't allocate system blacklist keyring\n");
 
        for (bl = blacklist_hashes; *bl; bl++)
-               if (mark_hash_blacklisted(*bl) < 0)
+               if (mark_raw_hash_blacklisted(*bl) < 0)
                        pr_err("- blacklisting failed\n");
        return 0;
 }
index 91a4ad50dea268d20fec0f944e64ff3425ed82d6..77ed4e93ad56ff4055b4909487d1fb0f52e7f35c 100644 (file)
@@ -69,7 +69,8 @@ int x509_get_sig_params(struct x509_certificate *cert)
        if (ret < 0)
                goto error_2;
 
-       ret = is_hash_blacklisted(sig->digest, sig->digest_size, "tbs");
+       ret = is_hash_blacklisted(sig->digest, sig->digest_size,
+                                 BLACKLIST_HASH_X509_TBS);
        if (ret == -EKEYREJECTED) {
                pr_err("Cert %*phN is blacklisted\n",
                       sig->digest_size, sig->digest);
index f45979aa2d648d61d5e58c8348e4589326c38815..ec0e22a1e25d64bd0d7add3e54892092f6a31cd7 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/tick.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
+#include <linux/perf_event.h>
 #include <asm/mwait.h>
 #include <xen/xen.h>
 
@@ -164,6 +165,9 @@ static int power_saving_thread(void *data)
                                tsc_marked_unstable = 1;
                        }
                        local_irq_disable();
+
+                       perf_lopwr_cb(true);
+
                        tick_broadcast_enable();
                        tick_broadcast_enter();
                        stop_critical_timings();
@@ -172,6 +176,9 @@ static int power_saving_thread(void *data)
 
                        start_critical_timings();
                        tick_broadcast_exit();
+
+                       perf_lopwr_cb(false);
+
                        local_irq_enable();
 
                        if (time_before(expire_time, jiffies)) {
index 990ff5b0aeb875d919efad799aa36fb6d5183f8e..e07782b1fbb68eb6ca3d7a7552387266519f1d83 100644 (file)
@@ -1707,24 +1707,23 @@ static int acpi_video_resume(struct notifier_block *nb,
        int i;
 
        switch (val) {
-       case PM_HIBERNATION_PREPARE:
-       case PM_SUSPEND_PREPARE:
-       case PM_RESTORE_PREPARE:
-               return NOTIFY_DONE;
-       }
-
-       video = container_of(nb, struct acpi_video_bus, pm_nb);
-
-       dev_info(&video->device->dev, "Restoring backlight state\n");
+       case PM_POST_HIBERNATION:
+       case PM_POST_SUSPEND:
+       case PM_POST_RESTORE:
+               video = container_of(nb, struct acpi_video_bus, pm_nb);
+
+               dev_info(&video->device->dev, "Restoring backlight state\n");
+
+               for (i = 0; i < video->attached_count; i++) {
+                       video_device = video->attached_array[i].bind_info;
+                       if (video_device && video_device->brightness)
+                               acpi_video_device_lcd_set_level(video_device,
+                                               video_device->brightness->curr);
+               }
 
-       for (i = 0; i < video->attached_count; i++) {
-               video_device = video->attached_array[i].bind_info;
-               if (video_device && video_device->brightness)
-                       acpi_video_device_lcd_set_level(video_device,
-                                       video_device->brightness->curr);
+               return NOTIFY_OK;
        }
-
-       return NOTIFY_OK;
+       return NOTIFY_DONE;
 }
 
 static acpi_status
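
The rewritten callback inverts the old early-return logic: only the three post-resume events restore the backlight and report NOTIFY_OK; every other PM event now falls through to NOTIFY_DONE. The control-flow shape, as a generic sketch not tied to the ACPI video structures:

```c
#include <linux/notifier.h>
#include <linux/suspend.h>

/* Generic sketch: act only on resume-side PM events, pass the rest on. */
static int resume_only_pm_notify(struct notifier_block *nb,
				 unsigned long val, void *unused)
{
	switch (val) {
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
	case PM_POST_RESTORE:
		/* ... restore device state here ... */
		return NOTIFY_OK;	/* event was handled */
	}
	return NOTIFY_DONE;		/* not interested in this event */
}
```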
index 725e2f65cdca26f9516212adceb7c567a55495eb..0a50b49125154f537df81aac6af60ab21a33bcf3 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: acapps - common include for ACPI applications/tools
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -17,7 +17,7 @@
 /* Common info for tool signons */
 
 #define ACPICA_NAME                 "Intel ACPI Component Architecture"
-#define ACPICA_COPYRIGHT            "Copyright (c) 2000 - 2021 Intel Corporation"
+#define ACPICA_COPYRIGHT            "Copyright (c) 2000 - 2022 Intel Corporation"
 
 #if ACPI_MACHINE_WIDTH == 64
 #define ACPI_WIDTH          " (64-bit version)"
index be3826f46f885114288a4a44c47087f54ad03705..bb329e34ee7d405f955fd3f15c1367e81cc4a85a 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: accommon.h - Common include files for generation of ACPICA source
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 53b41c7a6119a021faee4aa4d2f7188ccb91d901..476d21e67767505d0077303d8f3a35ae74e07123 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: acapps - common include for ACPI applications/tools
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 3ccc7b2a76f18ad78c5f5f9d23505692720a4304..d629716aa5b204c6f636665db69d545e8456be28 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acdebug.h - ACPI/AML debugger
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 3170a24fe5059e2eb9270a65efe0d24eb9a52a38..fe2c3630a38d78b33a4025e027a5b8177b171fdb 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acdispat.h - dispatcher (parser to interpreter interface)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index b29ba436944ad5a29065c326c409aa3eedee916c..922f559a3e5902f380099307c742ca30f63a723e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acevents.h - Event subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 4366d36ef11985d98e6be87815367f5b9a1ca1d2..088d6a7d052cd8925bd6bafb167fb134c2869c9e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acglobal.h - Declarations for global variables
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 810de0b4c12562c11f15710635c469533fed9f60..6f2787506b50c01bf999f6bbbb1e1642b6fdddb2 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: achware.h -- hardware specific interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 816a16e1fc4cf3233c2555c3aaca92dbc8bc041f..6bdf133a2767fea22b77a541761699907ca05890 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acinterp.h - Interpreter subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index be57436182a123851d46373aab83cd9ea36fb5d0..901b1543b869029eeb4d519875f93a199faa0b27 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: aclocal.h - Internal data types used across the ACPI subsystem
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 93bd2d19c1568159d5e5f07703602ee5e3f9fb18..2f3e609df47deeccd92e616e341b9d97ea110927 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acmacros.h - C macros for the entire subsystem.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 199aabac3790bd7f2c2fba895ed6d9a0ca2c5e74..7b27b9cc5916766f65206a220a29b31f3b5f6a0e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acnamesp.h - Namespace subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 0aa0d847cb255002513f6c9f79235e378510a280..6af5dc9950850c12857ef1acff4117d1ecfc635d 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acobject.h - Definition of union acpi_operand_object  (Internal object only)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index c3f12ee9fc6f3b162c2e24fd936d6de446fd8bb9..a224926bd9c8a13f99f403953713a1fdd7110298 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acopcode.h - AML opcode information for the AML parser and interpreter
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 8e40e5909458f44fb1115ac4a98d9d226f24b079..4511c2bd8bc39dd49b666bfa31b4f0821c2a31c5 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: acparser.h - AML Parser subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 5951b433c30434bdd7c746b605af2a30028ed67b..f7d65a20026b0d1ce142ad27075b2ec1e01e7b75 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acpredef - Information table for ACPI predefined methods and objects
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 37c47e185fd47ccbceca873daf8fd7f3fade5763..f7749c63d277267be69a364fe9a33337eb17c84e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acresrc.h - Resource Manager function prototypes
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e3beb096c46de0eb5b8a185135ada35a25f44c93..b859de96a1e4d15d68d046762c0ee771cbc0748c 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acstruct.h - Internal structs
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 533802fe73e90d64658e0a7699d44e5727824f13..f8d7bfd737df27b8ad73050b605ac3a218d57e40 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: actables.h - ACPI table management
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 59d6ded01614f4902ada5a93c9fea6800560c1e2..6e6270f96bfb9ffe8a944229e97d2fc9c4647b7e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acutils.h -- prototypes for the common (subsystem-wide) procedures
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index d6b088c5001f91cd482360735915f5108af74319..62a7ec2775131ee0c12fecd96ac067fee5d33051 100644 (file)
@@ -5,7 +5,7 @@
  *                   Declarations and definitions contained herein are derived
  *                   directly from the ACPI specification.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index b98123210281e6c00f209b138d919babb8803dc0..b31779ce204a59417eaaecf129a00651229ff77f 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: amlresrc.h - AML resource descriptors
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index fd813c5d3952951a179c5d87e34b155ec1f3f1d6..105e6ceaa887581a72852be7e1a2e7b754ad87a9 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dbhistry - debugger HISTORY command
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 6630d6536fb0ed935785874358ca70a7536d5c8e..2963d1579c05cce171bf7993e91ae9351e3323d5 100644 (file)
@@ -4,7 +4,7 @@
  * Module Name: dsargs - Support for execution of dynamic arguments for static
  *                       objects (regions, fields, buffer fields, etc.)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index a152f03135cdb4ffa31f91cedaf260a35069daf0..8492619149d1049ff8771a43197b2bff3d7990a4 100644 (file)
@@ -4,7 +4,7 @@
  * Module Name: dscontrol - Support for execution control opcodes -
  *                          if/else/while/return
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index b9b03d6299303bce5075a80386f1227dcb76e443..2d99ccf5bde7da7a7207b099530579ed02968661 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dsdebug - Parser/Interpreter interface - debugging
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index a1681776796915e41944239a82a22210ed9edb7e..de175f1b4bebea8f27fe3200432a6bb02cc2f1ad 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dsfield - Dispatcher field routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index ba6f882e83bcfe0708e8b10b47849880f0b37f23..dffd54fdbd518e9f5eb7bd6201bb9963d0f9b74d 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dsinit - Object initialization namespace walk
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 8e011e59b9b48ac467d67545a211de624c9338fc..ae2e768830bfc779fae6b928d72d3e6418c980d7 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dsmethod - Parser/Interpreter interface - control method parsing
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 3c0c31157e7e029581310b276adff1d909eca8ff..e3dfc734ace9e306df0ace6e7f4db1ba18d00349 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dsobject - Dispatcher object management routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 44c448269861a3aea8edbbe4ce837d4d815e4a6f..2b9b6a974ca9466bba50878d246374f70299503b 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dsopcode - Dispatcher support for regions and fields
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e642d65bcc66effa0a3e9e991db612a270a87a15..1624d6e7dc46d5a7144adfadd2bc2ef2c8d91f6b 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dspkginit - Completion of deferred package initialization
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index f2d2267054af1726d6253c6504f0855062c2f33f..e8ad41387f84c19452a306b9bba2c1e147d273dd 100644 (file)
@@ -4,7 +4,7 @@
  * Module Name: dswexec - Dispatcher method execution callbacks;
  *                        dispatch to interpreter.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -30,7 +30,7 @@ static acpi_execute_op acpi_gbl_op_type_dispatch[] = {
        acpi_ex_opcode_0A_0T_1R,
        acpi_ex_opcode_1A_0T_0R,
        acpi_ex_opcode_1A_0T_1R,
-       acpi_ex_opcode_1A_1T_0R,
+       NULL,                   /* Was: acpi_ex_opcode_1A_1T_0R (used for the Load operator) */
        acpi_ex_opcode_1A_1T_1R,
        acpi_ex_opcode_2A_0T_0R,
        acpi_ex_opcode_2A_0T_1R,
index a377638e44f97e3d5116108bce0ce962760e167b..9f6573646ab5a10402d780a12b0c58512137bdb3 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dswload - Dispatcher first pass namespace load callbacks
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 3625952c395700e5c080e0f1894d99d1e2aa643e..778df616aaa006d6bda6fb0f1fc22429687b85c0 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dswload2 - Dispatcher second pass namespace load callbacks
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 9c123af08bc156af5c3df4fbef460e99d69d8451..634b9100f674b518ebb44f983ade1ceab2b808c8 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dswscope - Scope stack manipulation
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index fbe2ba05c82a66504dc5a495c59e15ea522afe9f..0aa735d3b93cc559ddfef41d8f198cc89e2e5e7f 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: dswstate - Dispatcher parse tree walk management routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 35385148fedb77c26d1f3e1b2ba60adb62410e55..df596d46dd974eba813e0431fdf5cda2e21f9445 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evevent - Fixed Event handling and dispatch
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index de4eea606ccd91c52cc47701006294de9553e80e..9aab54797ded929449505222ca0510b1dabd2419 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evglock - Global Lock support
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index c5a06882bdf636a5f6e9f8dd51aa1c7dbc555ce3..a6bb480d631c3474498cc385add2ac75c5e913fb 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpe - General Purpose Event handling and dispatch
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e5f8245c2d9339bfa8c22ca0f62dfed7712f1f3c..39fe4566310ba3cc070037e84537021ad6db1d54 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpeblk - GPE block creation and initialization.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index b0724d6e6e8030611ea2beb805dfef5a7fb932da..2f1a75fee61cf11d8328b5dc1355e254052fbda7 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpeinit - System GPE initialization and update
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 2e74308d77258d56c64f31dd1141e812a0ec4ca3..c32eb57aa21db41e5570517b89191244c01ef955 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpeutil - GPE utilities
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 8f43d38dc4ca203efed8352b608771e5f240b758..be9a05498adc356454955991b53bacf05efd8a0b 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evhandler - Support for Address Space handlers
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index f14ebcd610abe9bd7b91fde0b65a89e73be381f1..6172cddc1b396225871297d4638f9bb07ac16d13 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evmisc - Miscellaneous event manager support functions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index b9d77d327d38fb25338ab7da7d1f86fe827370ae..b96b3a7e78e50acdcab4ac0239d8a223a05068dd 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evregion - Operation Region support
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index d28dee929e61a72c4fc9fb952ffd0cb07ee7a832..ca4ba6b351feefabbe93e0380688e8edd2ec5510 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evrgnini- ACPI address_space (op_region) init
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index ff5cf5b0705a8d69bc9553586ce39b54704bf974..18219abba108aa182e288b23b83ea531b993d80c 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evxface - External interfaces for ACPI events
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 5445a361c621fde952e0b7c6d1ac8635672cb399..8187b081e0a613c40174ac00d57496dca1c6aee0 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evxfevnt - External Interfaces, ACPI event disable/enable
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index a6d53cf864500b877f0f084633eeed4d69c9ce11..340947e412bbf782ad81de88dab2b5f11a5bebd9 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: evxfgpe - External Interfaces for General Purpose Events (GPEs)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index b1ff0a8f9c14fe3672c84893d20c948b86b0b695..0a8372bf6a77d199ae1b5955b5c2e52f19065527 100644 (file)
@@ -4,7 +4,7 @@
  * Module Name: evxfregn - External Interfaces, ACPI Operation Regions and
  *                         Address Spaces.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 2d220d470c60f358cc91559054312718bf87042d..66201742f499498e983402b820ed1d0d90ab30a8 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exconcat - Concatenate-type AML operators
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 6c2685a6a4c1cc89333df447c9c568dc454de936..d7d74ef87b186164b1e45e9a19279df192f743d3 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exconfig - Namespace reconfiguration (Load/Unload opcodes)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -87,11 +87,21 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state,
        struct acpi_namespace_node *parent_node;
        struct acpi_namespace_node *start_node;
        struct acpi_namespace_node *parameter_node = NULL;
+       union acpi_operand_object *return_obj;
        union acpi_operand_object *ddb_handle;
        u32 table_index;
 
        ACPI_FUNCTION_TRACE(ex_load_table_op);
 
+       /* Create the return object */
+
+       return_obj = acpi_ut_create_integer_object((u64)0);
+       if (!return_obj) {
+               return_ACPI_STATUS(AE_NO_MEMORY);
+       }
+
+       *return_desc = return_obj;
+
        /* Find the ACPI table in the RSDT/XSDT */
 
        acpi_ex_exit_interpreter();
@@ -106,12 +116,6 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state,
 
                /* Table not found, return an Integer=0 and AE_OK */
 
-               ddb_handle = acpi_ut_create_integer_object((u64) 0);
-               if (!ddb_handle) {
-                       return_ACPI_STATUS(AE_NO_MEMORY);
-               }
-
-               *return_desc = ddb_handle;
                return_ACPI_STATUS(AE_OK);
        }
 
@@ -198,7 +202,13 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state,
                }
        }
 
-       *return_desc = ddb_handle;
+       /* Remove the reference to ddb_handle created by acpi_ex_add_table above */
+
+       acpi_ut_remove_reference(ddb_handle);
+
+       /* Returning -1 (non-zero) indicates success */
+
+       return_obj->integer.value = 0xFFFFFFFFFFFFFFFF;
        return_ACPI_STATUS(status);
 }
 
@@ -249,7 +259,7 @@ acpi_ex_region_read(union acpi_operand_object *obj_desc, u32 length, u8 *buffer)
  *
  * PARAMETERS:  obj_desc        - Region or Buffer/Field where the table will be
  *                                obtained
- *              target          - Where a handle to the table will be stored
+ *              target          - Where the status of the load will be stored
  *              walk_state      - Current state
  *
  * RETURN:      Status
@@ -278,6 +288,20 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc,
 
        ACPI_FUNCTION_TRACE(ex_load_op);
 
+       if (target->common.descriptor_type == ACPI_DESC_TYPE_NAMED) {
+               target =
+                   acpi_ns_get_attached_object(ACPI_CAST_PTR
+                                               (struct acpi_namespace_node,
+                                                target));
+       }
+       if (target->common.type != ACPI_TYPE_INTEGER) {
+               ACPI_EXCEPTION((AE_INFO, AE_TYPE,
+                               "Type not integer: %X\n", target->common.type));
+               return_ACPI_STATUS(AE_AML_OPERAND_TYPE);
+       }
+
+       target->integer.value = 0;
+
        /* Source Object can be either an op_region or a Buffer/Field */
 
        switch (obj_desc->common.type) {
@@ -430,9 +454,6 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc,
         */
        status = acpi_ex_add_table(table_index, &ddb_handle);
        if (ACPI_FAILURE(status)) {
-
-               /* On error, table_ptr was deallocated above */
-
                return_ACPI_STATUS(status);
        }
 
@@ -442,21 +463,13 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc,
        acpi_ns_initialize_objects();
        acpi_ex_enter_interpreter();
 
-       /* Store the ddb_handle into the Target operand */
+       /* Remove the reference to ddb_handle created by acpi_ex_add_table above */
 
-       status = acpi_ex_store(ddb_handle, target, walk_state);
-       if (ACPI_FAILURE(status)) {
-               (void)acpi_ex_unload_table(ddb_handle);
-
-               /* table_ptr was deallocated above */
-
-               acpi_ut_remove_reference(ddb_handle);
-               return_ACPI_STATUS(status);
-       }
+       acpi_ut_remove_reference(ddb_handle);
 
-       /* Remove the reference by added by acpi_ex_store above */
+       /* Returning -1 (non-zero) indicates success */
 
-       acpi_ut_remove_reference(ddb_handle);
+       target->integer.value = 0xFFFFFFFFFFFFFFFF;
        return_ACPI_STATUS(status);
 }
 
index 6b7498371eb0d55cc20ae82af30d89af5d6b5996..8de5d47ad485dd56b8f8f1450766e1769aaf674d 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exconvrt - Object conversion routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index deb3674ae726feff83417ee56780a1a5cefb1b37..fb2453fa9442898dfbe26b39e9240849d2a0bd9e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: excreate - Named object creation
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 6a01e38b7d5aec428eefc2731996665bf03168b9..8a99aadb9d155ee04072489b20892a5405909d23 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exdebug - Support for stores to the AML Debug Object
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 2aea44ecc37d0562e05ecd821d83249fcc16f3ce..24b3d041b3e5a0533c1ce37a0572ef0a49c7b750 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exdump - Interpreter debug output routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 8618500f23b3943e72aebc8a065993cb36266694..2b89a496de65c69a652e0f4bf6c63ba865726991 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exfield - AML execution - field_unit read/write
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index bdc7a30d1217c6bd09806172eb4302c95dfb6810..d769cea1468bbb333a21f25fed63d098ed738ef5 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exfldio - Aml Field I/O
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -104,7 +104,7 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc,
 #ifdef ACPI_UNDER_DEVELOPMENT
        /*
         * If the Field access is any_acc, we can now compute the optimal
-        * access (because we know know the length of the parent region)
+        * access (because we know the length of the parent region)
         */
        if (!(obj_desc->common.flags & AOPOBJ_DATA_VALID)) {
                if (ACPI_FAILURE(status)) {
index ad19f914641bf67349d36179846e0c38f3edac64..b4bac8c60a131f7a3f286aa502e1e61e2dc931e2 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exmisc - ACPI AML (p-code) execution - specific opcodes
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 6237ae8284b1eb1707c26abee6e6c559373e0a9a..e9dcfa1e93eb800337ae66241fab71f850a96913 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exmutex - ASL Mutex Acquire/Release functions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 5283603d078d5ef3c34785f558c7c06e4b544c44..318eb769058d8f7e8dfcb155ce858adf7ba73f14 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exnames - interpreter/scanner name load/execute
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 44b7c350ed5ca4cd658ae5e55c410887871fb092..d108a1a86f12f25000c329fde155b6b9e4e26daf 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg1 - AML execution - opcodes with 1 argument
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -163,6 +163,7 @@ acpi_status acpi_ex_opcode_1A_0T_0R(struct acpi_walk_state *walk_state)
        return_ACPI_STATUS(status);
 }
 
+#ifdef _OBSOLETE_CODE          /* Was originally used for Load() operator */
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ex_opcode_1A_1T_0R
@@ -187,10 +188,12 @@ acpi_status acpi_ex_opcode_1A_1T_0R(struct acpi_walk_state *walk_state)
        /* Examine the AML opcode */
 
        switch (walk_state->opcode) {
+#ifdef _OBSOLETE_CODE
        case AML_LOAD_OP:
 
                status = acpi_ex_load_op(operand[0], operand[1], walk_state);
                break;
+#endif
 
        default:                /* Unknown opcode */
 
@@ -204,6 +207,7 @@ cleanup:
 
        return_ACPI_STATUS(status);
 }
+#endif
 
 /*******************************************************************************
  *
@@ -215,6 +219,8 @@ cleanup:
  *
  * DESCRIPTION: Execute opcode with one argument, one target, and a
  *              return value.
+ *              January 2022: Added Load operator, with new ACPI 6.4
+ *              semantics.
  *
  ******************************************************************************/
 
@@ -239,6 +245,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
        case AML_FIND_SET_LEFT_BIT_OP:
        case AML_FIND_SET_RIGHT_BIT_OP:
        case AML_FROM_BCD_OP:
+       case AML_LOAD_OP:
        case AML_TO_BCD_OP:
        case AML_CONDITIONAL_REF_OF_OP:
 
@@ -338,6 +345,20 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
                        }
                        break;
 
+               case AML_LOAD_OP:       /* Result1 = Load (Operand[0], Result1) */
+
+                       return_desc->integer.value = 0;
+                       status =
+                           acpi_ex_load_op(operand[0], return_desc,
+                                           walk_state);
+                       if (ACPI_SUCCESS(status)) {
+
+                               /* A return value of -1 (non-zero) indicates success */
+
+                               return_desc->integer.value = 0xFFFFFFFFFFFFFFFF;
+                       }
+                       break;
+
                case AML_TO_BCD_OP:     /* to_bcd (Operand, Result) */
 
                        return_desc->integer.value = 0;
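
The two hunks above implement the ACPI 6.4 Load() semantics: the operator
now produces an Integer result that is preset to 0 and overwritten with
all-ones only when acpi_ex_load_op() succeeds. A minimal standalone C
sketch of that convention (hypothetical names, not the ACPICA API):

    #include <stdint.h>
    #include <stdio.h>

    /* Preset the result to 0 (failure); store all-ones on success. */
    static int do_load(int load_status, uint64_t *result)
    {
            *result = 0;
            if (load_status == 0)           /* AE_OK in ACPICA terms */
                    *result = UINT64_MAX;   /* -1: non-zero indicates success */
            return load_status;
    }

    int main(void)
    {
            uint64_t r;

            do_load(0, &r);
            printf("Load result: 0x%llx\n", (unsigned long long)r);
            return 0;
    }
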
index 10323ab186da42ac4775db0d01cb20f336a029e2..ebf7c89d52d9978fbddb80e7ac9f5b0746790b27 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg2 - AML execution - opcodes with 2 arguments
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 140aae0096904a99251e1661d1231050d52daef3..4b069bd6bc71e0fa064aa7b782848e09a093e2d9 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg3 - AML execution - opcodes with 3 arguments
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 2cf9f37a0ba80ef8095a858000d55668ed807629..2a506ef386cfba6ea310893f664b4ac34a316216 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg6 - AML execution - opcodes with 6 arguments
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index d8c55dde191b1d583a6b949bd6accc0df61fa533..08f06797386af41598a6687877c1139fe615cfe0 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exprep - ACPI AML field prep utilities
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 48c19908fa4e4b701b12f0538fedf8487dc21567..4ff35852c0b38ddef6982862a772cdb788423be0 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exregion - ACPI default op_region (address space) handlers
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index d80b76455c505ec15e2fa0a3c07f4ac1afa30c2f..b81506d73447b8f4fb0a678ae6b86ba39769265a 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exresnte - AML Interpreter object resolution
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index fa6a96242835f0991e8fe045c1cdea96296f0568..61ee7fb46006fbc200fdc5839a324b0ef1e1e8fc 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exresolv - AML Interpreter object resolution
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index cbe2c88b1dc2507edea403381f991d615c76d8e8..3342780230af41a36262f6f45e867fc955e79bd5 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exresop - AML Interpreter operand/object resolution
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 10d68a5f76a32c96e77c714156c98ada47da93e4..4da20d7845df1ee87d0d6efab5cc16fdb8de1e42 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exserial - field_unit support for serial address spaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 12f4210ea085d62673f90232033640c7e19e87f3..f99e8cf27a6cfcb107f0ec15c72398c28ae72df3 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exstore - AML Interpreter object store support
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 08469d37e73e0ec9a5aa6276e79bf76f6675fd76..c848b328e76063697abe648942c0d2fe539f7790 100644 (file)
@@ -4,7 +4,7 @@
  * Module Name: exstoren - AML Interpreter object store support,
  *                        Store to Node (namespace object)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index a826286833299113a6f3d2371568f6db6df83864..45c757bbf9a971e05eb475475ce3f62d8603ff6f 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exstorob - AML object store support, store to object
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 1281c07112de76e773505d752d95b0ff71a90ec4..7b5470f404f3f85077337776901fd9714ded82f7 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exsystem - Interface to OS services
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -107,7 +107,7 @@ acpi_status acpi_ex_system_wait_mutex(acpi_mutex mutex, u16 timeout)
  *
  * FUNCTION:    acpi_ex_system_do_stall
  *
- * PARAMETERS:  how_long        - The amount of time to stall,
+ * PARAMETERS:  how_long_us     - The amount of time to stall,
  *                                in microseconds
  *
  * RETURN:      Status
@@ -120,24 +120,29 @@ acpi_status acpi_ex_system_wait_mutex(acpi_mutex mutex, u16 timeout)
  *
  ******************************************************************************/
 
-acpi_status acpi_ex_system_do_stall(u32 how_long)
+acpi_status acpi_ex_system_do_stall(u32 how_long_us)
 {
        acpi_status status = AE_OK;
 
        ACPI_FUNCTION_ENTRY();
 
-       if (how_long > 255) {   /* 255 microseconds */
+       if (how_long_us > 255) {
                /*
-                * Longer than 255 usec, this is an error
+                * Longer than 255 microseconds, this is an error
                 *
                 * (ACPI specifies 100 usec as max, but this gives some slack in
                 * order to support existing BIOSs)
                 */
                ACPI_ERROR((AE_INFO,
-                           "Time parameter is too large (%u)", how_long));
+                           "Time parameter is too large (%u)", how_long_us));
                status = AE_AML_OPERAND_VALUE;
        } else {
-               acpi_os_stall(how_long);
+               if (how_long_us > 100) {
+                       ACPI_WARNING((AE_INFO,
+                                     "Time parameter %u us > 100 us violating ACPI spec, please fix the firmware.",
+                                     how_long_us));
+               }
+               acpi_os_stall(how_long_us);
        }
 
        return (status);
@@ -147,7 +152,7 @@ acpi_status acpi_ex_system_do_stall(u32 how_long)
  *
  * FUNCTION:    acpi_ex_system_do_sleep
  *
- * PARAMETERS:  how_long        - The amount of time to sleep,
+ * PARAMETERS:  how_long_ms     - The amount of time to sleep,
  *                                in milliseconds
  *
  * RETURN:      None
@@ -156,7 +161,7 @@ acpi_status acpi_ex_system_do_stall(u32 how_long)
  *
  ******************************************************************************/
 
-acpi_status acpi_ex_system_do_sleep(u64 how_long)
+acpi_status acpi_ex_system_do_sleep(u64 how_long_ms)
 {
        ACPI_FUNCTION_ENTRY();
 
@@ -168,11 +173,11 @@ acpi_status acpi_ex_system_do_sleep(u64 how_long)
         * For compatibility with other ACPI implementations and to prevent
         * accidental deep sleeps, limit the sleep time to something reasonable.
         */
-       if (how_long > ACPI_MAX_SLEEP) {
-               how_long = ACPI_MAX_SLEEP;
+       if (how_long_ms > ACPI_MAX_SLEEP) {
+               how_long_ms = ACPI_MAX_SLEEP;
        }
 
-       acpi_os_sleep(how_long);
+       acpi_os_sleep(how_long_ms);
 
        /* And now we must get the interpreter again */
 
index 8846f483fb020f9984b15c5f2cf3aff4d2899dc8..b570d7a7e13402bbeab1d08495e60b6c093388da 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: extrace - Support for interpreter execution tracing
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 4d41a866f63338a81dd4ebc5487cd19a51d8f672..87f01ce1c1aa4331b3e4797f385a92e833451c96 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: exutils - interpreter/scanner utilities
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 96f55f0799881cb221724c3a73fb94f490fd8bf6..2f1c2fc8bd2a4abb6fe48e867c8b9b1b4edf12af 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: hwacpi - ACPI Hardware Initialization/Mode Interface
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 7ee2939c08cd4d6a06cbd7161253c12750ea65b1..d8597e052912fe48259bc57c9a2621859f463877 100644 (file)
@@ -4,7 +4,7 @@
  * Name: hwesleep.c - ACPI Hardware Sleep/Wake Support functions for the
  *                    extended FADT-V5 sleep registers.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 0770aa176cd505f0dbce9037cf8b9f7e76ae9ca4..13d54a48e6e953039f73637c6023a93067e8d102 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: hwgpe - Low level GPE enable/disable/clear functions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 69603ba52a3accc41d71b35ac863d6d4c7498f0d..f62d5d0242058474ed7b92af99d44372fcc5c6f7 100644 (file)
@@ -446,7 +446,7 @@ struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id)
  * RETURN:      Status
  *
  * DESCRIPTION: Write the PM1 A/B control registers. These registers are
- *              different than than the PM1 A/B status and enable registers
+ *              different than the PM1 A/B status and enable registers
  *              in that different values can be written to the A/B registers.
  *              Most notably, the SLP_TYP bits can be different, as per the
  *              values returned from the _Sx predefined methods.
index 5efa3d8e483e01c9236de20e19aad13b2a4ad0d1..bd936476dda9667c1b28714c867dce9703761479 100644 (file)
@@ -4,7 +4,7 @@
  * Name: hwsleep.c - ACPI Hardware Sleep/Wake Support functions for the
  *                   original/legacy sleep/PM registers.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 63deadde9f48d3d933804911133b7c9e0575eb40..46f3ae03ab996859ecafa3677a855230aa70a2d6 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: hwtimer.c - ACPI Power Management Timer Interface
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e15badf4077aabbc46c9b62d35f39f9167ddb006..915b26448d2c9114bba6a1155b5e53f78ab89d31 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: hwvalid - I/O request validation
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index fb27aaad0dee2345e5ebb28eec535b7eb6c27ff3..55d9b897e70f1038c36c0858d18a208d7ed8c4cc 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: hwxface - Public ACPICA hardware interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index ba77598ee43e8af16bf3e32c0e34f1beed1b14b9..aff51ceea02cc19505529d4165d804180c3a173c 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: hwxfsleep.c - ACPI Hardware Sleep/Wake External Interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index c8a2747005c5b2e1d73ba5c9dfa27cd94c29540f..22586b90e532ea1656b6412e5dfb6a31fe9cc4fd 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nsarguments - Validation of args for ACPI predefined methods
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 597d0eed23c166c7a3ee5d49b59ac7b2785a8266..b02555fe38f04071206dfbf2ea8d1acf90734fcc 100644 (file)
@@ -4,7 +4,7 @@
  * Module Name: nsconvert - Object conversions for objects returned by
  *                          predefined methods
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 2f66f3ed1810c65970efc7d3a0155526287fa6c1..f154824d4eb69f1499f6571cd79d585b0b2e1a06 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nsdump - table dumping routines for debug
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index d3dc6761bcddbceb96edd5e8fd4965906f519916..b9a88b7b518b125ffb1e20217db78edc1b1783f7 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nsdump - table dumping routines for debug
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 4db81f8ba29ba00237824e3567e138e07a2df154..3e6207ad18d81e75fbb0224fdd656c9b88d77ce9 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nsinit - namespace initialization
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 7d77956ed790f39400e5c7ffbbe98c4c4448696c..880260a30c0c8a83bfdaa5d611330a0a16e3b899 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nsload - namespace loading/expanding/contracting procedures
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 778f80e624befcb31f6fbc050db44d77953377c8..4b893676ab5c22c03c012c6b5c6f2f349d35c08c 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nsparse - namespace interface to AML parser
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e4e5f32da7dc0393c057267f86b30396afa0c425..c0db6690bb32403fbba0e6e9a4d71f3b3cd2013b 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nspredef - Validation of ACPI predefined methods and objects
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 6742b50836f79f65cabbd21e537cdb4eab95a54e..82932c9a774b4f5772b99e4d71fa91da256bc343 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nsprepkg - Validation of package objects for predefined names
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 499067daa22c65a326d241302c2db39923612b8e..367fcd201f96e3e2d193b86f4b7e9624dc7dcd83 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nsrepair - Repair for objects returned by predefined methods
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 14b71b41e845370a20613a69a27fbf0da7130919..dd533c887e3a2466824cac84100f0f4cbdc0ff3a 100644 (file)
@@ -4,7 +4,7 @@
  * Module Name: nsrepair2 - Repair for objects returned by specific
  *                          predefined methods
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 83d0f276da4d52a9948b3b2b0906a7fdac886347..ef531b145adde0d820dbd7de45a88736abbdea3b 100644 (file)
@@ -4,7 +4,7 @@
  * Module Name: nsutils - Utilities for accessing ACPI namespace, accessing
  *                        parents and siblings and Scope manipulation
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e7c30ce06e189cd6babe398a600c635af758c65a..82a0dae349e2fddc3bc03f4a8c713d8701087451 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: nswalk - Functions for walking the ACPI namespace
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 03487546da5a7135c87001342a3371b2b8e89512..b2cfdfef319474842376fe792fc64b352ed960cc 100644 (file)
@@ -4,7 +4,7 @@
  * Module Name: nsxfname - Public interfaces to the ACPI subsystem
  *                         ACPI Namespace oriented interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index b9ff535aa02e694001ad89f173c4eda0adc27ccc..f7ec5606098c761bb861c29cd2b2a28767d2a530 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: psargs - Parse AML opcode arguments
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 4b51dd939f29ac92ac142298edc3f856cf948944..840512fa9fc61b9d3edacfa2d1a1e3725c141464 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: psloop - Main AML parse loop
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e4420cd6d2814c835111543b28286c03b2bafce2..bca249e67c6b53f7d56bd60f608409237b645686 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: psobject - Support for parse objects
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 3e80eb1a5f35c1dc1aabef1ef2f7c564382cec1e..bef69e87a0a29ecbf4ba7c07b7d7d1c8c59eaab2 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: psopcode - Parser/Interpreter opcode information table
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -408,8 +408,8 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
                         AML_HAS_ARGS | AML_NSOBJECT | AML_NSNODE |
                         AML_DEFER | AML_FIELD | AML_CREATE),
 /* 4A */ ACPI_OP("Load", ARGP_LOAD_OP, ARGI_LOAD_OP, ACPI_TYPE_ANY,
-                        AML_CLASS_EXECUTE, AML_TYPE_EXEC_1A_1T_0R,
-                        AML_FLAGS_EXEC_1A_1T_0R),
+                        AML_CLASS_EXECUTE, AML_TYPE_EXEC_1A_1T_1R,
+                        AML_FLAGS_EXEC_1A_1T_1R),
 /* 4B */ ACPI_OP("Stall", ARGP_STALL_OP, ARGI_STALL_OP, ACPI_TYPE_ANY,
                         AML_CLASS_EXECUTE, AML_TYPE_EXEC_1A_0T_0R,
                         AML_FLAGS_EXEC_1A_0T_0R),
index 476b00a121f3d8704225158815629610bc471ccc..f10afe699ad76e593c385407e7e1976942317478 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: psopinfo - AML opcode information functions and dispatch tables
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 7eb7a81619a36547165f24ed5e073219aaebc5cb..ba93f359760a900612ca727cbae8faf293343b9d 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: psparse - Parser top level AML parse routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 3f2eada44942fa024ea594bc493ddfc9e4d23e0f..400f001631ea64aad6501fe1f2bd0233ec76fc91 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: psscope - Parser scope stack management routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index ffb2a7bfc6d7eb6e0aca87441ec68cd9bf0d7cee..3012a93423676e2843292c9e9c61d68bcaa2e9f9 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: pstree - Parser op tree manipulation/traversal/search
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e6596051d548612fc9eba4de4a3596ed9a3c7b79..49b39aeded12aacafdb7b7a8bae78e85284b789e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: psutils - Parser miscellaneous utilities (Parser only)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 7018a789debc5b98c49817d129b16d8bc45a8516..7735a01dab90428aefd16c71869862426e4384a1 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: pswalk - Parser routines to walk parsed op tree(s)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index fd0f28c7af1e68369d9402bd9e2df7bf86a8c10c..a6509aeb2955076f9ee83a24d58192a08174f7af 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: psxface - Parser external interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 20360a9db48278f1fc2c9a398f493412bf699081..a7642b34ce48fafaf88a65e76d9bbbafb6ef6aa8 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: tbdata - Table manager data structure functions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 047bd094ba68da012ec740cb91753e21d4ae3739..31d7ea84a360f4a8df18aa86be9ba7d2b86a47a3 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: tbfadt   - FADT table utilities
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 2c2c2b1f5a28daa2fa58f4f90daab5784a89d419..c31a5ddb0ffd4455e405aceaa05523e769253811 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: tbfind   - find table
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 5649f493a1edd13ad6e89786d986605186195100..499efcaf798dcd1452ad152187b58275efe76df9 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: tbinstal - ACPI table installation and removal
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 4dac16bd63d3424c3148873c84f305f27b4a8ef5..595547db28c060e167b2661aa29fe409a2a51f5e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: tbprint - Table output utilities
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 5e8d50a4b6a9a1f4d1ae020039338547c7a5654b..633a823be65fbd33ce889b724b36aa61b25d78e2 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: tbutils - ACPI Table utilities
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e6f51fedaf1a9defb4fd1bd3e1151d721c93d572..37da09dca9408bde5ef27ad864ec8c31af8be0eb 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: tbxface - ACPI table-oriented external interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 87356d9ad613db918086bce50fe94842701c10c4..258796e02be134c694952b5953c1967b39fbbfb9 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: tbxfload - Table load/unload external interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 9fec3df6c3ba4b782021397d5c3a2b086d10d144..3d09e3f6bd434211eb078d017e153cc618054cb7 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: tbxfroot - Find the root ACPI table (RSDT)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 7001f4b113f1778fb23ab8eae8c6484afb0ab424..915321806cd7e6fa75c8af0e2e0c4fcd92d5dc4e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utaddress - op_region address range check
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 796fd9b33a7d0bdff68f0a775500346d783f66fc..2bab6017d82701ddef954ded54fda640db3e99cd 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utalloc - local memory allocation routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e1b55575d5fbecff3f69e8bca4126397d6a3af33..72fb7e9ec485bc2dee74af655676566ce71bec0b 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utascii - Utility ascii functions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 8ab90f78825bd691d744586f31bbd854f3e6a384..59c4050b8e9176591962c2291a2d86cd24e1383c 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utbuffer - Buffer dump routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 814145019f9521854c7196fc59336d9dfc5da2a5..5425968dd2a80bf17c25645b71fbca60b9ece62e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utcache - local cache allocation routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index d9877153f4001d9402cba7cc2802b0ff6156372d..400b9e15a709cee579adf9078d53caaf49e7ddb3 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utcopy - Internal to external object translation utilities
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 09245945f3192fe57ac1b146c1ca438d24312fe4..64ed546cf19c238584ffb23271e02b0a25b61aa8 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utdebug - Debug print/trace routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index bcd3871079d76c4b9d46a9f2e29c949e238983e2..3176393a729d1cc0b4905bec986ef6b2ecde7d66 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utdecode - Utility decoding routines (value-to-string)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index d2503920c620dfd8da31e07a684d9f89a67095ff..df20d46ed8b72c5b04f74c55b68519da2cc8ea89 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: uteval - Object evaluation
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 59a48371a7bca35b315269aa889992748ca24954..cda6e16dddf78c8cc9c0dd1b04fb2563ecfe8e54 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utglobal - Global variables for the ACPI subsystem
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index b1e94c094f9a0b3a4412a0199c439a48c574e798..c811ee2a8160f3c5050265b456ad44a62c2f5e8b 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: uthex -- Hex/ASCII support functions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 08e9f316cbde69c9ddefffc559760a0d4c291268..b6caab49f1bda981c78c251530c0376e634b6978 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utids - support for device Ids - HID, UID, CID, SUB, CLS
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 7b606a1e698649ef075b4ee919bd41c012321551..18177e4f26f742769f15bd898844bdfc532a5d0e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utinit - Common ACPI subsystem initialization
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 923dd15e7a169ab136ed8626c197c654ba6fbdc6..84abdbf5cfca9680a1e101e37de5908d58a165f0 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utlock - Reader/Writer lock interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 84a210b49e3a42a6d02b4c860d9c5014b07d19f4..d3667bfff40139d442a13a5749cb4d073e27f2ac 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utobject - ACPI object create/delete/size/cache routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 8afa1ccaf12e7cafa2916497f5963db362027647..b8ab0a3cb5b98c93e15b31638010a2deb100f830 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utosi - Support for the _OSI predefined control method
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -74,6 +74,7 @@ static struct acpi_interface_info acpi_default_supported_interfaces[] = {
        {"Windows 2018.2", NULL, 0, ACPI_OSI_WIN_10_RS5},       /* Windows 10 version 1809 - Added 11/2018 */
        {"Windows 2019", NULL, 0, ACPI_OSI_WIN_10_19H1},        /* Windows 10 version 1903 - Added 08/2019 */
        {"Windows 2020", NULL, 0, ACPI_OSI_WIN_10_20H1},        /* Windows 10 version 2004 - Added 08/2021 */
+       {"Windows 2021", NULL, 0, ACPI_OSI_WIN_11},     /* Windows 11 - Added 01/2022 */
 
        /* Feature Group Strings */
 
index a6f87a88c30e52dbea0a7ea1eedcacf0798914ea..2524f013be7a3d62d29505391a3cb69f6401bfda 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utpredef - support functions for predefined names
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 05426596d1f4aae21fb52530534683b0674b9fcb..d5aa2109847f3d7bf475d175e5dff1422543d779 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utprint - Formatted printing routines
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 2ce85fcfeb5b1d868f441d727ecf25c89057cf8a..a06988ac409d1bb5d6ef10a3bab508b26ffb0f63 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: uttrack - Memory allocation tracking routines (debug only)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index dca9061518abed2c5478a805d070a9b7d58cfa52..e24bc670b53eb95ea66e839e0bb1d99bec1964d4 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utuuid -- UUID support functions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 3285c1a92e401a8875e3528de3ba63144c08c5cd..86e76b443da73972fb1253de8b68e07f245cb1c9 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utxface - External interfaces, miscellaneous utility functions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 91016366de1db659baf5b14f2fb6a46df0fc5bbf..f2acec3a0ee353691bf76d35d8dd1fc9b13c8886 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: utxfinit - External interfaces for ACPICA initialization
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 95cc2a9f3e05878d5ba481b143397a8c363b5a52..d4326ec12d29620f9f343b4ec18649a4e883eabe 100644 (file)
@@ -549,6 +549,9 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
             !arch_is_platform_page(base_addr)))
                return -EINVAL;
 
+       if (is_zero_pfn(base_addr >> PAGE_SHIFT))
+               return -EADDRINUSE;
+
 inject:
        mutex_lock(&einj_mutex);
        rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
index c740f0faad39f927bebd1f27ca81166e552fcd38..8bc71cdc2270ab888d4ad768189f1e230daa4994 100644 (file)
@@ -111,7 +111,8 @@ retry_next:
                goto out;
        }
 retry:
-       rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
+       rc = len = erst_read_record(id, erst_dbg_buf, erst_dbg_buf_len,
+                       erst_dbg_buf_len, NULL);
        /* The record may be cleared by others, try read next record */
        if (rc == -ENOENT)
                goto retry_next;
index 698d67cee05272e67a1524326818fd65e7133318..31b077eedb584860e14452e15aabab66481198a9 100644 (file)
@@ -856,6 +856,74 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
 }
 EXPORT_SYMBOL_GPL(erst_read);
 
+static void erst_clear_cache(u64 record_id)
+{
+       int i;
+       u64 *entries;
+
+       mutex_lock(&erst_record_id_cache.lock);
+
+       entries = erst_record_id_cache.entries;
+       for (i = 0; i < erst_record_id_cache.len; i++) {
+               if (entries[i] == record_id)
+                       entries[i] = APEI_ERST_INVALID_RECORD_ID;
+       }
+       __erst_record_id_cache_compact();
+
+       mutex_unlock(&erst_record_id_cache.lock);
+}
+
+ssize_t erst_read_record(u64 record_id, struct cper_record_header *record,
+               size_t buflen, size_t recordlen, const guid_t *creatorid)
+{
+       ssize_t len;
+
+       /*
+        * if creatorid is NULL, read any record for the erst-dbg module
+        */
+       if (creatorid == NULL) {
+               len = erst_read(record_id, record, buflen);
+               if (len == -ENOENT)
+                       erst_clear_cache(record_id);
+
+               return len;
+       }
+
+       len = erst_read(record_id, record, buflen);
+       /*
+        * if erst_read() returns -ENOENT, clear the record_id cache and
+        * skip to the next record_id.
+        */
+       if (len == -ENOENT) {
+               erst_clear_cache(record_id);
+               goto out;
+       }
+
+       if (len < 0)
+               goto out;
+
+       /*
+        * if erst_read() returns less than the record header length,
+        * treat it as -EIO and clear the record_id cache.
+        */
+       if (len < recordlen) {
+               len = -EIO;
+               erst_clear_cache(record_id);
+               goto out;
+       }
+
+       /*
+        * if the record's creator_id is not the one wanted, treat it as
+        * not found so the caller skips to the next record_id.
+        */
+       if (!guid_equal(&record->creator_id, creatorid))
+               len = -ENOENT;
+
+out:
+       return len;
+}
+EXPORT_SYMBOL_GPL(erst_read_record);
+
 int erst_clear(u64 record_id)
 {
        int rc, i;
@@ -996,16 +1064,13 @@ skip:
                goto out;
        }
 
-       len = erst_read(record_id, &rcd->hdr, rcd_len);
+       len = erst_read_record(record_id, &rcd->hdr, rcd_len, sizeof(*rcd),
+                       &CPER_CREATOR_PSTORE);
        /* The record may be cleared by others, try read next record */
        if (len == -ENOENT)
                goto skip;
-       else if (len < 0 || len < sizeof(*rcd)) {
-               rc = -EIO;
+       else if (len < 0)
                goto out;
-       }
-       if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE))
-               goto skip;
 
        record->buf = kmalloc(len, GFP_KERNEL);
        if (record->buf == NULL) {
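
The new erst_read_record() wraps erst_read() with the retry and filter
policy spelled out in the comments above: a NULL creatorid accepts any
record (the erst-dbg case), -ENOENT invalidates the cached record id,
and a short read becomes -EIO. A hedged caller-side sketch, assuming
the surrounding pstore variables and labels from the hunk above:

    /* Accept only pstore-created records; anything else is skipped. */
    len = erst_read_record(record_id, &rcd->hdr, rcd_len, sizeof(*rcd),
                           &CPER_CREATOR_PSTORE);
    if (len == -ENOENT)
            goto skip;      /* cleared or foreign creator: try the next id */
    if (len < 0)
            goto out;       /* hard failure, including short reads (-EIO) */
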
index 4df337d545b73386e3f89c1de5734671595e7049..cf31abd0ed1bb9c5f59f3a398f08b5cae430dd8e 100644 (file)
@@ -9,6 +9,7 @@
 #define pr_fmt(fmt) "ACPI: AGDI: " fmt
 
 #include <linux/acpi.h>
+#include <linux/acpi_agdi.h>
 #include <linux/arm_sdei.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
index dc208f5f5a1f7893f55ef6a835cd3c80a6094970..306513fec1e1f71ca0c0bb0464d44e4fb3926967 100644 (file)
@@ -52,7 +52,6 @@ static bool battery_driver_registered;
 static int battery_bix_broken_package;
 static int battery_notification_delay_ms;
 static int battery_ac_is_broken;
-static int battery_quirk_notcharging;
 static unsigned int cache_time = 1000;
 module_param(cache_time, uint, 0644);
 MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
@@ -216,10 +215,8 @@ static int acpi_battery_get_property(struct power_supply *psy,
                        val->intval = POWER_SUPPLY_STATUS_CHARGING;
                else if (acpi_battery_is_charged(battery))
                        val->intval = POWER_SUPPLY_STATUS_FULL;
-               else if (battery_quirk_notcharging)
-                       val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
                else
-                       val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+                       val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
                break;
        case POWER_SUPPLY_PROP_PRESENT:
                val->intval = acpi_battery_present(battery);
@@ -1105,12 +1102,6 @@ battery_ac_is_broken_quirk(const struct dmi_system_id *d)
        return 0;
 }
 
-static int __init battery_quirk_not_charging(const struct dmi_system_id *d)
-{
-       battery_quirk_notcharging = 1;
-       return 0;
-}
-
 static const struct dmi_system_id bat_dmi_table[] __initconst = {
        {
                /* NEC LZ750/LS */
@@ -1139,19 +1130,6 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = {
                        DMI_MATCH(DMI_BIOS_DATE, "08/22/2014"),
                },
        },
-       {
-               /*
-                * On Lenovo ThinkPads the BIOS specification defines
-                * a state when the bits for charging and discharging
-                * are both set to 0. That state is "Not Charging".
-                */
-               .callback = battery_quirk_not_charging,
-               .ident = "Lenovo ThinkPad",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"),
-               },
-       },
        {
                /* Microsoft Surface Go 3 */
                .callback = battery_notification_delay_quirk,
index 02d208732f9a3e0b8cbdb09ea30783bdbc9417e4..e4fb9e225ddfcb6830e96e67a90dd02c2b3f0e92 100644 (file)
@@ -21,7 +21,7 @@ static struct kobject *bgrt_kobj;
        {                                                                       \
                return sysfs_emit(buf, "%d\n", bgrt_tab._member);               \
        }                                                                       \
-       struct kobj_attribute bgrt_attr_##_name = __ATTR_RO(_name)
+       static struct kobj_attribute bgrt_attr_##_name = __ATTR_RO(_name)
 
 BGRT_SHOW(version, version);
 BGRT_SHOW(status, status);
index 3e58b613a2c41b599586cd00ac6b6322e59d9290..b67d2ee77cd136eeab5e1b0485d1a26b1be2b607 100644 (file)
@@ -278,6 +278,20 @@ bool osc_sb_apei_support_acked;
 bool osc_pc_lpi_support_confirmed;
 EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed);
 
+/*
+ * ACPI 6.2 Section 6.2.11.2 'Platform-Wide OSPM Capabilities':
+ *   Starting with ACPI Specification 6.2, all _CPC registers can be in
+ *   PCC, System Memory, System IO, or Functional Fixed Hardware address
+ *   spaces. OSPM support for this more flexible register space scheme is
+ *   indicated by the “Flexible Address Space for CPPC Registers” _OSC bit.
+ *
+ * Otherwise (cf ACPI 6.1, s8.4.7.1.1.X), _CPC registers must be in:
+ * - PCC or Functional Fixed Hardware address space if defined
+ * - SystemMemory address space (NULL register) if not defined
+ */
+bool osc_cpc_flexible_adr_space_confirmed;
+EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed);
+
 /*
  * ACPI 6.4 Operating System Capabilities for USB.
  */
@@ -315,12 +329,15 @@ static void acpi_bus_osc_negotiate_platform_control(void)
 #endif
 #ifdef CONFIG_X86
        capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT;
-       if (boot_cpu_has(X86_FEATURE_HWP)) {
-               capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
-               capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
-       }
 #endif
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+       capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
+       capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
+#endif
+
+       capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_FLEXIBLE_ADR_SPACE;
+
        if (IS_ENABLED(CONFIG_SCHED_MC_PRIO))
                capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT;
 
@@ -341,10 +358,9 @@ static void acpi_bus_osc_negotiate_platform_control(void)
                return;
        }
 
-#ifdef CONFIG_X86
-       if (boot_cpu_has(X86_FEATURE_HWP))
-               osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
-                               (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
+#ifdef CONFIG_ACPI_CPPC_LIB
+       osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
+                       (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
 #endif
 
        /*
@@ -366,6 +382,8 @@ static void acpi_bus_osc_negotiate_platform_control(void)
                        capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
                osc_sb_native_usb4_support_confirmed =
                        capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+               osc_cpc_flexible_adr_space_confirmed =
+                       capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPC_FLEXIBLE_ADR_SPACE;
        }
 
        kfree(context.ret.pointer);
@@ -1070,6 +1088,32 @@ int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data)
 }
 EXPORT_SYMBOL_GPL(acpi_bus_for_each_dev);
 
+struct acpi_dev_walk_context {
+       int (*fn)(struct acpi_device *, void *);
+       void *data;
+};
+
+static int acpi_dev_for_one_check(struct device *dev, void *context)
+{
+       struct acpi_dev_walk_context *adwc = context;
+
+       if (dev->bus != &acpi_bus_type)
+               return 0;
+
+       return adwc->fn(to_acpi_device(dev), adwc->data);
+}
+
+int acpi_dev_for_each_child(struct acpi_device *adev,
+                           int (*fn)(struct acpi_device *, void *), void *data)
+{
+       struct acpi_dev_walk_context adwc = {
+               .fn = fn,
+               .data = data,
+       };
+
+       return device_for_each_child(&adev->dev, &adwc, acpi_dev_for_one_check);
+}
+
 /* --------------------------------------------------------------------------
                              Initialization/Cleanup
    -------------------------------------------------------------------------- */
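
acpi_dev_for_each_child() is a thin wrapper around
device_for_each_child() that filters out non-ACPI devices before
invoking the callback. A hedged usage sketch (hypothetical helper, not
part of the patch) that counts the direct ACPI children of a device:

    static int count_one_child(struct acpi_device *child, void *data)
    {
            int *count = data;

            (*count)++;
            return 0;       /* 0 keeps the walk going */
    }

    static int acpi_count_children(struct acpi_device *adev)
    {
            int count = 0;

            acpi_dev_for_each_child(adev, count_one_child, &count);
            return count;
    }
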
index bc1454789a065906e6d914cbc2fe10991f79798d..903528f7e187e4bc1698e4d24ed5d2693f080527 100644 (file)
@@ -100,6 +100,16 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
                                (cpc)->cpc_entry.reg.space_id ==        \
                                ACPI_ADR_SPACE_PLATFORM_COMM)
 
+/* Check if a CPC register is in SystemMemory */
+#define CPC_IN_SYSTEM_MEMORY(cpc) ((cpc)->type == ACPI_TYPE_BUFFER &&  \
+                               (cpc)->cpc_entry.reg.space_id ==        \
+                               ACPI_ADR_SPACE_SYSTEM_MEMORY)
+
+/* Check if a CPC register is in SystemIo */
+#define CPC_IN_SYSTEM_IO(cpc) ((cpc)->type == ACPI_TYPE_BUFFER &&      \
+                               (cpc)->cpc_entry.reg.space_id ==        \
+                               ACPI_ADR_SPACE_SYSTEM_IO)
+
 /* Evaluates to True if reg is a NULL register descriptor */
 #define IS_NULL_REG(reg) ((reg)->space_id ==  ACPI_ADR_SPACE_SYSTEM_MEMORY && \
                                (reg)->address == 0 &&                  \
@@ -305,7 +315,7 @@ static int send_pcc_cmd(int pcc_ss_id, u16 cmd)
                goto end;
        }
 
-       /* wait for completion and check for PCC errro bit */
+       /* wait for completion and check for PCC error bit */
        ret = check_pcc_chan(pcc_ss_id, true);
 
        if (pcc_ss_data->pcc_mrtt)
@@ -424,6 +434,24 @@ bool acpi_cpc_valid(void)
 }
 EXPORT_SYMBOL_GPL(acpi_cpc_valid);
 
+bool cppc_allow_fast_switch(void)
+{
+       struct cpc_register_resource *desired_reg;
+       struct cpc_desc *cpc_ptr;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
+               desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF];
+               if (!CPC_IN_SYSTEM_MEMORY(desired_reg) &&
+                               !CPC_IN_SYSTEM_IO(desired_reg))
+                       return false;
+       }
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(cppc_allow_fast_switch);
+
 /**
  * acpi_get_psd_map - Map the CPUs in the freq domain of a given cpu
  * @cpu: Find all CPUs that share a domain with cpu.
@@ -736,6 +764,11 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
                                if (gas_t->address) {
                                        void __iomem *addr;
 
+                                       if (!osc_cpc_flexible_adr_space_confirmed) {
+                                               pr_debug("Flexible address space capability not supported\n");
+                                               goto out_free;
+                                       }
+
                                        addr = ioremap(gas_t->address, gas_t->bit_width/8);
                                        if (!addr)
                                                goto out_free;
@@ -758,6 +791,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
                                                 gas_t->address);
                                        goto out_free;
                                }
+                               if (!osc_cpc_flexible_adr_space_confirmed) {
+                                       pr_debug("Flexible address space capability not supported\n");
+                                       goto out_free;
+                               }
                        } else {
                                if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
                                        /* Support only PCC, SystemMemory, SystemIO, and FFH type regs. */
@@ -1447,6 +1484,9 @@ EXPORT_SYMBOL_GPL(cppc_set_perf);
  * transition latency for performance change requests. The closest we have
  * is the timing information from the PCCT tables which provides the info
  * on the number and frequency of PCC commands the platform can handle.
+ *
+ * If desired_reg is in the SystemMemory or SystemIo ACPI address space,
+ * then assume there is no latency.
  */
 unsigned int cppc_get_transition_latency(int cpu_num)
 {
@@ -1472,7 +1512,9 @@ unsigned int cppc_get_transition_latency(int cpu_num)
                return CPUFREQ_ETERNAL;
 
        desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
-       if (!CPC_IN_PCC(desired_reg))
+       if (CPC_IN_SYSTEM_MEMORY(desired_reg) || CPC_IN_SYSTEM_IO(desired_reg))
+               return 0;
+       else if (!CPC_IN_PCC(desired_reg))
                return CPUFREQ_ETERNAL;
 
        if (pcc_ss_id < 0)
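
Together, CPC_IN_SYSTEM_MEMORY()/CPC_IN_SYSTEM_IO(),
cppc_allow_fast_switch() and the zero-latency special case give cpufreq
drivers a cheap path when the desired-performance register is plain
MMIO or port I/O. A hedged sketch of how a driver init path might
consume them (illustrative only, not the actual cppc_cpufreq code):

    static int example_cppc_policy_init(struct cpufreq_policy *policy)
    {
            /* Fast switching is safe only without PCC mailbox traffic. */
            policy->fast_switch_possible = cppc_allow_fast_switch();

            /* Returns 0 for SystemMemory/SystemIo desired-perf registers. */
            policy->cpuinfo.transition_latency =
                    cppc_get_transition_latency(policy->cpu);
            return 0;
    }
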
index cc6c97e7dcaeb42cb66f56f21852b74caa55ccd3..130b5f4a50a3d9d17f563e22eb70df0086a10555 100644 (file)
@@ -10,7 +10,7 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
-#define pr_fmt(fmt) "ACPI: PM: " fmt
+#define pr_fmt(fmt) "PM: " fmt
 
 #include <linux/acpi.h>
 #include <linux/export.h>
@@ -130,8 +130,8 @@ int acpi_device_get_power(struct acpi_device *device, int *state)
        *state = result;
 
  out:
-       dev_dbg(&device->dev, "Device power state is %s\n",
-               acpi_power_state_string(*state));
+       acpi_handle_debug(device->handle, "Power state: %s\n",
+                         acpi_power_state_string(*state));
 
        return 0;
 }
@@ -173,11 +173,8 @@ int acpi_device_set_power(struct acpi_device *device, int state)
        /* Make sure this is a valid target state */
 
        /* There is a special case for D0 addressed below. */
-       if (state > ACPI_STATE_D0 && state == device->power.state) {
-               dev_dbg(&device->dev, "Device already in %s\n",
-                       acpi_power_state_string(state));
-               return 0;
-       }
+       if (state > ACPI_STATE_D0 && state == device->power.state)
+               goto no_change;
 
        if (state == ACPI_STATE_D3_COLD) {
                /*
@@ -189,17 +186,17 @@ int acpi_device_set_power(struct acpi_device *device, int state)
                if (!device->power.states[ACPI_STATE_D3_COLD].flags.valid)
                        target_state = state;
        } else if (!device->power.states[state].flags.valid) {
-               dev_warn(&device->dev, "Power state %s not supported\n",
-                        acpi_power_state_string(state));
+               acpi_handle_debug(device->handle, "Power state %s not supported\n",
+                                 acpi_power_state_string(state));
                return -ENODEV;
        }
 
-       if (!device->power.flags.ignore_parent &&
-           device->parent && (state < device->parent->power.state)) {
-               dev_warn(&device->dev,
-                        "Cannot transition to power state %s for parent in %s\n",
-                        acpi_power_state_string(state),
-                        acpi_power_state_string(device->parent->power.state));
+       if (!device->power.flags.ignore_parent && device->parent &&
+           state < device->parent->power.state) {
+               acpi_handle_debug(device->handle,
+                                 "Cannot transition to %s for parent in %s\n",
+                                 acpi_power_state_string(state),
+                                 acpi_power_state_string(device->parent->power.state));
                return -ENODEV;
        }
 
@@ -216,9 +213,10 @@ int acpi_device_set_power(struct acpi_device *device, int state)
                 * (deeper) states to higher-power (shallower) states.
                 */
                if (state < device->power.state) {
-                       dev_warn(&device->dev, "Cannot transition from %s to %s\n",
-                                acpi_power_state_string(device->power.state),
-                                acpi_power_state_string(state));
+                       acpi_handle_debug(device->handle,
+                                         "Cannot transition from %s to %s\n",
+                                         acpi_power_state_string(device->power.state),
+                                         acpi_power_state_string(state));
                        return -ENODEV;
                }
 
@@ -248,7 +246,7 @@ int acpi_device_set_power(struct acpi_device *device, int state)
 
                        /* Nothing to do here if _PSC is not present. */
                        if (!device->power.flags.explicit_get)
-                               return 0;
+                               goto no_change;
 
                        /*
                         * The power state of the device was set to D0 last
@@ -263,23 +261,29 @@ int acpi_device_set_power(struct acpi_device *device, int state)
                         */
                        result = acpi_dev_pm_explicit_get(device, &psc);
                        if (result || psc == ACPI_STATE_D0)
-                               return 0;
+                               goto no_change;
                }
 
                result = acpi_dev_pm_explicit_set(device, ACPI_STATE_D0);
        }
 
- end:
+end:
        if (result) {
-               dev_warn(&device->dev, "Failed to change power state to %s\n",
-                        acpi_power_state_string(target_state));
+               acpi_handle_debug(device->handle,
+                                 "Failed to change power state to %s\n",
+                                 acpi_power_state_string(target_state));
        } else {
                device->power.state = target_state;
-               dev_dbg(&device->dev, "Power state changed to %s\n",
-                       acpi_power_state_string(target_state));
+               acpi_handle_debug(device->handle, "Power state changed to %s\n",
+                                 acpi_power_state_string(target_state));
        }
 
        return result;
+
+no_change:
+       acpi_handle_debug(device->handle, "Already in %s\n",
+                         acpi_power_state_string(state));
+       return 0;
 }
 EXPORT_SYMBOL(acpi_device_set_power);
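
For illustration, a hedged sketch of a hypothetical caller of the helper above; the function and its use of D3hot are assumptions for the example, not part of the patch:

        static int park_and_resume(struct acpi_device *adev)
        {
                int err;

                /* Park the device in D3hot, then bring it back to D0.
                 * Re-requesting the current (non-D0) state now takes the
                 * no_change fast path added above. */
                err = acpi_device_set_power(adev, ACPI_STATE_D3_HOT);
                if (err)
                        return err;

                return acpi_device_set_power(adev, ACPI_STATE_D0);
        }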
 
@@ -425,6 +429,33 @@ bool acpi_bus_power_manageable(acpi_handle handle)
 }
 EXPORT_SYMBOL(acpi_bus_power_manageable);
 
+static int acpi_power_up_if_adr_present(struct acpi_device *adev, void *not_used)
+{
+       if (!(adev->flags.power_manageable && adev->pnp.type.bus_address))
+               return 0;
+
+       acpi_handle_debug(adev->handle, "Power state: %s\n",
+                         acpi_power_state_string(adev->power.state));
+
+       if (adev->power.state == ACPI_STATE_D3_COLD)
+               return acpi_device_set_power(adev, ACPI_STATE_D0);
+
+       return 0;
+}
+
+/**
+ * acpi_dev_power_up_children_with_adr - Power up children with valid _ADR
+ * @adev: Parent ACPI device object.
+ *
+ * Change the power states of the direct children of @adev that are in D3cold
+ * and hold valid _ADR objects to D0 in order to allow bus (e.g. PCI)
+ * enumeration code to access them.
+ */
+void acpi_dev_power_up_children_with_adr(struct acpi_device *adev)
+{
+       acpi_dev_for_each_child(adev, acpi_power_up_if_adr_present, NULL);
+}
+
 #ifdef CONFIG_PM
 static DEFINE_MUTEX(acpi_pm_notifier_lock);
 static DEFINE_MUTEX(acpi_pm_notifier_install_lock);
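
The new power-up helper is built on acpi_dev_for_each_child(), which walks the direct children of an ACPI device with a callback. As a sketch of the same pattern, here is a hypothetical counting helper (not part of this patch):

        /* Count the power-manageable children of @adev. */
        static int count_pm_child(struct acpi_device *child, void *data)
        {
                int *n = data;

                if (child->flags.power_manageable)
                        (*n)++;

                return 0;       /* returning non-zero stops the walk */
        }

        static int count_pm_children(struct acpi_device *adev)
        {
                int n = 0;

                acpi_dev_for_each_child(adev, count_pm_child, &n);
                return n;
        }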
index c0da24c9f8c36bb526ad3958fc6438198a228b31..4919e7abe93f47280157dcc91f10f09434951255 100644 (file)
@@ -151,6 +151,7 @@ static int pch_fivr_remove(struct platform_device *pdev)
 static const struct acpi_device_id pch_fivr_device_ids[] = {
        {"INTC1045", 0},
        {"INTC1049", 0},
+       {"INTC1064", 0},
        {"INTC10A3", 0},
        {"", 0},
 };
index dc1f52a5b3f48a67eac780336da1e20b448995f4..86561eda939f7953b21ebd4b85366ee15094a4fa 100644 (file)
 /*
  * Presentation of attributes which are defined for INT3407 and INT3532.
  * They are:
- * PMAX : Maximum platform powe
+ * PMAX : Maximum platform power
  * PSRC : Platform power source
  * ARTG : Adapter rating
  * CTYP : Charger type
- * PBSS : Battery steady power
  * PROP : Rest of worst case platform Power
  * PBSS : Power Battery Steady State
- * PBSS : Power Battery Steady State
  * RBHF : High Frequency Impedance
  * VBNL : Instantaneous No-Load Voltage
  * CMPP : Current Discharge Capability
@@ -117,7 +115,7 @@ static const struct attribute_group dptf_battery_attribute_group = {
 #define POWER_STATE_CHANGED            0x81
 #define STEADY_STATE_POWER_CHANGED     0x83
 #define POWER_PROP_CHANGE_EVENT        0x84
-#define IMPEDANCED_CHNGED              0x85
+#define IMPEDANCE_CHANGED              0x85
 #define VOLTAGE_CURRENT_CHANGED        0x86
 
 static long long dptf_participant_type(acpi_handle handle)
@@ -150,6 +148,9 @@ static void dptf_power_notify(acpi_handle handle, u32 event, void *data)
        case STEADY_STATE_POWER_CHANGED:
                attr = "max_steady_state_power_mw";
                break;
+       case IMPEDANCE_CHANGED:
+               attr = "high_freq_impedance_mohm";
+               break;
        case VOLTAGE_CURRENT_CHANGED:
                attr = "no_load_voltage_mv";
                break;
@@ -231,6 +232,8 @@ static const struct acpi_device_id int3407_device_ids[] = {
        {"INTC1050", 0},
        {"INTC1060", 0},
        {"INTC1061", 0},
+       {"INTC1065", 0},
+       {"INTC1066", 0},
        {"INTC10A4", 0},
        {"INTC10A5", 0},
        {"", 0},
index 42a5563465484aedf742c75464b2406c62bcf319..b7113fa92fa68503ff683fca441b9c7d546037b1 100644 (file)
@@ -27,6 +27,7 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = {
        {"INT3532"},
        {"INTC1040"},
        {"INTC1041"},
+       {"INTC1042"},
        {"INTC1043"},
        {"INTC1044"},
        {"INTC1045"},
@@ -37,6 +38,11 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = {
        {"INTC1050"},
        {"INTC1060"},
        {"INTC1061"},
+       {"INTC1062"},
+       {"INTC1063"},
+       {"INTC1064"},
+       {"INTC1065"},
+       {"INTC1066"},
        {"INTC10A0"},
        {"INTC10A1"},
        {"INTC10A2"},
index 44728529a5b6bca99c19367b156cbadbea71433d..e7b4b4e4a55e487740ec1a2acac68cd6ba689053 100644 (file)
@@ -14,6 +14,7 @@
        {"INT3404", }, /* Fan */ \
        {"INTC1044", }, /* Fan for Tiger Lake generation */ \
        {"INTC1048", }, /* Fan for Alder Lake generation */ \
+       {"INTC1063", }, /* Fan for Meteor Lake generation */ \
        {"INTC10A2", }, /* Fan for Raptor Lake generation */ \
        {"PNP0C0B", } /* Generic ACPI fan */
 
index ef104809f27b17415758cff0d3793a8d7193b070..8d769114a0487013e6635e88139ce8fb96aaa5bf 100644 (file)
@@ -79,17 +79,17 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev)
 
 static int find_child_checks(struct acpi_device *adev, bool check_children)
 {
-       bool sta_present = true;
        unsigned long long sta;
        acpi_status status;
 
+       if (check_children && list_empty(&adev->children))
+               return -ENODEV;
+
        status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
        if (status == AE_NOT_FOUND)
-               sta_present = false;
-       else if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
-               return -ENODEV;
+               return FIND_CHILD_MIN_SCORE;
 
-       if (check_children && list_empty(&adev->children))
+       if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
                return -ENODEV;
 
        /*
@@ -99,8 +99,10 @@ static int find_child_checks(struct acpi_device *adev, bool check_children)
         * matched going forward.  [This means a second spec violation in a row,
         * so whatever we do here is best effort anyway.]
         */
-       return sta_present && !adev->pnp.type.platform_id ?
-                       FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE;
+       if (adev->pnp.type.platform_id)
+               return FIND_CHILD_MIN_SCORE;
+
+       return FIND_CHILD_MAX_SCORE;
 }
 
 struct acpi_device *acpi_find_child_device(struct acpi_device *parent,
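
The reordered checks make a missing _STA yield FIND_CHILD_MIN_SCORE rather than ruling the child out. A minimal sketch of a lookup through the public wrapper; the wrapper function and the PCI-style _ADR value (device 0x1f, function 3) are assumptions for the example:

        static struct acpi_device *lookup_companion(struct acpi_device *parent)
        {
                /* PCI-style _ADR: (device << 16) | function. */
                return acpi_find_child_device(parent, 0x001f0003, false);
        }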
index 7a70c4bfc23c6684a68c9e517dfc64700c92d98b..3269a888fb7a97a3566be8eaa76ad17eb0843805 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 
 #include "acpica/accommon.h"
-#include "acpica/acnamesp.h"
 #include "internal.h"
 
 /* Definitions for ACPI_DEBUG_PRINT() */
@@ -1496,91 +1495,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n,
 }
 EXPORT_SYMBOL(acpi_check_region);
 
-static acpi_status acpi_deactivate_mem_region(acpi_handle handle, u32 level,
-                                             void *_res, void **return_value)
-{
-       struct acpi_mem_space_context **mem_ctx;
-       union acpi_operand_object *handler_obj;
-       union acpi_operand_object *region_obj2;
-       union acpi_operand_object *region_obj;
-       struct resource *res = _res;
-       acpi_status status;
-
-       region_obj = acpi_ns_get_attached_object(handle);
-       if (!region_obj)
-               return AE_OK;
-
-       handler_obj = region_obj->region.handler;
-       if (!handler_obj)
-               return AE_OK;
-
-       if (region_obj->region.space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY)
-               return AE_OK;
-
-       if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE))
-               return AE_OK;
-
-       region_obj2 = acpi_ns_get_secondary_object(region_obj);
-       if (!region_obj2)
-               return AE_OK;
-
-       mem_ctx = (void *)&region_obj2->extra.region_context;
-
-       if (!(mem_ctx[0]->address >= res->start &&
-             mem_ctx[0]->address < res->end))
-               return AE_OK;
-
-       status = handler_obj->address_space.setup(region_obj,
-                                                 ACPI_REGION_DEACTIVATE,
-                                                 NULL, (void **)mem_ctx);
-       if (ACPI_SUCCESS(status))
-               region_obj->region.flags &= ~(AOPOBJ_SETUP_COMPLETE);
-
-       return status;
-}
-
-/**
- * acpi_release_memory - Release any mappings done to a memory region
- * @handle: Handle to namespace node
- * @res: Memory resource
- * @level: A level that terminates the search
- *
- * Walks through @handle and unmaps all SystemMemory Operation Regions that
- * overlap with @res and that have already been activated (mapped).
- *
- * This is a helper that allows drivers to place special requirements on memory
- * region that may overlap with operation regions, primarily allowing them to
- * safely map the region as non-cached memory.
- *
- * The unmapped Operation Regions will be automatically remapped next time they
- * are called, so the drivers do not need to do anything else.
- */
-acpi_status acpi_release_memory(acpi_handle handle, struct resource *res,
-                               u32 level)
-{
-       acpi_status status;
-
-       if (!(res->flags & IORESOURCE_MEM))
-               return AE_TYPE;
-
-       status = acpi_walk_namespace(ACPI_TYPE_REGION, handle, level,
-                                    acpi_deactivate_mem_region, NULL,
-                                    res, NULL);
-       if (ACPI_FAILURE(status))
-               return status;
-
-       /*
-        * Wait for all of the mappings queued up for removal by
-        * acpi_deactivate_mem_region() to actually go away.
-        */
-       synchronize_rcu();
-       rcu_barrier();
-       flush_scheduled_work();
-
-       return AE_OK;
-}
-EXPORT_SYMBOL_GPL(acpi_release_memory);
-
 /*
  * Let drivers know whether the resource checks are effective
  */
index 6f9e75d148084b48ba2db9de38dd3209dd274f42..b3b507f20e87edf061ca9099fac1070de1be3255 100644 (file)
@@ -927,6 +927,8 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
                host_bridge->preserve_config = 1;
        ACPI_FREE(obj);
 
+       acpi_dev_power_up_children_with_adr(device);
+
        pci_scan_child_bus(bus);
        pci_set_host_bridge_release(host_bridge, acpi_pci_root_release_info,
                                    info);
index eb95e188d62bc27a764c3adf813f5a990b93dff4..6a5572a1a80ccfba497c95ca96d00ecf6badf4a1 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/cpuidle.h>
 #include <linux/cpu.h>
 #include <linux/minmax.h>
+#include <linux/perf_event.h>
 #include <acpi/processor.h>
 
 /*
 #define ACPI_IDLE_STATE_START  (IS_ENABLED(CONFIG_ARCH_HAS_CPU_RELAX) ? 1 : 0)
 
 static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
-module_param(max_cstate, uint, 0000);
-static unsigned int nocst __read_mostly;
-module_param(nocst, uint, 0000);
-static int bm_check_disable __read_mostly;
-module_param(bm_check_disable, uint, 0000);
+module_param(max_cstate, uint, 0400);
+static bool nocst __read_mostly;
+module_param(nocst, bool, 0400);
+static bool bm_check_disable __read_mostly;
+module_param(bm_check_disable, bool, 0400);
 
 static unsigned int latency_factor __read_mostly = 2;
 module_param(latency_factor, uint, 0644);
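
The nocst and bm_check_disable switches are plain on/off knobs, so they become bool, and mode 0400 exposes the parameters read-only to root under /sys/module/processor/parameters/. The equivalent pattern for a hypothetical module:

        static bool my_quirk __read_mostly;
        module_param(my_quirk, bool, 0400);     /* root-readable, not writable */
        MODULE_PARM_DESC(my_quirk, "Enable the (hypothetical) quirk");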
@@ -544,6 +545,8 @@ static void wait_for_freeze(void)
  */
 static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
+       perf_lopwr_cb(true);
+
        if (cx->entry_method == ACPI_CSTATE_FFH) {
                /* Call into architectural FFH based C-state */
                acpi_processor_ffh_cstate_enter(cx);
@@ -554,6 +557,8 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
                inb(cx->address);
                wait_for_freeze();
        }
+
+       perf_lopwr_cb(false);
 }
 
 /**
index c992e57b2c79054391a9c7cb97565e0906e78b36..3147702710afe51d5715c1e3c0b1d7b903cc6451 100644 (file)
@@ -373,6 +373,18 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
                DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"),
                },
        },
+       /*
+        * ASUS B1400CEAE hangs on resume from suspend (see
+        * https://bugzilla.kernel.org/show_bug.cgi?id=215742).
+        */
+       {
+       .callback = init_default_s3,
+       .ident = "ASUS B1400CEAE",
+       .matches = {
+               DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+               DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK B1400CEAE"),
+               },
+       },
        {},
 };
 
index d589543875b81ad8f7a7c870336074b5cae0c07c..1eabfcd122eeeb76cc11d1535ccaa2356772eeda 100644 (file)
@@ -142,6 +142,7 @@ int __init acpi_parse_spcr(bool enable_earlycon, bool enable_console)
        case ACPI_DBG2_16550_COMPATIBLE:
        case ACPI_DBG2_16550_SUBSET:
        case ACPI_DBG2_16550_WITH_GAS:
+       case ACPI_DBG2_16550_NVIDIA:
                uart = "uart";
                break;
        default:
index a4b638bea6f16e1eb1b8515351604e435aa07894..cc2fe0618178e0f50974894cd85e5c9ec8c52128 100644 (file)
@@ -415,19 +415,30 @@ static ssize_t acpi_data_show(struct file *filp, struct kobject *kobj,
                              loff_t offset, size_t count)
 {
        struct acpi_data_attr *data_attr;
-       void *base;
-       ssize_t rc;
+       void __iomem *base;
+       ssize_t size;
 
        data_attr = container_of(bin_attr, struct acpi_data_attr, attr);
+       size = data_attr->attr.size;
+
+       if (offset < 0)
+               return -EINVAL;
+
+       if (offset >= size)
+               return 0;
 
-       base = acpi_os_map_memory(data_attr->addr, data_attr->attr.size);
+       if (count > size - offset)
+               count = size - offset;
+
+       base = acpi_os_map_iomem(data_attr->addr, size);
        if (!base)
                return -ENOMEM;
-       rc = memory_read_from_buffer(buf, count, &offset, base,
-                                    data_attr->attr.size);
-       acpi_os_unmap_memory(base, data_attr->attr.size);
 
-       return rc;
+       memcpy_fromio(buf, base + offset, count);
+
+       acpi_os_unmap_iomem(base, size);
+
+       return count;
 }
 
 static int acpi_bert_data_init(void *th, struct acpi_data_attr *data_attr)
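
The rework follows the standard clamping discipline for sysfs binary reads: reject negative offsets, return 0 at or past EOF, and truncate the count to what remains. A self-contained restatement of just that arithmetic (the helper and the copy step are placeholders, not part of the patch):

        static ssize_t bounded_read(char *buf, loff_t offset, size_t count,
                                    ssize_t size)
        {
                if (offset < 0)
                        return -EINVAL;
                if (offset >= size)
                        return 0;       /* EOF */
                if (count > size - offset)
                        count = size - offset;
                /* ... copy count bytes from source + offset into buf ... */
                return count;
        }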
index d5cedffeeff915b7b571ae08a9298c26a3772015..3a9773a09e199ad884b7954e1dfb24812937ed2a 100644 (file)
@@ -681,7 +681,7 @@ acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func,
 
        if (ret != AE_NOT_FOUND)
                acpi_handle_warn(handle,
-                               "failed to evaluate _DSM (0x%x)\n", ret);
+                                "failed to evaluate _DSM %pUb (0x%x)\n", guid, ret);
 
        return NULL;
 }
index d3bd14aaabf6e6e0c0bca90a7b3aa9a84672305d..7e775ba6fdd905ea766c3e0a9d113fef47d48e26 100644 (file)
@@ -395,107 +395,104 @@ static void amba_device_release(struct device *dev)
        kfree(d);
 }
 
-static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
+static int amba_read_periphid(struct amba_device *dev)
 {
-       u32 size;
+       struct reset_control *rstc;
+       u32 size, pid, cid;
        void __iomem *tmp;
        int i, ret;
 
-       ret = request_resource(parent, &dev->res);
+       ret = dev_pm_domain_attach(&dev->dev, true);
        if (ret)
                goto err_out;
 
-       /* Hard-coded primecell ID instead of plug-n-play */
-       if (dev->periphid != 0)
-               goto skip_probe;
+       ret = amba_get_enable_pclk(dev);
+       if (ret)
+               goto err_pm;
 
        /*
-        * Dynamically calculate the size of the resource
-        * and use this for iomap
+        * Find reset control(s) of the amba bus and de-assert them.
         */
+       rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node);
+       if (IS_ERR(rstc)) {
+               ret = PTR_ERR(rstc);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(&dev->dev, "can't get reset: %d\n", ret);
+               goto err_clk;
+       }
+       reset_control_deassert(rstc);
+       reset_control_put(rstc);
+
        size = resource_size(&dev->res);
        tmp = ioremap(dev->res.start, size);
        if (!tmp) {
                ret = -ENOMEM;
-               goto err_release;
+               goto err_clk;
        }
 
-       ret = dev_pm_domain_attach(&dev->dev, true);
-       if (ret) {
-               iounmap(tmp);
-               goto err_release;
-       }
-
-       ret = amba_get_enable_pclk(dev);
-       if (ret == 0) {
-               u32 pid, cid;
-               struct reset_control *rstc;
-
-               /*
-                * Find reset control(s) of the amba bus and de-assert them.
-                */
-               rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node);
-               if (IS_ERR(rstc)) {
-                       ret = PTR_ERR(rstc);
-                       if (ret != -EPROBE_DEFER)
-                               dev_err(&dev->dev, "can't get reset: %d\n",
-                                       ret);
-                       goto err_reset;
-               }
-               reset_control_deassert(rstc);
-               reset_control_put(rstc);
-
-               /*
-                * Read pid and cid based on size of resource
-                * they are located at end of region
-                */
-               for (pid = 0, i = 0; i < 4; i++)
-                       pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) <<
-                               (i * 8);
-               for (cid = 0, i = 0; i < 4; i++)
-                       cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) <<
-                               (i * 8);
-
-               if (cid == CORESIGHT_CID) {
-                       /* set the base to the start of the last 4k block */
-                       void __iomem *csbase = tmp + size - 4096;
-
-                       dev->uci.devarch =
-                               readl(csbase + UCI_REG_DEVARCH_OFFSET);
-                       dev->uci.devtype =
-                               readl(csbase + UCI_REG_DEVTYPE_OFFSET) & 0xff;
-               }
+       /*
+        * Read the pid and cid registers based on the resource size;
+        * they are located at the end of the region.
+        */
+       for (pid = 0, i = 0; i < 4; i++)
+               pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) << (i * 8);
+       for (cid = 0, i = 0; i < 4; i++)
+               cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) << (i * 8);
 
-               amba_put_disable_pclk(dev);
+       if (cid == CORESIGHT_CID) {
+               /* set the base to the start of the last 4k block */
+               void __iomem *csbase = tmp + size - 4096;
 
-               if (cid == AMBA_CID || cid == CORESIGHT_CID) {
-                       dev->periphid = pid;
-                       dev->cid = cid;
-               }
+               dev->uci.devarch = readl(csbase + UCI_REG_DEVARCH_OFFSET);
+               dev->uci.devtype = readl(csbase + UCI_REG_DEVTYPE_OFFSET) & 0xff;
+       }
 
-               if (!dev->periphid)
-                       ret = -ENODEV;
+       if (cid == AMBA_CID || cid == CORESIGHT_CID) {
+               dev->periphid = pid;
+               dev->cid = cid;
        }
 
+       if (!dev->periphid)
+               ret = -ENODEV;
+
        iounmap(tmp);
+
+err_clk:
+       amba_put_disable_pclk(dev);
+err_pm:
        dev_pm_domain_detach(&dev->dev, true);
+err_out:
+       return ret;
+}
+
+static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
+{
+       int ret;
 
+       ret = request_resource(parent, &dev->res);
        if (ret)
+               goto err_out;
+
+       /* Hard-coded primecell ID instead of plug-n-play */
+       if (dev->periphid != 0)
+               goto skip_probe;
+
+       ret = amba_read_periphid(dev);
+       if (ret) {
+               if (ret != -EPROBE_DEFER) {
+                       amba_device_put(dev);
+                       goto err_out;
+               }
                goto err_release;
+       }
 
- skip_probe:
+skip_probe:
        ret = device_add(&dev->dev);
- err_release:
+err_release:
        if (ret)
                release_resource(&dev->res);
- err_out:
+err_out:
        return ret;
-
- err_reset:
-       amba_put_disable_pclk(dev);
-       iounmap(tmp);
-       dev_pm_domain_detach(&dev->dev, true);
-       goto err_release;
 }
 
 /*
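
The ID layout itself is unchanged: the PrimeCell peripheral and cell IDs are four byte-wide registers each, LSB first, at the end of the region. A standalone restatement of the assembly performed by the loops above (the helper name is hypothetical):

        static u32 read_pcell_id(void __iomem *base, u32 size, u32 off)
        {
                u32 id = 0;
                int i;

                for (i = 0; i < 4; i++)
                        id |= (readl(base + size - off + 4 * i) & 255) << (i * 8);

                return id;
        }

        /* pid = read_pcell_id(tmp, size, 0x20);
         * cid = read_pcell_id(tmp, size, 0x10); */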
index 397dfd27c90d4fc6a252532770d8ebb04a487071..c1eca72b4575df73f32dc24ddc23d460cf5de581 100644 (file)
@@ -324,7 +324,6 @@ static const struct pci_device_id ahci_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */
        { PCI_VDEVICE(INTEL, 0x1d04), board_ahci }, /* PBG RAID */
        { PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */
-       { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG/Lewisburg RAID*/
        { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */
        { PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */
        { PCI_VDEVICE(INTEL, 0x1e03), board_ahci_low_power }, /* Panther M AHCI */
@@ -367,7 +366,9 @@ static const struct pci_device_id ahci_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */
        { PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */
        { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg/Lewisburg AHCI*/
-       { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg/Lewisburg RAID*/
+       { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* *burg SATA0 'RAID' */
+       { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* *burg SATA1 'RAID' */
+       { PCI_VDEVICE(INTEL, 0x282f), board_ahci }, /* *burg SATA2 'RAID' */
        { PCI_VDEVICE(INTEL, 0x43d4), board_ahci }, /* Rocket Lake PCH-H RAID */
        { PCI_VDEVICE(INTEL, 0x43d5), board_ahci }, /* Rocket Lake PCH-H RAID */
        { PCI_VDEVICE(INTEL, 0x43d6), board_ahci }, /* Rocket Lake PCH-H RAID */
index ab8552b1ff2a14ad548ba066b93bf3b0bc0f7d83..f61795c546cf128deb6dbb84dedcf4e7ab896a09 100644 (file)
@@ -549,15 +549,10 @@ static int brcm_ahci_remove(struct platform_device *pdev)
        struct ata_host *host = dev_get_drvdata(&pdev->dev);
        struct ahci_host_priv *hpriv = host->private_data;
        struct brcm_ahci_priv *priv = hpriv->plat_data;
-       int ret;
 
        brcm_sata_phys_disable(priv);
 
-       ret = ata_platform_remove_one(pdev);
-       if (ret)
-               return ret;
-
-       return 0;
+       return ata_platform_remove_one(pdev);
 }
 
 static void brcm_ahci_shutdown(struct platform_device *pdev)
index ca64837641be2caa7b1df0a4e96662c21b388abb..40e816419f48c75d57930abccc015948c1c2551b 100644 (file)
@@ -96,7 +96,8 @@ struct ata_force_param {
        unsigned long   xfer_mask;
        unsigned int    horkage_on;
        unsigned int    horkage_off;
-       u16             lflags;
+       u16             lflags_on;
+       u16             lflags_off;
 };
 
 struct ata_force_ent {
@@ -386,11 +387,17 @@ static void ata_force_link_limits(struct ata_link *link)
                }
 
                /* let lflags stack */
-               if (fe->param.lflags) {
-                       link->flags |= fe->param.lflags;
+               if (fe->param.lflags_on) {
+                       link->flags |= fe->param.lflags_on;
                        ata_link_notice(link,
                                        "FORCE: link flag 0x%x forced -> 0x%x\n",
-                                       fe->param.lflags, link->flags);
+                                       fe->param.lflags_on, link->flags);
+               }
+               if (fe->param.lflags_off) {
+                       link->flags &= ~fe->param.lflags_off;
+                       ata_link_notice(link,
+                               "FORCE: link flag 0x%x cleared -> 0x%x\n",
+                               fe->param.lflags_off, link->flags);
                }
        }
 }
@@ -898,7 +905,7 @@ EXPORT_SYMBOL_GPL(ata_xfer_mode2mask);
  *     RETURNS:
  *     Matching xfer_shift, -1 if no match found.
  */
-int ata_xfer_mode2shift(unsigned long xfer_mode)
+int ata_xfer_mode2shift(u8 xfer_mode)
 {
        const struct ata_xfer_ent *ent;
 
@@ -1398,7 +1405,7 @@ unsigned long ata_id_xfermask(const u16 *id)
 
                /* But wait.. there's more. Design your standards by
                 * committee and you too can get a free iordy field to
-                * process. However its the speeds not the modes that
+                * process. However it is the speeds not the modes that
                 * are supported... Note drivers using the timing API
                 * will get this right anyway
                 */
@@ -3898,7 +3905,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        /* Devices where NCQ should be avoided */
        /* NCQ is slow */
        { "WDC WD740ADFD-00",   NULL,           ATA_HORKAGE_NONCQ },
-       { "WDC WD740ADFD-00NLR1", NULL,         ATA_HORKAGE_NONCQ, },
+       { "WDC WD740ADFD-00NLR1", NULL,         ATA_HORKAGE_NONCQ },
        /* http://thread.gmane.org/gmane.linux.ide/14907 */
        { "FUJITSU MHT2060BH",  NULL,           ATA_HORKAGE_NONCQ },
        /* NCQ is broken */
@@ -3924,23 +3931,23 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        /* drives which fail FPDMA_AA activation (some may freeze afterwards)
           the ST disks also have LPM issues */
        { "ST1000LM024 HN-M101MBB", NULL,       ATA_HORKAGE_BROKEN_FPDMA_AA |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
        { "VB0250EAVER",        "HPG7",         ATA_HORKAGE_BROKEN_FPDMA_AA },
 
        /* Blacklist entries taken from Silicon Image 3124/3132
           Windows driver .inf file - also several Linux problem reports */
-       { "HTS541060G9SA00",    "MB3OC60D",     ATA_HORKAGE_NONCQ, },
-       { "HTS541080G9SA00",    "MB4OC60D",     ATA_HORKAGE_NONCQ, },
-       { "HTS541010G9SA00",    "MBZOC60D",     ATA_HORKAGE_NONCQ, },
+       { "HTS541060G9SA00",    "MB3OC60D",     ATA_HORKAGE_NONCQ },
+       { "HTS541080G9SA00",    "MB4OC60D",     ATA_HORKAGE_NONCQ },
+       { "HTS541010G9SA00",    "MBZOC60D",     ATA_HORKAGE_NONCQ },
 
        /* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */
-       { "C300-CTFDDAC128MAG", "0001",         ATA_HORKAGE_NONCQ, },
+       { "C300-CTFDDAC128MAG", "0001",         ATA_HORKAGE_NONCQ },
 
        /* Sandisk SD7/8/9s lock up hard on large trims */
-       { "SanDisk SD[789]*",   NULL,           ATA_HORKAGE_MAX_TRIM_128M, },
+       { "SanDisk SD[789]*",   NULL,           ATA_HORKAGE_MAX_TRIM_128M },
 
        /* devices which puke on READ_NATIVE_MAX */
-       { "HDS724040KLSA80",    "KFAOA20N",     ATA_HORKAGE_BROKEN_HPA, },
+       { "HDS724040KLSA80",    "KFAOA20N",     ATA_HORKAGE_BROKEN_HPA },
        { "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA },
        { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA },
        { "MAXTOR 6L080L4",     "A93.0500",     ATA_HORKAGE_BROKEN_HPA },
@@ -3949,22 +3956,22 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { "OCZ-VERTEX",             "1.30",     ATA_HORKAGE_BROKEN_HPA },
 
        /* Devices which report 1 sector over size HPA */
-       { "ST340823A",          NULL,           ATA_HORKAGE_HPA_SIZE, },
-       { "ST320413A",          NULL,           ATA_HORKAGE_HPA_SIZE, },
-       { "ST310211A",          NULL,           ATA_HORKAGE_HPA_SIZE, },
+       { "ST340823A",          NULL,           ATA_HORKAGE_HPA_SIZE },
+       { "ST320413A",          NULL,           ATA_HORKAGE_HPA_SIZE },
+       { "ST310211A",          NULL,           ATA_HORKAGE_HPA_SIZE },
 
        /* Devices which get the IVB wrong */
-       { "QUANTUM FIREBALLlct10 05", "A03.0900", ATA_HORKAGE_IVB, },
+       { "QUANTUM FIREBALLlct10 05", "A03.0900", ATA_HORKAGE_IVB },
        /* Maybe we should just blacklist TSSTcorp... */
-       { "TSSTcorp CDDVDW SH-S202[HJN]", "SB0[01]",  ATA_HORKAGE_IVB, },
+       { "TSSTcorp CDDVDW SH-S202[HJN]", "SB0[01]",  ATA_HORKAGE_IVB },
 
        /* Devices that do not need bridging limits applied */
-       { "MTRON MSP-SATA*",            NULL,   ATA_HORKAGE_BRIDGE_OK, },
-       { "BUFFALO HD-QSU2/R5",         NULL,   ATA_HORKAGE_BRIDGE_OK, },
+       { "MTRON MSP-SATA*",            NULL,   ATA_HORKAGE_BRIDGE_OK },
+       { "BUFFALO HD-QSU2/R5",         NULL,   ATA_HORKAGE_BRIDGE_OK },
 
        /* Devices which aren't very happy with higher link speeds */
-       { "WD My Book",                 NULL,   ATA_HORKAGE_1_5_GBPS, },
-       { "Seagate FreeAgent GoFlex",   NULL,   ATA_HORKAGE_1_5_GBPS, },
+       { "WD My Book",                 NULL,   ATA_HORKAGE_1_5_GBPS },
+       { "Seagate FreeAgent GoFlex",   NULL,   ATA_HORKAGE_1_5_GBPS },
 
        /*
         * Devices which choke on SETXFER.  Applies only if both the
@@ -3982,57 +3989,57 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        /* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */
        { "Crucial_CT512MX100*",        "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
        /* 512GB MX100 with newer firmware has only LPM issues */
        { "Crucial_CT512MX100*",        NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
 
        /* 480GB+ M500 SSDs have both queued TRIM and LPM issues */
        { "Crucial_CT480M500*",         NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
        { "Crucial_CT960M500*",         NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
 
        /* These specific Samsung models/firmware-revs do not handle LPM well */
-       { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
-       { "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM, },
-       { "SAMSUNG MZ7TD256HAFV-000L9", NULL,       ATA_HORKAGE_NOLPM, },
-       { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM, },
+       { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM },
+       { "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM },
+       { "SAMSUNG MZ7TD256HAFV-000L9", NULL,       ATA_HORKAGE_NOLPM },
+       { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM },
 
        /* devices that don't properly handle queued TRIM commands */
        { "Micron_M500IT_*",            "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Micron_M500_*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*M500*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Micron_M5[15]0_*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*M550*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*MX100*",          "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Samsung SSD 840 EVO*",       NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_NO_DMA_LOG |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Samsung SSD 840*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Samsung SSD 850*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Samsung SSD 860*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NO_NCQ_ON_ATI, },
+                                               ATA_HORKAGE_NO_NCQ_ON_ATI },
        { "Samsung SSD 870*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NO_NCQ_ON_ATI, },
+                                               ATA_HORKAGE_NO_NCQ_ON_ATI },
        { "FCCT*M500*",                 NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
 
        /* devices that don't properly handle TRIM commands */
-       { "SuperSSpeed S238*",          NULL,   ATA_HORKAGE_NOTRIM, },
-       { "M88V29*",                    NULL,   ATA_HORKAGE_NOTRIM, },
+       { "SuperSSpeed S238*",          NULL,   ATA_HORKAGE_NOTRIM },
+       { "M88V29*",                    NULL,   ATA_HORKAGE_NOTRIM },
 
        /*
         * As defined, the DRAT (Deterministic Read After Trim) and RZAT
@@ -4050,16 +4057,16 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
         * The intel 510 drive has buggy DRAT/RZAT. Explicitly exclude
         * that model before whitelisting all other intel SSDs.
         */
-       { "INTEL*SSDSC2MH*",            NULL,   0, },
+       { "INTEL*SSDSC2MH*",            NULL,   0 },
 
-       { "Micron*",                    NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "Crucial*",                   NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "INTEL*SSD*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "SSD*INTEL*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "Samsung*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "SAMSUNG*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "SAMSUNG*MZ7KM*",             NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "ST[1248][0248]0[FH]*",       NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Micron*",                    NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "Crucial*",                   NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "INTEL*SSD*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "SSD*INTEL*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "Samsung*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "SAMSUNG*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "SAMSUNG*MZ7KM*",             NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "ST[1248][0248]0[FH]*",       NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
 
        /*
         * Some WD SATA-I drives spin up and down erratically when the link
@@ -4566,42 +4573,6 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
 #endif /* __BIG_ENDIAN */
 }
 
-/**
- *     ata_qc_new_init - Request an available ATA command, and initialize it
- *     @dev: Device from whom we request an available command structure
- *     @tag: tag
- *
- *     LOCKING:
- *     None.
- */
-
-struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
-{
-       struct ata_port *ap = dev->link->ap;
-       struct ata_queued_cmd *qc;
-
-       /* no command while frozen */
-       if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
-               return NULL;
-
-       /* libsas case */
-       if (ap->flags & ATA_FLAG_SAS_HOST) {
-               tag = ata_sas_allocate_tag(ap);
-               if (tag < 0)
-                       return NULL;
-       }
-
-       qc = __ata_qc_from_tag(ap, tag);
-       qc->tag = qc->hw_tag = tag;
-       qc->scsicmd = NULL;
-       qc->ap = ap;
-       qc->dev = dev;
-
-       ata_qc_reinit(qc);
-
-       return qc;
-}
-
 /**
  *     ata_qc_free - free unused ata_queued_cmd
  *     @qc: Command to complete
@@ -4614,19 +4585,9 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
  */
 void ata_qc_free(struct ata_queued_cmd *qc)
 {
-       struct ata_port *ap;
-       unsigned int tag;
-
-       WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
-       ap = qc->ap;
-
        qc->flags = 0;
-       tag = qc->tag;
-       if (ata_tag_valid(tag)) {
+       if (ata_tag_valid(qc->tag))
                qc->tag = ATA_TAG_POISON;
-               if (ap->flags & ATA_FLAG_SAS_HOST)
-                       ata_sas_free_tag(tag, ap);
-       }
 }
 
 void __ata_qc_complete(struct ata_queued_cmd *qc)
@@ -5605,7 +5566,7 @@ static void ata_finalize_port_ops(struct ata_port_operations *ops)
  *     Start and then freeze ports of @host.  Started status is
  *     recorded in host->flags, so this function can be called
  *     multiple times.  Ports are guaranteed to get started only
- *     once.  If host->ops isn't initialized yet, its set to the
+ *     once.  If host->ops is not initialized yet, it is set to the
  *     first non-dummy port ops.
  *
  *     LOCKING:
@@ -6146,67 +6107,113 @@ int ata_platform_remove_one(struct platform_device *pdev)
 EXPORT_SYMBOL_GPL(ata_platform_remove_one);
 
 #ifdef CONFIG_ATA_FORCE
+
+#define force_cbl(name, flag)                          \
+       { #name,        .cbl            = (flag) }
+
+#define force_spd_limit(spd, val)                      \
+       { #spd, .spd_limit              = (val) }
+
+#define force_xfer(mode, shift)                                \
+       { #mode,        .xfer_mask      = (1UL << (shift)) }
+
+#define force_lflag_on(name, flags)                    \
+       { #name,        .lflags_on      = (flags) }
+
+#define force_lflag_onoff(name, flags)                 \
+       { "no" #name,   .lflags_on      = (flags) },    \
+       { #name,        .lflags_off     = (flags) }
+
+#define force_horkage_on(name, flag)                   \
+       { #name,        .horkage_on     = (flag) }
+
+#define force_horkage_onoff(name, flag)                        \
+       { "no" #name,   .horkage_on     = (flag) },     \
+       { #name,        .horkage_off    = (flag) }
+
+static const struct ata_force_param force_tbl[] __initconst = {
+       force_cbl(40c,                  ATA_CBL_PATA40),
+       force_cbl(80c,                  ATA_CBL_PATA80),
+       force_cbl(short40c,             ATA_CBL_PATA40_SHORT),
+       force_cbl(unk,                  ATA_CBL_PATA_UNK),
+       force_cbl(ign,                  ATA_CBL_PATA_IGN),
+       force_cbl(sata,                 ATA_CBL_SATA),
+
+       force_spd_limit(1.5Gbps,        1),
+       force_spd_limit(3.0Gbps,        2),
+
+       force_xfer(pio0,                ATA_SHIFT_PIO + 0),
+       force_xfer(pio1,                ATA_SHIFT_PIO + 1),
+       force_xfer(pio2,                ATA_SHIFT_PIO + 2),
+       force_xfer(pio3,                ATA_SHIFT_PIO + 3),
+       force_xfer(pio4,                ATA_SHIFT_PIO + 4),
+       force_xfer(pio5,                ATA_SHIFT_PIO + 5),
+       force_xfer(pio6,                ATA_SHIFT_PIO + 6),
+       force_xfer(mwdma0,              ATA_SHIFT_MWDMA + 0),
+       force_xfer(mwdma1,              ATA_SHIFT_MWDMA + 1),
+       force_xfer(mwdma2,              ATA_SHIFT_MWDMA + 2),
+       force_xfer(mwdma3,              ATA_SHIFT_MWDMA + 3),
+       force_xfer(mwdma4,              ATA_SHIFT_MWDMA + 4),
+       force_xfer(udma0,               ATA_SHIFT_UDMA + 0),
+       force_xfer(udma16,              ATA_SHIFT_UDMA + 0),
+       force_xfer(udma/16,             ATA_SHIFT_UDMA + 0),
+       force_xfer(udma1,               ATA_SHIFT_UDMA + 1),
+       force_xfer(udma25,              ATA_SHIFT_UDMA + 1),
+       force_xfer(udma/25,             ATA_SHIFT_UDMA + 1),
+       force_xfer(udma2,               ATA_SHIFT_UDMA + 2),
+       force_xfer(udma33,              ATA_SHIFT_UDMA + 2),
+       force_xfer(udma/33,             ATA_SHIFT_UDMA + 2),
+       force_xfer(udma3,               ATA_SHIFT_UDMA + 3),
+       force_xfer(udma44,              ATA_SHIFT_UDMA + 3),
+       force_xfer(udma/44,             ATA_SHIFT_UDMA + 3),
+       force_xfer(udma4,               ATA_SHIFT_UDMA + 4),
+       force_xfer(udma66,              ATA_SHIFT_UDMA + 4),
+       force_xfer(udma/66,             ATA_SHIFT_UDMA + 4),
+       force_xfer(udma5,               ATA_SHIFT_UDMA + 5),
+       force_xfer(udma100,             ATA_SHIFT_UDMA + 5),
+       force_xfer(udma/100,            ATA_SHIFT_UDMA + 5),
+       force_xfer(udma6,               ATA_SHIFT_UDMA + 6),
+       force_xfer(udma133,             ATA_SHIFT_UDMA + 6),
+       force_xfer(udma/133,            ATA_SHIFT_UDMA + 6),
+       force_xfer(udma7,               ATA_SHIFT_UDMA + 7),
+
+       force_lflag_on(nohrst,          ATA_LFLAG_NO_HRST),
+       force_lflag_on(nosrst,          ATA_LFLAG_NO_SRST),
+       force_lflag_on(norst,           ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST),
+       force_lflag_on(rstonce,         ATA_LFLAG_RST_ONCE),
+       force_lflag_onoff(dbdelay,      ATA_LFLAG_NO_DEBOUNCE_DELAY),
+
+       force_horkage_onoff(ncq,        ATA_HORKAGE_NONCQ),
+       force_horkage_onoff(ncqtrim,    ATA_HORKAGE_NO_NCQ_TRIM),
+       force_horkage_onoff(ncqati,     ATA_HORKAGE_NO_NCQ_ON_ATI),
+
+       force_horkage_onoff(trim,       ATA_HORKAGE_NOTRIM),
+       force_horkage_on(trim_zero,     ATA_HORKAGE_ZERO_AFTER_TRIM),
+       force_horkage_on(max_trim_128m, ATA_HORKAGE_MAX_TRIM_128M),
+
+       force_horkage_onoff(dma,        ATA_HORKAGE_NODMA),
+       force_horkage_on(atapi_dmadir,  ATA_HORKAGE_ATAPI_DMADIR),
+       force_horkage_on(atapi_mod16_dma, ATA_HORKAGE_ATAPI_MOD16_DMA),
+
+       force_horkage_onoff(dmalog,     ATA_HORKAGE_NO_DMA_LOG),
+       force_horkage_onoff(iddevlog,   ATA_HORKAGE_NO_ID_DEV_LOG),
+       force_horkage_onoff(logdir,     ATA_HORKAGE_NO_LOG_DIR),
+
+       force_horkage_on(max_sec_128,   ATA_HORKAGE_MAX_SEC_128),
+       force_horkage_on(max_sec_1024,  ATA_HORKAGE_MAX_SEC_1024),
+       force_horkage_on(max_sec_lba48, ATA_HORKAGE_MAX_SEC_LBA48),
+
+       force_horkage_onoff(lpm,        ATA_HORKAGE_NOLPM),
+       force_horkage_onoff(setxfer,    ATA_HORKAGE_NOSETXFER),
+       force_horkage_on(dump_id,       ATA_HORKAGE_DUMP_ID),
+
+       force_horkage_on(disable,       ATA_HORKAGE_DISABLE),
+};
+
 static int __init ata_parse_force_one(char **cur,
                                      struct ata_force_ent *force_ent,
                                      const char **reason)
 {
-       static const struct ata_force_param force_tbl[] __initconst = {
-               { "40c",        .cbl            = ATA_CBL_PATA40 },
-               { "80c",        .cbl            = ATA_CBL_PATA80 },
-               { "short40c",   .cbl            = ATA_CBL_PATA40_SHORT },
-               { "unk",        .cbl            = ATA_CBL_PATA_UNK },
-               { "ign",        .cbl            = ATA_CBL_PATA_IGN },
-               { "sata",       .cbl            = ATA_CBL_SATA },
-               { "1.5Gbps",    .spd_limit      = 1 },
-               { "3.0Gbps",    .spd_limit      = 2 },
-               { "noncq",      .horkage_on     = ATA_HORKAGE_NONCQ },
-               { "ncq",        .horkage_off    = ATA_HORKAGE_NONCQ },
-               { "noncqtrim",  .horkage_on     = ATA_HORKAGE_NO_NCQ_TRIM },
-               { "ncqtrim",    .horkage_off    = ATA_HORKAGE_NO_NCQ_TRIM },
-               { "noncqati",   .horkage_on     = ATA_HORKAGE_NO_NCQ_ON_ATI },
-               { "ncqati",     .horkage_off    = ATA_HORKAGE_NO_NCQ_ON_ATI },
-               { "dump_id",    .horkage_on     = ATA_HORKAGE_DUMP_ID },
-               { "pio0",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 0) },
-               { "pio1",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 1) },
-               { "pio2",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 2) },
-               { "pio3",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 3) },
-               { "pio4",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 4) },
-               { "pio5",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 5) },
-               { "pio6",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 6) },
-               { "mwdma0",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 0) },
-               { "mwdma1",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 1) },
-               { "mwdma2",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 2) },
-               { "mwdma3",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 3) },
-               { "mwdma4",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 4) },
-               { "udma0",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 0) },
-               { "udma16",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 0) },
-               { "udma/16",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 0) },
-               { "udma1",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 1) },
-               { "udma25",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 1) },
-               { "udma/25",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 1) },
-               { "udma2",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 2) },
-               { "udma33",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 2) },
-               { "udma/33",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 2) },
-               { "udma3",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 3) },
-               { "udma44",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 3) },
-               { "udma/44",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 3) },
-               { "udma4",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 4) },
-               { "udma66",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 4) },
-               { "udma/66",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 4) },
-               { "udma5",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 5) },
-               { "udma100",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 5) },
-               { "udma/100",   .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 5) },
-               { "udma6",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 6) },
-               { "udma133",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 6) },
-               { "udma/133",   .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 6) },
-               { "udma7",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 7) },
-               { "nohrst",     .lflags         = ATA_LFLAG_NO_HRST },
-               { "nosrst",     .lflags         = ATA_LFLAG_NO_SRST },
-               { "norst",      .lflags         = ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST },
-               { "rstonce",    .lflags         = ATA_LFLAG_RST_ONCE },
-               { "atapi_dmadir", .horkage_on   = ATA_HORKAGE_ATAPI_DMADIR },
-               { "disable",    .horkage_on     = ATA_HORKAGE_DISABLE },
-       };
        char *start = *cur, *p = *cur;
        char *id, *val, *endp;
        const struct ata_force_param *match_fp = NULL;
@@ -6288,7 +6295,7 @@ static void __init ata_parse_force_param(void)
        int last_port = -1, last_device = -1;
        char *p, *cur, *next;
 
-       /* calculate maximum number of params and allocate force_tbl */
+       /* Calculate maximum number of params and allocate ata_force_tbl */
        for (p = ata_force_param_buf; *p; p++)
                if (*p == ',')
                        size++;
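
The onoff macros above generate matched "no<name>"/"<name>" pairs, so each quirk can be both forced and cleared from the kernel command line. For example, force_horkage_onoff(ncq, ATA_HORKAGE_NONCQ) expands to:

        { "noncq",      .horkage_on     = ATA_HORKAGE_NONCQ },
        { "ncq",        .horkage_off    = ATA_HORKAGE_NONCQ },

so booting with, e.g., libata.force=2:noncq disables NCQ on port 2, exactly as the old hand-written table entries did.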
index 044a16daa2d42e040a936b34ab46251b0b02fb0e..7a5fe41aa5ae10eda99cf10c3f8d895de4995f3c 100644 (file)
@@ -1268,31 +1268,6 @@ int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap)
 }
 EXPORT_SYMBOL_GPL(ata_sas_queuecmd);
 
-int ata_sas_allocate_tag(struct ata_port *ap)
-{
-       unsigned int max_queue = ap->host->n_tags;
-       unsigned int i, tag;
-
-       for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) {
-               tag = tag < max_queue ? tag : 0;
-
-               /* the last tag is reserved for internal command. */
-               if (ata_tag_internal(tag))
-                       continue;
-
-               if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) {
-                       ap->sas_last_tag = tag;
-                       return tag;
-               }
-       }
-       return -1;
-}
-
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap)
-{
-       clear_bit(tag, &ap->sas_tag_allocated);
-}
-
 /**
  *     sata_async_notification - SATA async notification handler
  *     @ap: ATA port where async notification is received
index 06c9d90238d9e217a8b6bec8a14307ba8211056b..42cecf95a4e589bdd21444bd547aeb13a080a411 100644 (file)
@@ -638,24 +638,48 @@ EXPORT_SYMBOL_GPL(ata_scsi_ioctl);
 static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
                                              struct scsi_cmnd *cmd)
 {
+       struct ata_port *ap = dev->link->ap;
        struct ata_queued_cmd *qc;
+       int tag;
 
-       qc = ata_qc_new_init(dev, scsi_cmd_to_rq(cmd)->tag);
-       if (qc) {
-               qc->scsicmd = cmd;
-               qc->scsidone = scsi_done;
-
-               qc->sg = scsi_sglist(cmd);
-               qc->n_elem = scsi_sg_count(cmd);
+       if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+               goto fail;
 
-               if (scsi_cmd_to_rq(cmd)->rq_flags & RQF_QUIET)
-                       qc->flags |= ATA_QCFLAG_QUIET;
+       if (ap->flags & ATA_FLAG_SAS_HOST) {
+               /*
+                * SAS hosts may queue > ATA_MAX_QUEUE commands, so use the
+                * unique per-device budget token as the tag.
+                */
+               if (WARN_ON_ONCE(cmd->budget_token >= ATA_MAX_QUEUE))
+                       goto fail;
+               tag = cmd->budget_token;
        } else {
-               cmd->result = (DID_OK << 16) | SAM_STAT_TASK_SET_FULL;
-               scsi_done(cmd);
+               tag = scsi_cmd_to_rq(cmd)->tag;
        }
 
+       qc = __ata_qc_from_tag(ap, tag);
+       qc->tag = qc->hw_tag = tag;
+       qc->ap = ap;
+       qc->dev = dev;
+
+       ata_qc_reinit(qc);
+
+       qc->scsicmd = cmd;
+       qc->scsidone = scsi_done;
+
+       qc->sg = scsi_sglist(cmd);
+       qc->n_elem = scsi_sg_count(cmd);
+
+       if (scsi_cmd_to_rq(cmd)->rq_flags & RQF_QUIET)
+               qc->flags |= ATA_QCFLAG_QUIET;
+
        return qc;
+
+fail:
+       set_host_byte(cmd, DID_OK);
+       set_status_byte(cmd, SAM_STAT_TASK_SET_FULL);
+       scsi_done(cmd);
+       return NULL;
 }
 
 static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
index c9c2496d91ea474a8f02655bceedf1e71e3c0c45..926a7f41303dc534435a9bb72fd79c2cda04bc36 100644 (file)
@@ -44,7 +44,6 @@ static inline void ata_force_cbl(struct ata_port *ap) { }
 #endif
 extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
 extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
-extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
 extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
                           u64 block, u32 n_block, unsigned int tf_flags,
                           unsigned int tag, int class);
@@ -91,18 +90,6 @@ extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
 
 #define to_ata_port(d) container_of(d, struct ata_port, tdev)
 
-/* libata-sata.c */
-#ifdef CONFIG_SATA_HOST
-int ata_sas_allocate_tag(struct ata_port *ap);
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap);
-#else
-static inline int ata_sas_allocate_tag(struct ata_port *ap)
-{
-       return -EOPNOTSUPP;
-}
-static inline void ata_sas_free_tag(unsigned int tag, struct ata_port *ap) { }
-#endif
-
 /* libata-acpi.c */
 #ifdef CONFIG_ATA_ACPI
 extern unsigned int ata_acpi_gtf_filter;
index 2e35505b683c764999684dda38936f5a7b917e70..0117df0fe3c5932060a82a6a4dd9f975cb8e1960 100644 (file)
@@ -536,8 +536,8 @@ static int pata_ftide010_probe(struct platform_device *pdev)
        return 0;
 
 err_dis_clk:
-       if (!IS_ERR(ftide->pclk))
-               clk_disable_unprepare(ftide->pclk);
+       clk_disable_unprepare(ftide->pclk);
+
        return ret;
 }
 
@@ -547,8 +547,7 @@ static int pata_ftide010_remove(struct platform_device *pdev)
        struct ftide010 *ftide = host->private_data;
 
        ata_host_detach(ftide->host);
-       if (!IS_ERR(ftide->pclk))
-               clk_disable_unprepare(ftide->pclk);
+       clk_disable_unprepare(ftide->pclk);
 
        return 0;
 }
index 3250ef317df6bd2202b52e1298a8caca6dea7587..03b6ae37a57826729bb6806fc7ba749d4d68f8bf 100644 (file)
 #include <linux/gfp.h>
 #include <linux/delay.h>
 #include <linux/libata.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/types.h>
 
 #include <asm/cacheflush.h>
-#include <asm/prom.h>
 #include <asm/mpc52xx.h>
 
 #include <linux/fsl/bestcomm/bestcomm.h>
index 0da58ce20d82b5e00e741fb96d665646c418a256..67ef2e26d7df13d4246542caaf6b691122fc8295 100644 (file)
 *     critical.
  */
 
-static unsigned long sil680_selreg(struct ata_port *ap, int r)
+static int sil680_selreg(struct ata_port *ap, int r)
 {
-       unsigned long base = 0xA0 + r;
-       base += (ap->port_no << 4);
-       return base;
+       return 0xA0 + (ap->port_no << 4) + r;
 }
 
 /**
@@ -65,12 +63,9 @@ static unsigned long sil680_selreg(struct ata_port *ap, int r)
  *     the unit shift.
  */
 
-static unsigned long sil680_seldev(struct ata_port *ap, struct ata_device *adev, int r)
+static int sil680_seldev(struct ata_port *ap, struct ata_device *adev, int r)
 {
-       unsigned long base = 0xA0 + r;
-       base += (ap->port_no << 4);
-       base |= adev->devno ? 2 : 0;
-       return base;
+       return 0xA0 + (ap->port_no << 4) + r + (adev->devno << 1);
 }
 
 
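
The helpers now return plain config-space offsets. A worked example with assumed values: for the secondary port (port_no = 1), the slave device (devno = 1) and the timing register r = 0x04,

        sil680_seldev() = 0xA0 + (1 << 4) + 0x04 + (1 << 1) = 0xB6

which matches the old "base |= devno ? 2 : 0" form, since bit 1 of 0xA0 + (port_no << 4) + r is never set for the register offsets used here (0x04, 0x08 and 0x0C).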
@@ -85,8 +80,9 @@ static unsigned long sil680_seldev(struct ata_port *ap, struct ata_device *adev,
 static int sil680_cable_detect(struct ata_port *ap)
 {
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-       unsigned long addr = sil680_selreg(ap, 0);
+       int addr = sil680_selreg(ap, 0);
        u8 ata66;
+
        pci_read_config_byte(pdev, addr, &ata66);
        if (ata66 & 1)
                return ATA_CBL_PATA80;
@@ -113,9 +109,9 @@ static void sil680_set_piomode(struct ata_port *ap, struct ata_device *adev)
                0x328A, 0x2283, 0x1281, 0x10C3, 0x10C1
        };
 
-       unsigned long tfaddr = sil680_selreg(ap, 0x02);
-       unsigned long addr = sil680_seldev(ap, adev, 0x04);
-       unsigned long addr_mask = 0x80 + 4 * ap->port_no;
+       int tfaddr = sil680_selreg(ap, 0x02);
+       int addr = sil680_seldev(ap, adev, 0x04);
+       int addr_mask = 0x80 + 4 * ap->port_no;
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
        int pio = adev->pio_mode - XFER_PIO_0;
        int lowest_pio = pio;
@@ -165,9 +161,9 @@ static void sil680_set_dmamode(struct ata_port *ap, struct ata_device *adev)
        static const u16 dma_table[3] = { 0x2208, 0x10C2, 0x10C1 };
 
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-       unsigned long ma = sil680_seldev(ap, adev, 0x08);
-       unsigned long ua = sil680_seldev(ap, adev, 0x0C);
-       unsigned long addr_mask = 0x80 + 4 * ap->port_no;
+       int ma = sil680_seldev(ap, adev, 0x08);
+       int ua = sil680_seldev(ap, adev, 0x0C);
+       int addr_mask = 0x80 + 4 * ap->port_no;
        int port_shift = adev->devno * 4;
        u8 scsc, mode;
        u16 multi, ultra;
@@ -219,7 +215,7 @@ static void sil680_sff_exec_command(struct ata_port *ap,
 static bool sil680_sff_irq_check(struct ata_port *ap)
 {
        struct pci_dev *pdev    = to_pci_dev(ap->host->dev);
-       unsigned long addr      = sil680_selreg(ap, 1);
+       int addr                = sil680_selreg(ap, 1);
        u8 val;
 
        pci_read_config_byte(pdev, addr, &val);
index 439ca882f73c7fb5b45f5abb725898f72ed32fd0..215c02d4056a7f190f9aec30970be0b58620c6f4 100644 (file)
@@ -248,9 +248,9 @@ static void via_do_set_mode(struct ata_port *ap, struct ata_device *adev,
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
        struct ata_device *peer = ata_dev_pair(adev);
        struct ata_timing t, p;
-       static int via_clock = 33333;   /* Bus clock in kHZ */
-       unsigned long T =  1000000000 / via_clock;
-       unsigned long UT = T;
+       const int via_clock = 33333;    /* Bus clock in kHz */
+       const int T = 1000000000 / via_clock;
+       int UT = T;
        int ut;
        int offset = 3 - (2*ap->port_no) - adev->devno;
 
index 00e1c7941d0ea0dd4ae5841959b136789cf6c9f7..b729e9919bb0c7c03daffd1b63558906a75174f6 100644 (file)
@@ -318,7 +318,6 @@ static int gemini_sata_probe(struct platform_device *pdev)
        struct device_node *np = dev->of_node;
        struct sata_gemini *sg;
        struct regmap *map;
-       struct resource *res;
        enum gemini_muxmode muxmode;
        u32 gmode;
        u32 gmask;
@@ -329,11 +328,7 @@ static int gemini_sata_probe(struct platform_device *pdev)
                return -ENOMEM;
        sg->dev = dev;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
-       sg->base = devm_ioremap_resource(dev, res);
+       sg->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(sg->base))
                return PTR_ERR(sg->base);
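
devm_platform_ioremap_resource() folds the platform_get_resource() +
devm_ioremap_resource() pair into a single call; the helper is essentially
the sketch below (paraphrased, not verbatim):

    void __iomem *devm_platform_ioremap_resource(struct platform_device *pdev,
                                                 unsigned int index)
    {
            struct resource *res;

            res = platform_get_resource(pdev, IORESOURCE_MEM, index);
            return devm_ioremap_resource(&pdev->dev, res);
    }

One behavioural nit: a missing resource now propagates the error that
devm_ioremap_resource() reports for a NULL resource (-EINVAL) instead of the
explicit -ENODEV.
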
 
index 94d1789a233e0f436e51cc9d214c139bb8dceb80..406a907a4caec29344c367d7920e35e27c372cc4 100644 (file)
@@ -735,6 +735,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
                  size_t offset, u32 opt_flags)
 {
        struct firmware *fw = NULL;
+       struct cred *kern_cred = NULL;
+       const struct cred *old_cred;
        bool nondirect = false;
        int ret;
 
@@ -751,6 +753,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
        if (ret <= 0) /* error or already assigned */
                goto out;
 
+       /*
+        * We are about to try to access the firmware file. Because we may have been
+        * called by a driver when serving an unrelated request from userland, we use
+        * the kernel credentials to read the file.
+        */
+       kern_cred = prepare_kernel_cred(NULL);
+       if (!kern_cred) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       old_cred = override_creds(kern_cred);
+
        ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL);
 
        /* Only full reads can support decompression, platform, and sysfs. */
@@ -776,6 +790,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
        } else
                ret = assign_fw(fw, device);
 
+       revert_creds(old_cred);
+       put_cred(kern_cred);
+
  out:
        if (ret < 0) {
                fw_abort_batch_reqs(fw);
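
The bracketing added here is the standard kernel-credentials override;
stripped to its skeleton (a sketch, with do_access() standing in for the
firmware filesystem lookup):

    struct cred *kern_cred = prepare_kernel_cred(NULL);
    const struct cred *old_cred;

    if (!kern_cred)
            return -ENOMEM;
    old_cred = override_creds(kern_cred);   /* current now acts as kernel */
    ret = do_access();                      /* filesystem access, etc.    */
    revert_creds(old_cred);                 /* restore caller's creds     */
    put_cred(kern_cred);                    /* drop our reference         */
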
index bbddb267c2e693264a8ba51ec39dcfb328ed4139..72115917e0bdde442e7bf82f1059c55a58ed672c 100644 (file)
@@ -172,10 +172,10 @@ EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name);
  * @dev: Device to detach.
  * @power_off: Used to indicate whether we should power off the device.
  *
- * This functions will reverse the actions from dev_pm_domain_attach() and
- * dev_pm_domain_attach_by_id(), thus it detaches @dev from its PM domain.
- * Typically it should be invoked during the remove phase, either from
- * subsystem level code or from drivers.
+ * This function will reverse the actions from dev_pm_domain_attach(),
+ * dev_pm_domain_attach_by_id() and dev_pm_domain_attach_by_name(), thus it
+ * detaches @dev from its PM domain.  Typically it should be invoked during the
+ * remove phase, either from subsystem level code or from drivers.
  *
  * Callers must ensure proper synchronization of this function with power
  * management callbacks.
index 1ee878d126fdf5c789f052d9ebb57f666c4fdc5d..739e52cd4aba5dd4b3c666ad9cbdf38120747cb5 100644 (file)
@@ -131,7 +131,7 @@ static const struct genpd_lock_ops genpd_spin_ops = {
 #define genpd_is_cpu_domain(genpd)     (genpd->flags & GENPD_FLAG_CPU_DOMAIN)
 #define genpd_is_rpm_always_on(genpd)  (genpd->flags & GENPD_FLAG_RPM_ALWAYS_ON)
 
-static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
+static inline bool irq_safe_dev_in_sleep_domain(struct device *dev,
                const struct generic_pm_domain *genpd)
 {
        bool ret;
@@ -139,11 +139,14 @@ static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
        ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd);
 
        /*
-        * Warn once if an IRQ safe device is attached to a no sleep domain, as
-        * to indicate a suboptimal configuration for PM. For an always on
-        * domain this isn't case, thus don't warn.
+        * Warn once if an IRQ safe device is attached to a domain whose
+        * callbacks are allowed to sleep. This indicates a suboptimal
+        * configuration for PM, but it doesn't matter for an always-on domain.
         */
-       if (ret && !genpd_is_always_on(genpd))
+       if (genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd))
+               return ret;
+
+       if (ret)
                dev_warn_once(dev, "PM domain %s will not be powered off\n",
                                genpd->name);
 
@@ -225,24 +228,23 @@ static void genpd_debug_remove(struct generic_pm_domain *genpd)
 
 static void genpd_update_accounting(struct generic_pm_domain *genpd)
 {
-       ktime_t delta, now;
+       u64 delta, now;
 
-       now = ktime_get();
-       delta = ktime_sub(now, genpd->accounting_time);
+       now = ktime_get_mono_fast_ns();
+       if (now <= genpd->accounting_time)
+               return;
+
+       delta = now - genpd->accounting_time;
 
        /*
         * If genpd->status is active, the domain has just come out of
         * the "off" state, so add the delta to the idle time of the
         * current state; otherwise, add it to the on time.
         */
-       if (genpd->status == GENPD_STATE_ON) {
-               int state_idx = genpd->state_idx;
-
-               genpd->states[state_idx].idle_time =
-                       ktime_add(genpd->states[state_idx].idle_time, delta);
-       } else {
-               genpd->on_time = ktime_add(genpd->on_time, delta);
-       }
+       if (genpd->status == GENPD_STATE_ON)
+               genpd->states[genpd->state_idx].idle_time += delta;
+       else
+               genpd->on_time += delta;
 
        genpd->accounting_time = now;
 }
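
ktime_get_mono_fast_ns() is used because it is safe from any context, but
unlike ktime_get() it can briefly be observed to go backwards across a
timekeeper update, which is what the new "now <= genpd->accounting_time"
early return protects against. The accumulate-with-guard idiom in isolation
(a sketch, assuming a u64 "last" timestamp):

    u64 now = ktime_get_mono_fast_ns();

    if (now <= last)        /* clock observed behind: drop this sample */
            return;
    total += now - last;    /* otherwise accumulate the elapsed time   */
    last = now;
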
@@ -476,15 +478,16 @@ EXPORT_SYMBOL_GPL(dev_pm_genpd_set_performance_state);
  */
 void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next)
 {
-       struct generic_pm_domain_data *gpd_data;
        struct generic_pm_domain *genpd;
+       struct gpd_timing_data *td;
 
        genpd = dev_to_genpd_safe(dev);
        if (!genpd)
                return;
 
-       gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
-       gpd_data->next_wakeup = next;
+       td = to_gpd_data(dev->power.subsys_data->domain_data)->td;
+       if (td)
+               td->next_wakeup = next;
 }
 EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup);
 
@@ -506,6 +509,7 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
        if (!genpd->power_on)
                goto out;
 
+       timed = timed && genpd->gd && !genpd->states[state_idx].fwnode;
        if (!timed) {
                ret = genpd->power_on(genpd);
                if (ret)
@@ -524,7 +528,7 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
                goto out;
 
        genpd->states[state_idx].power_on_latency_ns = elapsed_ns;
-       genpd->max_off_time_changed = true;
+       genpd->gd->max_off_time_changed = true;
        pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
                 genpd->name, "on", elapsed_ns);
 
@@ -555,6 +559,7 @@ static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
        if (!genpd->power_off)
                goto out;
 
+       timed = timed && genpd->gd && !genpd->states[state_idx].fwnode;
        if (!timed) {
                ret = genpd->power_off(genpd);
                if (ret)
@@ -573,7 +578,7 @@ static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
                goto out;
 
        genpd->states[state_idx].power_off_latency_ns = elapsed_ns;
-       genpd->max_off_time_changed = true;
+       genpd->gd->max_off_time_changed = true;
        pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
                 genpd->name, "off", elapsed_ns);
 
@@ -649,18 +654,12 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
        }
 
        list_for_each_entry(pdd, &genpd->dev_list, list_node) {
-               enum pm_qos_flags_status stat;
-
-               stat = dev_pm_qos_flags(pdd->dev, PM_QOS_FLAG_NO_POWER_OFF);
-               if (stat > PM_QOS_FLAGS_NONE)
-                       return -EBUSY;
-
                /*
                 * Do not allow PM domain to be powered off, when an IRQ safe
                 * device is part of a non-IRQ safe domain.
                 */
                if (!pm_runtime_suspended(pdd->dev) ||
-                       irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd))
+                       irq_safe_dev_in_sleep_domain(pdd->dev, genpd))
                        not_suspended++;
        }
 
@@ -775,25 +774,27 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
        dev = gpd_data->base.dev;
 
        for (;;) {
-               struct generic_pm_domain *genpd;
+               struct generic_pm_domain *genpd = ERR_PTR(-ENODATA);
                struct pm_domain_data *pdd;
+               struct gpd_timing_data *td;
 
                spin_lock_irq(&dev->power.lock);
 
                pdd = dev->power.subsys_data ?
                                dev->power.subsys_data->domain_data : NULL;
                if (pdd) {
-                       to_gpd_data(pdd)->td.constraint_changed = true;
-                       genpd = dev_to_genpd(dev);
-               } else {
-                       genpd = ERR_PTR(-ENODATA);
+                       td = to_gpd_data(pdd)->td;
+                       if (td) {
+                               td->constraint_changed = true;
+                               genpd = dev_to_genpd(dev);
+                       }
                }
 
                spin_unlock_irq(&dev->power.lock);
 
                if (!IS_ERR(genpd)) {
                        genpd_lock(genpd);
-                       genpd->max_off_time_changed = true;
+                       genpd->gd->max_off_time_changed = true;
                        genpd_unlock(genpd);
                }
 
@@ -879,9 +880,9 @@ static int genpd_runtime_suspend(struct device *dev)
        struct generic_pm_domain *genpd;
        bool (*suspend_ok)(struct device *__dev);
        struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
-       struct gpd_timing_data *td = &gpd_data->td;
+       struct gpd_timing_data *td = gpd_data->td;
        bool runtime_pm = pm_runtime_enabled(dev);
-       ktime_t time_start;
+       ktime_t time_start = 0;
        s64 elapsed_ns;
        int ret;
 
@@ -902,8 +903,7 @@ static int genpd_runtime_suspend(struct device *dev)
                return -EBUSY;
 
        /* Measure suspend latency. */
-       time_start = 0;
-       if (runtime_pm)
+       if (td && runtime_pm)
                time_start = ktime_get();
 
        ret = __genpd_runtime_suspend(dev);
@@ -917,13 +917,13 @@ static int genpd_runtime_suspend(struct device *dev)
        }
 
        /* Update suspend latency value if the measured time exceeds it. */
-       if (runtime_pm) {
+       if (td && runtime_pm) {
                elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
                if (elapsed_ns > td->suspend_latency_ns) {
                        td->suspend_latency_ns = elapsed_ns;
                        dev_dbg(dev, "suspend latency exceeded, %lld ns\n",
                                elapsed_ns);
-                       genpd->max_off_time_changed = true;
+                       genpd->gd->max_off_time_changed = true;
                        td->constraint_changed = true;
                }
        }
@@ -932,7 +932,7 @@ static int genpd_runtime_suspend(struct device *dev)
         * If power.irq_safe is set, this routine may be run with
         * IRQs disabled, so suspend only if the PM domain also is irq_safe.
         */
-       if (irq_safe_dev_in_no_sleep_domain(dev, genpd))
+       if (irq_safe_dev_in_sleep_domain(dev, genpd))
                return 0;
 
        genpd_lock(genpd);
@@ -955,12 +955,11 @@ static int genpd_runtime_resume(struct device *dev)
 {
        struct generic_pm_domain *genpd;
        struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
-       struct gpd_timing_data *td = &gpd_data->td;
-       bool runtime_pm = pm_runtime_enabled(dev);
-       ktime_t time_start;
+       struct gpd_timing_data *td = gpd_data->td;
+       bool timed = td && pm_runtime_enabled(dev);
+       ktime_t time_start = 0;
        s64 elapsed_ns;
        int ret;
-       bool timed = true;
 
        dev_dbg(dev, "%s()\n", __func__);
 
@@ -972,10 +971,8 @@ static int genpd_runtime_resume(struct device *dev)
         * As we don't power off a non IRQ safe domain, which holds
         * an IRQ safe device, we don't need to restore power to it.
         */
-       if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) {
-               timed = false;
+       if (irq_safe_dev_in_sleep_domain(dev, genpd))
                goto out;
-       }
 
        genpd_lock(genpd);
        ret = genpd_power_on(genpd, 0);
@@ -988,8 +985,7 @@ static int genpd_runtime_resume(struct device *dev)
 
  out:
        /* Measure resume latency. */
-       time_start = 0;
-       if (timed && runtime_pm)
+       if (timed)
                time_start = ktime_get();
 
        ret = genpd_start_dev(genpd, dev);
@@ -1001,13 +997,13 @@ static int genpd_runtime_resume(struct device *dev)
                goto err_stop;
 
        /* Update resume latency value if the measured time exceeds it. */
-       if (timed && runtime_pm) {
+       if (timed) {
                elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
                if (elapsed_ns > td->resume_latency_ns) {
                        td->resume_latency_ns = elapsed_ns;
                        dev_dbg(dev, "resume latency exceeded, %lld ns\n",
                                elapsed_ns);
-                       genpd->max_off_time_changed = true;
+                       genpd->gd->max_off_time_changed = true;
                        td->constraint_changed = true;
                }
        }
@@ -1500,9 +1496,11 @@ EXPORT_SYMBOL_GPL(dev_pm_genpd_resume);
 
 #endif /* CONFIG_PM_SLEEP */
 
-static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev)
+static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev,
+                                                          bool has_governor)
 {
        struct generic_pm_domain_data *gpd_data;
+       struct gpd_timing_data *td;
        int ret;
 
        ret = dev_pm_get_subsys_data(dev);
@@ -1516,26 +1514,38 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev)
        }
 
        gpd_data->base.dev = dev;
-       gpd_data->td.constraint_changed = true;
-       gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
        gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier;
-       gpd_data->next_wakeup = KTIME_MAX;
 
-       spin_lock_irq(&dev->power.lock);
+       /* Allocate data used by a governor. */
+       if (has_governor) {
+               td = kzalloc(sizeof(*td), GFP_KERNEL);
+               if (!td) {
+                       ret = -ENOMEM;
+                       goto err_free;
+               }
 
-       if (dev->power.subsys_data->domain_data) {
-               ret = -EINVAL;
-               goto err_free;
+               td->constraint_changed = true;
+               td->effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
+               td->next_wakeup = KTIME_MAX;
+               gpd_data->td = td;
        }
 
-       dev->power.subsys_data->domain_data = &gpd_data->base;
+       spin_lock_irq(&dev->power.lock);
+
+       if (dev->power.subsys_data->domain_data)
+               ret = -EINVAL;
+       else
+               dev->power.subsys_data->domain_data = &gpd_data->base;
 
        spin_unlock_irq(&dev->power.lock);
 
+       if (ret)
+               goto err_free;
+
        return gpd_data;
 
  err_free:
-       spin_unlock_irq(&dev->power.lock);
+       kfree(gpd_data->td);
        kfree(gpd_data);
  err_put:
        dev_pm_put_subsys_data(dev);
@@ -1551,6 +1561,7 @@ static void genpd_free_dev_data(struct device *dev,
 
        spin_unlock_irq(&dev->power.lock);
 
+       kfree(gpd_data->td);
        kfree(gpd_data);
        dev_pm_put_subsys_data(dev);
 }
@@ -1607,6 +1618,7 @@ static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev)
 static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
                            struct device *base_dev)
 {
+       struct genpd_governor_data *gd = genpd->gd;
        struct generic_pm_domain_data *gpd_data;
        int ret;
 
@@ -1615,7 +1627,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
        if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
                return -EINVAL;
 
-       gpd_data = genpd_alloc_dev_data(dev);
+       gpd_data = genpd_alloc_dev_data(dev, gd);
        if (IS_ERR(gpd_data))
                return PTR_ERR(gpd_data);
 
@@ -1631,7 +1643,8 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
        dev_pm_domain_set(dev, &genpd->domain);
 
        genpd->device_count++;
-       genpd->max_off_time_changed = true;
+       if (gd)
+               gd->max_off_time_changed = true;
 
        list_add_tail(&gpd_data->base.list_node, &genpd->dev_list);
 
@@ -1685,7 +1698,8 @@ static int genpd_remove_device(struct generic_pm_domain *genpd,
        }
 
        genpd->device_count--;
-       genpd->max_off_time_changed = true;
+       if (genpd->gd)
+               genpd->gd->max_off_time_changed = true;
 
        genpd_clear_cpumask(genpd, gpd_data->cpu);
        dev_pm_domain_set(dev, NULL);
@@ -1958,6 +1972,53 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd)
        return 0;
 }
 
+static int genpd_alloc_data(struct generic_pm_domain *genpd)
+{
+       struct genpd_governor_data *gd = NULL;
+       int ret;
+
+       if (genpd_is_cpu_domain(genpd) &&
+           !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL))
+               return -ENOMEM;
+
+       if (genpd->gov) {
+               gd = kzalloc(sizeof(*gd), GFP_KERNEL);
+               if (!gd) {
+                       ret = -ENOMEM;
+                       goto free;
+               }
+
+               gd->max_off_time_ns = -1;
+               gd->max_off_time_changed = true;
+               gd->next_wakeup = KTIME_MAX;
+       }
+
+       /* Use only one "off" state if there were no states declared */
+       if (genpd->state_count == 0) {
+               ret = genpd_set_default_power_state(genpd);
+               if (ret)
+                       goto free;
+       }
+
+       genpd->gd = gd;
+       return 0;
+
+free:
+       if (genpd_is_cpu_domain(genpd))
+               free_cpumask_var(genpd->cpus);
+       kfree(gd);
+       return ret;
+}
+
+static void genpd_free_data(struct generic_pm_domain *genpd)
+{
+       if (genpd_is_cpu_domain(genpd))
+               free_cpumask_var(genpd->cpus);
+       if (genpd->free_states)
+               genpd->free_states(genpd->states, genpd->state_count);
+       kfree(genpd->gd);
+}
+
 static void genpd_lock_init(struct generic_pm_domain *genpd)
 {
        if (genpd->flags & GENPD_FLAG_IRQ_SAFE) {
@@ -1995,11 +2056,9 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
        atomic_set(&genpd->sd_count, 0);
        genpd->status = is_off ? GENPD_STATE_OFF : GENPD_STATE_ON;
        genpd->device_count = 0;
-       genpd->max_off_time_ns = -1;
-       genpd->max_off_time_changed = true;
        genpd->provider = NULL;
        genpd->has_provider = false;
-       genpd->accounting_time = ktime_get();
+       genpd->accounting_time = ktime_get_mono_fast_ns();
        genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
        genpd->domain.ops.runtime_resume = genpd_runtime_resume;
        genpd->domain.ops.prepare = genpd_prepare;
@@ -2017,26 +2076,22 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
                genpd->dev_ops.start = pm_clk_resume;
        }
 
+       /* The always-on governor works better with the corresponding flag. */
+       if (gov == &pm_domain_always_on_gov)
+               genpd->flags |= GENPD_FLAG_RPM_ALWAYS_ON;
+
        /* Always-on domains must be powered on at initialization. */
        if ((genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd)) &&
                        !genpd_status_on(genpd))
                return -EINVAL;
 
-       if (genpd_is_cpu_domain(genpd) &&
-           !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL))
-               return -ENOMEM;
-
-       /* Use only one "off" state if there were no states declared */
-       if (genpd->state_count == 0) {
-               ret = genpd_set_default_power_state(genpd);
-               if (ret) {
-                       if (genpd_is_cpu_domain(genpd))
-                               free_cpumask_var(genpd->cpus);
-                       return ret;
-               }
-       } else if (!gov && genpd->state_count > 1) {
+       /* Having multiple states but no governor doesn't make sense. */
+       if (!gov && genpd->state_count > 1)
                pr_warn("%s: no governor for states\n", genpd->name);
-       }
+
+       ret = genpd_alloc_data(genpd);
+       if (ret)
+               return ret;
 
        device_initialize(&genpd->dev);
        dev_set_name(&genpd->dev, "%s", genpd->name);
@@ -2081,10 +2136,7 @@ static int genpd_remove(struct generic_pm_domain *genpd)
        genpd_unlock(genpd);
        genpd_debug_remove(genpd);
        cancel_work_sync(&genpd->power_off_work);
-       if (genpd_is_cpu_domain(genpd))
-               free_cpumask_var(genpd->cpus);
-       if (genpd->free_states)
-               genpd->free_states(genpd->states, genpd->state_count);
+       genpd_free_data(genpd);
 
        pr_debug("%s: removed %s\n", __func__, genpd->name);
 
@@ -3163,6 +3215,7 @@ static int sub_domains_show(struct seq_file *s, void *data)
 static int idle_states_show(struct seq_file *s, void *data)
 {
        struct generic_pm_domain *genpd = s->private;
+       u64 now, delta, idle_time = 0;
        unsigned int i;
        int ret = 0;
 
@@ -3173,17 +3226,19 @@ static int idle_states_show(struct seq_file *s, void *data)
        seq_puts(s, "State          Time Spent(ms) Usage          Rejected\n");
 
        for (i = 0; i < genpd->state_count; i++) {
-               ktime_t delta = 0;
-               s64 msecs;
+               idle_time += genpd->states[i].idle_time;
 
-               if ((genpd->status == GENPD_STATE_OFF) &&
-                               (genpd->state_idx == i))
-                       delta = ktime_sub(ktime_get(), genpd->accounting_time);
+               if (genpd->status == GENPD_STATE_OFF && genpd->state_idx == i) {
+                       now = ktime_get_mono_fast_ns();
+                       if (now > genpd->accounting_time) {
+                               delta = now - genpd->accounting_time;
+                               idle_time += delta;
+                       }
+               }
 
-               msecs = ktime_to_ms(
-                       ktime_add(genpd->states[i].idle_time, delta));
-               seq_printf(s, "S%-13i %-14lld %-14llu %llu\n", i, msecs,
-                             genpd->states[i].usage, genpd->states[i].rejected);
+               do_div(idle_time, NSEC_PER_MSEC);
+               seq_printf(s, "S%-13i %-14llu %-14llu %llu\n", i, idle_time,
+                          genpd->states[i].usage, genpd->states[i].rejected);
        }
 
        genpd_unlock(genpd);
@@ -3193,18 +3248,22 @@ static int idle_states_show(struct seq_file *s, void *data)
 static int active_time_show(struct seq_file *s, void *data)
 {
        struct generic_pm_domain *genpd = s->private;
-       ktime_t delta = 0;
+       u64 now, on_time, delta = 0;
        int ret = 0;
 
        ret = genpd_lock_interruptible(genpd);
        if (ret)
                return -ERESTARTSYS;
 
-       if (genpd->status == GENPD_STATE_ON)
-               delta = ktime_sub(ktime_get(), genpd->accounting_time);
+       if (genpd->status == GENPD_STATE_ON) {
+               now = ktime_get_mono_fast_ns();
+               if (now > genpd->accounting_time)
+                       delta = now - genpd->accounting_time;
+       }
 
-       seq_printf(s, "%lld ms\n", ktime_to_ms(
-                               ktime_add(genpd->on_time, delta)));
+       on_time = genpd->on_time + delta;
+       do_div(on_time, NSEC_PER_MSEC);
+       seq_printf(s, "%llu ms\n", on_time);
 
        genpd_unlock(genpd);
        return ret;
@@ -3213,7 +3272,7 @@ static int active_time_show(struct seq_file *s, void *data)
 static int total_idle_time_show(struct seq_file *s, void *data)
 {
        struct generic_pm_domain *genpd = s->private;
-       ktime_t delta = 0, total = 0;
+       u64 now, delta, total = 0;
        unsigned int i;
        int ret = 0;
 
@@ -3222,16 +3281,19 @@ static int total_idle_time_show(struct seq_file *s, void *data)
                return -ERESTARTSYS;
 
        for (i = 0; i < genpd->state_count; i++) {
+               total += genpd->states[i].idle_time;
 
-               if ((genpd->status == GENPD_STATE_OFF) &&
-                               (genpd->state_idx == i))
-                       delta = ktime_sub(ktime_get(), genpd->accounting_time);
-
-               total = ktime_add(total, genpd->states[i].idle_time);
+               if (genpd->status == GENPD_STATE_OFF && genpd->state_idx == i) {
+                       now = ktime_get_mono_fast_ns();
+                       if (now > genpd->accounting_time) {
+                               delta = now - genpd->accounting_time;
+                               total += delta;
+                       }
+               }
        }
-       total = ktime_add(total, delta);
 
-       seq_printf(s, "%lld ms\n", ktime_to_ms(total));
+       do_div(total, NSEC_PER_MSEC);
+       seq_printf(s, "%llu ms\n", total);
 
        genpd_unlock(genpd);
        return ret;
index cd08c58851905706f07006f36e52e60be3a433f3..282a3a135827ceeded2695b4a94b969f0eff6876 100644 (file)
@@ -18,6 +18,8 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
        s64 constraint_ns;
 
        if (dev->power.subsys_data && dev->power.subsys_data->domain_data) {
+               struct gpd_timing_data *td = dev_gpd_data(dev)->td;
+
                /*
                 * Only take suspend-time QoS constraints of devices into
                 * account, because constraints updated after the device has
@@ -25,7 +27,8 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
                 * anyway.  In order for them to take effect, the device has to
                 * be resumed and suspended again.
                 */
-               constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns;
+               constraint_ns = td ? td->effective_constraint_ns :
+                               PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
        } else {
                /*
                 * The child is not in a domain and there's no info on its
@@ -49,7 +52,7 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
  */
 static bool default_suspend_ok(struct device *dev)
 {
-       struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+       struct gpd_timing_data *td = dev_gpd_data(dev)->td;
        unsigned long flags;
        s64 constraint_ns;
 
@@ -136,26 +139,28 @@ static void update_domain_next_wakeup(struct generic_pm_domain *genpd, ktime_t n
         * is able to enter its optimal idle state.
         */
        list_for_each_entry(pdd, &genpd->dev_list, list_node) {
-               next_wakeup = to_gpd_data(pdd)->next_wakeup;
+               next_wakeup = to_gpd_data(pdd)->td->next_wakeup;
                if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now))
                        if (ktime_before(next_wakeup, domain_wakeup))
                                domain_wakeup = next_wakeup;
        }
 
        list_for_each_entry(link, &genpd->parent_links, parent_node) {
-               next_wakeup = link->child->next_wakeup;
+               struct genpd_governor_data *cgd = link->child->gd;
+
+               next_wakeup = cgd ? cgd->next_wakeup : KTIME_MAX;
                if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now))
                        if (ktime_before(next_wakeup, domain_wakeup))
                                domain_wakeup = next_wakeup;
        }
 
-       genpd->next_wakeup = domain_wakeup;
+       genpd->gd->next_wakeup = domain_wakeup;
 }
 
 static bool next_wakeup_allows_state(struct generic_pm_domain *genpd,
                                     unsigned int state, ktime_t now)
 {
-       ktime_t domain_wakeup = genpd->next_wakeup;
+       ktime_t domain_wakeup = genpd->gd->next_wakeup;
        s64 idle_time_ns, min_sleep_ns;
 
        min_sleep_ns = genpd->states[state].power_off_latency_ns +
@@ -185,8 +190,9 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
         * All subdomains have been powered off already at this point.
         */
        list_for_each_entry(link, &genpd->parent_links, parent_node) {
-               struct generic_pm_domain *sd = link->child;
-               s64 sd_max_off_ns = sd->max_off_time_ns;
+               struct genpd_governor_data *cgd = link->child->gd;
+
+               s64 sd_max_off_ns = cgd ? cgd->max_off_time_ns : -1;
 
                if (sd_max_off_ns < 0)
                        continue;
@@ -215,7 +221,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
                 * domain to turn off and on (that's how much time it will
                 * have to wait worst case).
                 */
-               td = &to_gpd_data(pdd)->td;
+               td = to_gpd_data(pdd)->td;
                constraint_ns = td->effective_constraint_ns;
                /*
                 * Zero means "no suspend at all" and this runs only when all
@@ -244,7 +250,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
         * time and the time needed to turn the domain on is the maximum
         * theoretical time this domain can spend in the "off" state.
         */
-       genpd->max_off_time_ns = min_off_time_ns -
+       genpd->gd->max_off_time_ns = min_off_time_ns -
                genpd->states[state].power_on_latency_ns;
        return true;
 }
@@ -259,6 +265,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
 static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
 {
        struct generic_pm_domain *genpd = pd_to_genpd(pd);
+       struct genpd_governor_data *gd = genpd->gd;
        int state_idx = genpd->state_count - 1;
        struct gpd_link *link;
 
@@ -269,11 +276,11 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
         * cannot be met.
         */
        update_domain_next_wakeup(genpd, now);
-       if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (genpd->next_wakeup != KTIME_MAX)) {
+       if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (gd->next_wakeup != KTIME_MAX)) {
                /* Let's find out the deepest domain idle state, the devices prefer */
                while (state_idx >= 0) {
                        if (next_wakeup_allows_state(genpd, state_idx, now)) {
-                               genpd->max_off_time_changed = true;
+                               gd->max_off_time_changed = true;
                                break;
                        }
                        state_idx--;
@@ -281,14 +288,14 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
 
                if (state_idx < 0) {
                        state_idx = 0;
-                       genpd->cached_power_down_ok = false;
+                       gd->cached_power_down_ok = false;
                        goto done;
                }
        }
 
-       if (!genpd->max_off_time_changed) {
-               genpd->state_idx = genpd->cached_power_down_state_idx;
-               return genpd->cached_power_down_ok;
+       if (!gd->max_off_time_changed) {
+               genpd->state_idx = gd->cached_power_down_state_idx;
+               return gd->cached_power_down_ok;
        }
 
        /*
@@ -297,12 +304,16 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
         * going to be called for any parent until this instance
         * returns.
         */
-       list_for_each_entry(link, &genpd->child_links, child_node)
-               link->parent->max_off_time_changed = true;
+       list_for_each_entry(link, &genpd->child_links, child_node) {
+               struct genpd_governor_data *pgd = link->parent->gd;
+
+               if (pgd)
+                       pgd->max_off_time_changed = true;
+       }
 
-       genpd->max_off_time_ns = -1;
-       genpd->max_off_time_changed = false;
-       genpd->cached_power_down_ok = true;
+       gd->max_off_time_ns = -1;
+       gd->max_off_time_changed = false;
+       gd->cached_power_down_ok = true;
 
        /*
         * Find a state to power down to, starting from the state
@@ -310,7 +321,7 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
         */
        while (!__default_power_down_ok(pd, state_idx)) {
                if (state_idx == 0) {
-                       genpd->cached_power_down_ok = false;
+                       gd->cached_power_down_ok = false;
                        break;
                }
                state_idx--;
@@ -318,8 +329,8 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
 
 done:
        genpd->state_idx = state_idx;
-       genpd->cached_power_down_state_idx = genpd->state_idx;
-       return genpd->cached_power_down_ok;
+       gd->cached_power_down_state_idx = genpd->state_idx;
+       return gd->cached_power_down_ok;
 }
 
 static bool default_power_down_ok(struct dev_pm_domain *pd)
@@ -327,11 +338,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
        return _default_power_down_ok(pd, ktime_get());
 }
 
-static bool always_on_power_down_ok(struct dev_pm_domain *domain)
-{
-       return false;
-}
-
 #ifdef CONFIG_CPU_IDLE
 static bool cpu_power_down_ok(struct dev_pm_domain *pd)
 {
@@ -401,6 +407,5 @@ struct dev_power_governor simple_qos_governor = {
  * pm_genpd_gov_always_on - A governor implementing an always-on policy
  */
 struct dev_power_governor pm_domain_always_on_gov = {
-       .power_down_ok = always_on_power_down_ok,
        .suspend_ok = default_suspend_ok,
 };
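
With power_down_ok() gone, the never-power-down guarantee now comes from
pm_genpd_init() adding GENPD_FLAG_RPM_ALWAYS_ON whenever this governor is
passed in (see the domain.c hunk earlier in this diff), so genpd_power_off()
is cut off at the flag level instead. Providers register it unchanged;
hypothetically (my_pd is a placeholder domain):

    static struct generic_pm_domain my_pd = {
            .name = "my_always_on_pd",
    };

    /* is_off must be false: always-on domains must start powered on. */
    ret = pm_genpd_init(&my_pd, &pm_domain_always_on_gov, false);
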
index d4059e6ffeaec181c63c85952339b2bbbf45a7b4..676dc72d912d1bd6f091584dd0aef331d2bbb1b6 100644 (file)
@@ -263,7 +263,7 @@ static int rpm_check_suspend_allowed(struct device *dev)
                retval = -EINVAL;
        else if (dev->power.disable_depth > 0)
                retval = -EACCES;
-       else if (atomic_read(&dev->power.usage_count) > 0)
+       else if (atomic_read(&dev->power.usage_count))
                retval = -EAGAIN;
        else if (!dev->power.ignore_children &&
                        atomic_read(&dev->power.child_count))
@@ -1039,13 +1039,33 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
 }
 EXPORT_SYMBOL_GPL(pm_schedule_suspend);
 
+static int rpm_drop_usage_count(struct device *dev)
+{
+       int ret;
+
+       ret = atomic_sub_return(1, &dev->power.usage_count);
+       if (ret >= 0)
+               return ret;
+
+       /*
+        * Because rpm_resume() does not check the usage counter, it will resume
+        * the device even if the usage counter is 0 or negative, so it is
+        * sufficient to increment the usage counter here to reverse the change
+        * made above.
+        */
+       atomic_inc(&dev->power.usage_count);
+       dev_warn(dev, "Runtime PM usage count underflow!\n");
+       return -EINVAL;
+}
+
 /**
  * __pm_runtime_idle - Entry point for runtime idle operations.
  * @dev: Device to send idle notification for.
  * @rpmflags: Flag bits.
  *
  * If the RPM_GET_PUT flag is set, decrement the device's usage count and
- * return immediately if it is larger than zero.  Then carry out an idle
+ * return immediately if it is larger than zero (if it becomes negative, log a
+ * warning, increment it, and return an error).  Then carry out an idle
  * notification, either synchronous or asynchronous.
  *
  * This routine may be called in atomic context if the RPM_ASYNC flag is set,
@@ -1057,7 +1077,10 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
        int retval;
 
        if (rpmflags & RPM_GET_PUT) {
-               if (!atomic_dec_and_test(&dev->power.usage_count)) {
+               retval = rpm_drop_usage_count(dev);
+               if (retval < 0) {
+                       return retval;
+               } else if (retval > 0) {
                        trace_rpm_usage_rcuidle(dev, rpmflags);
                        return 0;
                }
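
The visible effect is that an unbalanced put now fails loudly instead of
silently driving the counter negative; e.g. in a hypothetical driver:

    pm_runtime_get_sync(dev);
    pm_runtime_put(dev);    /* count returns to 0, idle may be requested  */
    pm_runtime_put(dev);    /* unbalanced: warns "Runtime PM usage count
                             * underflow!", restores the counter and
                             * returns -EINVAL                            */
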
@@ -1079,7 +1102,8 @@ EXPORT_SYMBOL_GPL(__pm_runtime_idle);
  * @rpmflags: Flag bits.
  *
  * If the RPM_GET_PUT flag is set, decrement the device's usage count and
- * return immediately if it is larger than zero.  Then carry out a suspend,
+ * return immediately if it is larger than zero (if it becomes negative, log a
+ * warning, increment it, and return an error).  Then carry out a suspend,
  * either synchronous or asynchronous.
  *
  * This routine may be called in atomic context if the RPM_ASYNC flag is set,
@@ -1091,7 +1115,10 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
        int retval;
 
        if (rpmflags & RPM_GET_PUT) {
-               if (!atomic_dec_and_test(&dev->power.usage_count)) {
+               retval = rpm_drop_usage_count(dev);
+               if (retval < 0) {
+                       return retval;
+               } else if (retval > 0) {
                        trace_rpm_usage_rcuidle(dev, rpmflags);
                        return 0;
                }
@@ -1210,12 +1237,13 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
 {
        struct device *parent = dev->parent;
        bool notify_parent = false;
+       unsigned long flags;
        int error = 0;
 
        if (status != RPM_ACTIVE && status != RPM_SUSPENDED)
                return -EINVAL;
 
-       spin_lock_irq(&dev->power.lock);
+       spin_lock_irqsave(&dev->power.lock, flags);
 
        /*
         * Prevent PM-runtime from being enabled for the device or return an
@@ -1226,7 +1254,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
        else
                error = -EAGAIN;
 
-       spin_unlock_irq(&dev->power.lock);
+       spin_unlock_irqrestore(&dev->power.lock, flags);
 
        if (error)
                return error;
@@ -1247,7 +1275,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
                device_links_read_unlock(idx);
        }
 
-       spin_lock_irq(&dev->power.lock);
+       spin_lock_irqsave(&dev->power.lock, flags);
 
        if (dev->power.runtime_status == status || !parent)
                goto out_set;
@@ -1288,7 +1316,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
                dev->power.runtime_error = 0;
 
  out:
-       spin_unlock_irq(&dev->power.lock);
+       spin_unlock_irqrestore(&dev->power.lock, flags);
 
        if (notify_parent)
                pm_request_idle(parent);
@@ -1527,14 +1555,17 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid);
  */
 void pm_runtime_allow(struct device *dev)
 {
+       int ret;
+
        spin_lock_irq(&dev->power.lock);
        if (dev->power.runtime_auto)
                goto out;
 
        dev->power.runtime_auto = true;
-       if (atomic_dec_and_test(&dev->power.usage_count))
+       ret = rpm_drop_usage_count(dev);
+       if (ret == 0)
                rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
-       else
+       else if (ret > 0)
                trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
 
  out:
index a57d469676caad6e8411e75d7503282f670e2f37..11a4ffe913672457aba29d3db77a53afcfa6cb98 100644 (file)
@@ -930,6 +930,7 @@ bool pm_wakeup_pending(void)
 
        return ret || atomic_read(&pm_abort_suspend) > 0;
 }
+EXPORT_SYMBOL_GPL(pm_wakeup_pending);
 
 void pm_system_wakeup(void)
 {
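
Exporting pm_wakeup_pending() lets modular drivers abort long-running suspend
work once a wakeup event has already doomed the transition; a hypothetical
use (transfer_done() is a placeholder):

    while (!transfer_done(dev)) {
            if (pm_wakeup_pending())
                    return -EBUSY;  /* suspend is being aborted anyway */
            usleep_range(100, 200);
    }
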
index b4df36c7b17d17b301a1dab2e0aaa994a5272f61..da8996e7a1f1cdf960b01216da27464dd654473a 100644 (file)
@@ -110,6 +110,10 @@ struct regmap {
        int (*reg_write)(void *context, unsigned int reg, unsigned int val);
        int (*reg_update_bits)(void *context, unsigned int reg,
                               unsigned int mask, unsigned int val);
+       /* Bulk read/write */
+       int (*read)(void *context, const void *reg_buf, size_t reg_size,
+                   void *val_buf, size_t val_size);
+       int (*write)(void *context, const void *data, size_t count);
 
        bool defer_caching;
 
index f2469d3435ca384db5f014c031ca3bd2e4687dd9..d0f5bc8279783adcab51bd42df7775000e1dad67 100644 (file)
@@ -183,8 +183,8 @@ int regcache_init(struct regmap *map, const struct regmap_config *config)
                        return 0;
        }
 
-       if (!map->max_register)
-               map->max_register = map->num_reg_defaults_raw;
+       if (!map->max_register && map->num_reg_defaults_raw)
+               map->max_register = (map->num_reg_defaults_raw - 1) * map->reg_stride;
 
        if (map->cache_ops->init) {
                dev_dbg(map->dev, "Initializing %s cache\n",
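
The old fallback stored a register count in a field that holds the highest
register address; the new expression converts between the two. Worked
example: with num_reg_defaults_raw = 16 and reg_stride = 4, the last default
sits at index 15, so max_register = (16 - 1) * 4 = 60, where the old code
would have claimed 16. The added "&& map->num_reg_defaults_raw" also keeps an
empty default table from underflowing the unsigned subtraction.
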
index 1578fb50668399c82b4bb8de6d3f43dbfe52d1cf..0328b0b34284f8e45e7e556eae408041d1e0362b 100644 (file)
@@ -40,7 +40,7 @@ static int regmap_i3c_read(void *context,
        return i3c_device_do_priv_xfers(i3c, xfers, 2);
 }
 
-static struct regmap_bus regmap_i3c = {
+static const struct regmap_bus regmap_i3c = {
        .write = regmap_i3c_write,
        .read = regmap_i3c_read,
 };
index 597042e2d0094887519c330bc11de21a8bc53fe7..986af26d88c244d3da3a277af09d70e7da202864 100644 (file)
@@ -80,7 +80,7 @@ static int regmap_sccb_write(void *context, unsigned int reg, unsigned int val)
        return i2c_smbus_write_byte_data(i2c, reg, val);
 }
 
-static struct regmap_bus regmap_sccb_bus = {
+static const struct regmap_bus regmap_sccb_bus = {
        .reg_write = regmap_sccb_write,
        .reg_read = regmap_sccb_read,
 };
index fe3ac26b66ad73a415b2fb4eb60dc067db67ccf2..388c3a087bd90b9dbdf9285d9eb442833a46a082 100644 (file)
@@ -42,7 +42,7 @@ static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *va
        return 0;
 }
 
-static struct regmap_bus regmap_sdw_mbq = {
+static const struct regmap_bus regmap_sdw_mbq = {
        .reg_read = regmap_sdw_mbq_read,
        .reg_write = regmap_sdw_mbq_write,
        .reg_format_endian_default = REGMAP_ENDIAN_LITTLE,
index 966de8a136d9004247b94d5a99bb5f8af1a26ac4..81b0327f719d87d8341237162df4bc56929c97ba 100644 (file)
@@ -30,7 +30,7 @@ static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val)
        return 0;
 }
 
-static struct regmap_bus regmap_sdw = {
+static const struct regmap_bus regmap_sdw = {
        .reg_read = regmap_sdw_read,
        .reg_write = regmap_sdw_write,
        .reg_format_endian_default = REGMAP_ENDIAN_LITTLE,
index 0968059f1ef57be468c421865a23747fd958b30e..8075db788b39ad1a32c3723c4ba33d5d4a48475b 100644 (file)
@@ -22,7 +22,7 @@ static int regmap_slimbus_read(void *context, const void *reg, size_t reg_size,
        return slim_read(sdev, *(u16 *)reg, val_size, val);
 }
 
-static struct regmap_bus regmap_slimbus_bus = {
+static const struct regmap_bus regmap_slimbus_bus = {
        .write = regmap_slimbus_write,
        .read = regmap_slimbus_read,
        .reg_format_endian_default = REGMAP_ENDIAN_LITTLE,
index 1fbaaad71ca5b58d1d67d2cfa3830727b8050e2b..3a8b402db8526c48d4b159200eb417b1b22f2ba1 100644 (file)
@@ -172,17 +172,17 @@ static int w1_reg_a16_v16_write(void *context, unsigned int reg,
  * Various types of supported bus addressing
  */
 
-static struct regmap_bus regmap_w1_bus_a8_v8 = {
+static const struct regmap_bus regmap_w1_bus_a8_v8 = {
        .reg_read = w1_reg_a8_v8_read,
        .reg_write = w1_reg_a8_v8_write,
 };
 
-static struct regmap_bus regmap_w1_bus_a8_v16 = {
+static const struct regmap_bus regmap_w1_bus_a8_v16 = {
        .reg_read = w1_reg_a8_v16_read,
        .reg_write = w1_reg_a8_v16_write,
 };
 
-static struct regmap_bus regmap_w1_bus_a16_v16 = {
+static const struct regmap_bus regmap_w1_bus_a16_v16 = {
        .reg_read = w1_reg_a16_v16_read,
        .reg_write = w1_reg_a16_v16_write,
 };
index 5e12f7cb51471388e33f56c6b762839d3f367a5b..2221d98638317a7b1003c5befed3b860f4f94847 100644 (file)
@@ -838,12 +838,15 @@ struct regmap *__regmap_init(struct device *dev,
                map->reg_stride_order = ilog2(map->reg_stride);
        else
                map->reg_stride_order = -1;
-       map->use_single_read = config->use_single_read || !bus || !bus->read;
-       map->use_single_write = config->use_single_write || !bus || !bus->write;
-       map->can_multi_write = config->can_multi_write && bus && bus->write;
+       map->use_single_read = config->use_single_read || !(config->read || (bus && bus->read));
+       map->use_single_write = config->use_single_write || !(config->write || (bus && bus->write));
+       map->can_multi_write = config->can_multi_write && (config->write || (bus && bus->write));
        if (bus) {
                map->max_raw_read = bus->max_raw_read;
                map->max_raw_write = bus->max_raw_write;
+       } else if (config->max_raw_read && config->max_raw_write) {
+               map->max_raw_read = config->max_raw_read;
+               map->max_raw_write = config->max_raw_write;
        }
        map->dev = dev;
        map->bus = bus;
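
Together with the struct regmap fields added above, this lets a regmap be
created with no regmap_bus at all, with bulk transport supplied straight from
the config. A hedged sketch (my_xfer_read()/my_xfer_write() are placeholders
for a device-specific transport with the read/write signatures shown in the
internal.h hunk above; note the probe code requires max_raw_read and
max_raw_write to be set together):

    static const struct regmap_config my_config = {
            .reg_bits      = 8,
            .val_bits      = 8,
            .read          = my_xfer_read,   /* bulk read callback  */
            .write         = my_xfer_write,  /* bulk write callback */
            .max_raw_read  = 256,
            .max_raw_write = 256,
    };
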
@@ -877,7 +880,16 @@ struct regmap *__regmap_init(struct device *dev,
                map->read_flag_mask = bus->read_flag_mask;
        }
 
-       if (!bus) {
+       if (config && config->read && config->write) {
+               map->reg_read  = _regmap_bus_read;
+
+               /* Bulk read/write */
+               map->read = config->read;
+               map->write = config->write;
+
+               reg_endian = REGMAP_ENDIAN_NATIVE;
+               val_endian = REGMAP_ENDIAN_NATIVE;
+       } else if (!bus) {
                map->reg_read  = config->reg_read;
                map->reg_write = config->reg_write;
                map->reg_update_bits = config->reg_update_bits;
@@ -894,10 +906,13 @@ struct regmap *__regmap_init(struct device *dev,
        } else {
                map->reg_read  = _regmap_bus_read;
                map->reg_update_bits = bus->reg_update_bits;
-       }
+               /* Bulk read/write */
+               map->read = bus->read;
+               map->write = bus->write;
 
-       reg_endian = regmap_get_reg_endian(bus, config);
-       val_endian = regmap_get_val_endian(dev, bus, config);
+               reg_endian = regmap_get_reg_endian(bus, config);
+               val_endian = regmap_get_val_endian(dev, bus, config);
+       }
 
        switch (config->reg_bits + map->reg_shift) {
        case 2:
@@ -1671,8 +1686,6 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
        size_t len;
        int i;
 
-       WARN_ON(!map->bus);
-
        /* Check for unwritable or noinc registers in range
         * before we start
         */
@@ -1754,7 +1767,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
                val = work_val;
        }
 
-       if (map->async && map->bus->async_write) {
+       if (map->async && map->bus && map->bus->async_write) {
                struct regmap_async *async;
 
                trace_regmap_async_write_start(map, reg, val_len);
@@ -1822,11 +1835,11 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
         * write.
         */
        if (val == work_val)
-               ret = map->bus->write(map->bus_context, map->work_buf,
-                                     map->format.reg_bytes +
-                                     map->format.pad_bytes +
-                                     val_len);
-       else if (map->bus->gather_write)
+               ret = map->write(map->bus_context, map->work_buf,
+                                map->format.reg_bytes +
+                                map->format.pad_bytes +
+                                val_len);
+       else if (map->bus && map->bus->gather_write)
                ret = map->bus->gather_write(map->bus_context, map->work_buf,
                                             map->format.reg_bytes +
                                             map->format.pad_bytes,
@@ -1844,7 +1857,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
                memcpy(buf, map->work_buf, map->format.reg_bytes);
                memcpy(buf + map->format.reg_bytes + map->format.pad_bytes,
                       val, val_len);
-               ret = map->bus->write(map->bus_context, buf, len);
+               ret = map->write(map->bus_context, buf, len);
 
                kfree(buf);
        } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) {
@@ -1901,7 +1914,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
        struct regmap_range_node *range;
        struct regmap *map = context;
 
-       WARN_ON(!map->bus || !map->format.format_write);
+       WARN_ON(!map->format.format_write);
 
        range = _regmap_range_lookup(map, reg);
        if (range) {
@@ -1916,8 +1929,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 
        trace_regmap_hw_write_start(map, reg, 1);
 
-       ret = map->bus->write(map->bus_context, map->work_buf,
-                             map->format.buf_size);
+       ret = map->write(map->bus_context, map->work_buf, map->format.buf_size);
 
        trace_regmap_hw_write_done(map, reg, 1);
 
@@ -1937,7 +1949,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
 {
        struct regmap *map = context;
 
-       WARN_ON(!map->bus || !map->format.format_val);
+       WARN_ON(!map->format.format_val);
 
        map->format.format_val(map->work_buf + map->format.reg_bytes
                               + map->format.pad_bytes, val, 0);
@@ -1951,7 +1963,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
 
 static inline void *_regmap_map_get_context(struct regmap *map)
 {
-       return (map->bus) ? map : map->bus_context;
+       return (map->bus || map->read) ? map : map->bus_context;
 }
 
 int _regmap_write(struct regmap *map, unsigned int reg,
@@ -2363,7 +2375,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
        u8 = buf;
        *u8 |= map->write_flag_mask;
 
-       ret = map->bus->write(map->bus_context, buf, len);
+       ret = map->write(map->bus_context, buf, len);
 
        kfree(buf);
 
@@ -2669,9 +2681,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
        struct regmap_range_node *range;
        int ret;
 
-       WARN_ON(!map->bus);
-
-       if (!map->bus || !map->bus->read)
+       if (!map->read)
                return -EINVAL;
 
        range = _regmap_range_lookup(map, reg);
@@ -2689,9 +2699,9 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
                                      map->read_flag_mask);
        trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes);
 
-       ret = map->bus->read(map->bus_context, map->work_buf,
-                            map->format.reg_bytes + map->format.pad_bytes,
-                            val, val_len);
+       ret = map->read(map->bus_context, map->work_buf,
+                       map->format.reg_bytes + map->format.pad_bytes,
+                       val, val_len);
 
        trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes);
 
@@ -2802,8 +2812,6 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
        unsigned int v;
        int ret, i;
 
-       if (!map->bus)
-               return -EINVAL;
        if (val_len % map->format.val_bytes)
                return -EINVAL;
        if (!IS_ALIGNED(reg, map->reg_stride))
@@ -2818,7 +2826,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
                size_t chunk_count, chunk_bytes;
                size_t chunk_regs = val_count;
 
-               if (!map->bus->read) {
+               if (!map->read) {
                        ret = -ENOTSUPP;
                        goto out;
                }
@@ -2878,7 +2886,7 @@ EXPORT_SYMBOL_GPL(regmap_raw_read);
  * @val: Pointer to data buffer
  * @val_len: Length of output buffer in bytes.
  *
- * The regmap API usually assumes that bulk bus read operations will read a
+ * The regmap API usually assumes that bulk read operations will read a
  * range of registers. Some devices have certain registers for which a read
  * operation will read from an internal FIFO.
  *
@@ -2896,10 +2904,6 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg,
        size_t read_len;
        int ret;
 
-       if (!map->bus)
-               return -EINVAL;
-       if (!map->bus->read)
-               return -ENOTSUPP;
        if (val_len % map->format.val_bytes)
                return -EINVAL;
        if (!IS_ALIGNED(reg, map->reg_stride))
@@ -3013,7 +3017,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
        if (val_count == 0)
                return -EINVAL;
 
-       if (map->bus && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) {
+       if (map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) {
                ret = regmap_raw_read(map, reg, val, val_bytes * val_count);
                if (ret != 0)
                        return ret;
index 84d0fcebd6af5acf15230779755d752ece8510be..749ae1246f4cf894f8544248c1461d0d06720db0 100644 (file)
@@ -244,3 +244,5 @@ void aoenet_exit(void);
 void aoenet_xmit(struct sk_buff_head *);
 int is_aoe_netif(struct net_device *ifp);
 int set_aoe_iflist(const char __user *str, size_t size);
+
+extern struct workqueue_struct *aoe_wq;
index 8a91fcac6f829bdbf8ad1709006f7dbdfb2c43b3..348adf3352177d7644489bfe973ea94d22a2f96f 100644 (file)
@@ -435,7 +435,7 @@ err_mempool:
 err:
        spin_lock_irqsave(&d->lock, flags);
        d->flags &= ~DEVFL_GD_NOW;
-       schedule_work(&d->work);
+       queue_work(aoe_wq, &d->work);
        spin_unlock_irqrestore(&d->lock, flags);
 }
 
index 384073ef2323c9ee1c53bc1fd20b5346da159c25..d7317425be510d1c3d4bbac5a18fba2ea8e76c1d 100644 (file)
@@ -968,7 +968,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
                d->flags |= DEVFL_NEWSIZE;
        else
                d->flags |= DEVFL_GDALLOC;
-       schedule_work(&d->work);
+       queue_work(aoe_wq, &d->work);
 }
 
 static void
index c5753c6bfe8041213f772654b27da2d9938d2ad3..b381d1c3ef327502b9eece7bd37127d12152fd30 100644 (file)
@@ -321,7 +321,7 @@ flush(const char __user *str, size_t cnt, int exiting)
                        specified = 1;
        }
 
-       flush_scheduled_work();
+       flush_workqueue(aoe_wq);
        /* pass one: do aoedev_downdev, which might sleep */
 restart1:
        spin_lock_irqsave(&devlist_lock, flags);
@@ -520,7 +520,7 @@ freetgt(struct aoedev *d, struct aoetgt *t)
 void
 aoedev_exit(void)
 {
-       flush_scheduled_work();
+       flush_workqueue(aoe_wq);
        flush(NULL, 0, EXITING);
 }
 
index 1e4e2971171caf5c0cafd798fe93bc83c439e0e1..6238c4c87cfc73a58e81d3648457f2b95e1541db 100644 (file)
@@ -16,6 +16,7 @@ MODULE_DESCRIPTION("AoE block/char driver for 2.6.2 and newer 2.6 kernels");
 MODULE_VERSION(VERSION);
 
 static struct timer_list timer;
+struct workqueue_struct *aoe_wq;
 
 static void discover_timer(struct timer_list *t)
 {
@@ -35,6 +36,7 @@ aoe_exit(void)
        aoechr_exit();
        aoedev_exit();
        aoeblk_exit();          /* free cache after de-allocating bufs */
+       destroy_workqueue(aoe_wq);
 }
 
 static int __init
@@ -42,9 +44,13 @@ aoe_init(void)
 {
        int ret;
 
+       aoe_wq = alloc_workqueue("aoe_wq", 0, 0);
+       if (!aoe_wq)
+               return -ENOMEM;
+
        ret = aoedev_init();
        if (ret)
-               return ret;
+               goto dev_fail;
        ret = aoechr_init();
        if (ret)
                goto chr_fail;
@@ -77,6 +83,8 @@ aoe_init(void)
        aoechr_exit();
  chr_fail:
        aoedev_exit();
+ dev_fail:
+       destroy_workqueue(aoe_wq);
 
        printk(KERN_INFO "aoe: initialisation failure.\n");
        return ret;
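
The aoe hunks above move the driver off the system workqueue: schedule_work()/flush_scheduled_work() become queue_work()/flush_workqueue() on a private aoe_wq, so flushing aoe work no longer has to flush unrelated system-wide work. A minimal sketch of the pattern, with hypothetical names:

	static struct workqueue_struct *my_wq;

	static int __init my_init(void)
	{
		my_wq = alloc_workqueue("my_wq", 0, 0);
		return my_wq ? 0 : -ENOMEM;
	}

	static void __exit my_exit(void)
	{
		flush_workqueue(my_wq);		/* wait for queued items */
		destroy_workqueue(my_wq);
	}
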
index df25eecf80af00d99e31519a7bef401035ec2074..9e060e49b3f8c7233b70b5c32e432c6de272e343 100644 (file)
@@ -683,7 +683,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
                }
        }
 
-       want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
+       want = PFN_UP(words*sizeof(long));
        have = b->bm_number_of_pages;
        if (want == have) {
                D_ASSERT(device, b->bm_pages != NULL);
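
PFN_UP() from <linux/pfn.h> is the round-up bytes-to-pages conversion, so the rewrite above is behaviour-preserving:

	#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

	/* hence ALIGN(len, PAGE_SIZE) >> PAGE_SHIFT == PFN_UP(len) */
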
index 4b0b25cc916eed144be0c712034a39ec69304ed3..2887350ae010a78c5c5949c2a61af2eb746ed1d2 100644 (file)
@@ -903,31 +903,6 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
        }
 }
 
-/* communicated if (agreed_features & DRBD_FF_WSAME) */
-static void
-assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
-                                       struct request_queue *q)
-{
-       if (q) {
-               p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
-               p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
-               p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
-               p->qlim->io_min = cpu_to_be32(queue_io_min(q));
-               p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
-               p->qlim->discard_enabled = blk_queue_discard(q);
-               p->qlim->write_same_capable = 0;
-       } else {
-               q = device->rq_queue;
-               p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
-               p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
-               p->qlim->alignment_offset = 0;
-               p->qlim->io_min = cpu_to_be32(queue_io_min(q));
-               p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
-               p->qlim->discard_enabled = 0;
-               p->qlim->write_same_capable = 0;
-       }
-}
-
 int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
 {
        struct drbd_device *device = peer_device->device;
@@ -949,7 +924,9 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
 
        memset(p, 0, packet_size);
        if (get_ldev_if_state(device, D_NEGOTIATING)) {
-               struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
+               struct block_device *bdev = device->ldev->backing_bdev;
+               struct request_queue *q = bdev_get_queue(bdev);
+
                d_size = drbd_get_max_capacity(device->ldev);
                rcu_read_lock();
                u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
@@ -957,14 +934,32 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
                q_order_type = drbd_queue_order_type(device);
                max_bio_size = queue_max_hw_sectors(q) << 9;
                max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
-               assign_p_sizes_qlim(device, p, q);
+               p->qlim->physical_block_size =
+                       cpu_to_be32(bdev_physical_block_size(bdev));
+               p->qlim->logical_block_size =
+                       cpu_to_be32(bdev_logical_block_size(bdev));
+               p->qlim->alignment_offset =
+                       cpu_to_be32(bdev_alignment_offset(bdev));
+               p->qlim->io_min = cpu_to_be32(bdev_io_min(bdev));
+               p->qlim->io_opt = cpu_to_be32(bdev_io_opt(bdev));
+               p->qlim->discard_enabled = !!bdev_max_discard_sectors(bdev);
                put_ldev(device);
        } else {
+               struct request_queue *q = device->rq_queue;
+
+               p->qlim->physical_block_size =
+                       cpu_to_be32(queue_physical_block_size(q));
+               p->qlim->logical_block_size =
+                       cpu_to_be32(queue_logical_block_size(q));
+               p->qlim->alignment_offset = 0;
+               p->qlim->io_min = cpu_to_be32(queue_io_min(q));
+               p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
+               p->qlim->discard_enabled = 0;
+
                d_size = 0;
                u_size = 0;
                q_order_type = QUEUE_ORDERED_NONE;
                max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
-               assign_p_sizes_qlim(device, p, NULL);
        }
 
        if (peer_device->connection->agreed_pro_version <= 94)
@@ -3586,9 +3581,8 @@ const char *cmdname(enum drbd_packet cmd)
         * when we want to support more than
         * one PRO_VERSION */
        static const char *cmdnames[] = {
+
                [P_DATA]                = "Data",
-               [P_WSAME]               = "WriteSame",
-               [P_TRIM]                = "Trim",
                [P_DATA_REPLY]          = "DataReply",
                [P_RS_DATA_REPLY]       = "RSDataReply",
                [P_BARRIER]             = "Barrier",
@@ -3599,7 +3593,6 @@ const char *cmdname(enum drbd_packet cmd)
                [P_DATA_REQUEST]        = "DataRequest",
                [P_RS_DATA_REQUEST]     = "RSDataRequest",
                [P_SYNC_PARAM]          = "SyncParam",
-               [P_SYNC_PARAM89]        = "SyncParam89",
                [P_PROTOCOL]            = "ReportProtocol",
                [P_UUIDS]               = "ReportUUIDs",
                [P_SIZES]               = "ReportSizes",
@@ -3607,6 +3600,7 @@ const char *cmdname(enum drbd_packet cmd)
                [P_SYNC_UUID]           = "ReportSyncUUID",
                [P_AUTH_CHALLENGE]      = "AuthChallenge",
                [P_AUTH_RESPONSE]       = "AuthResponse",
+               [P_STATE_CHG_REQ]       = "StateChgRequest",
                [P_PING]                = "Ping",
                [P_PING_ACK]            = "PingAck",
                [P_RECV_ACK]            = "RecvAck",
@@ -3617,23 +3611,25 @@ const char *cmdname(enum drbd_packet cmd)
                [P_NEG_DREPLY]          = "NegDReply",
                [P_NEG_RS_DREPLY]       = "NegRSDReply",
                [P_BARRIER_ACK]         = "BarrierAck",
-               [P_STATE_CHG_REQ]       = "StateChgRequest",
                [P_STATE_CHG_REPLY]     = "StateChgReply",
                [P_OV_REQUEST]          = "OVRequest",
                [P_OV_REPLY]            = "OVReply",
                [P_OV_RESULT]           = "OVResult",
                [P_CSUM_RS_REQUEST]     = "CsumRSRequest",
                [P_RS_IS_IN_SYNC]       = "CsumRSIsInSync",
+               [P_SYNC_PARAM89]        = "SyncParam89",
                [P_COMPRESSED_BITMAP]   = "CBitmap",
                [P_DELAY_PROBE]         = "DelayProbe",
                [P_OUT_OF_SYNC]         = "OutOfSync",
-               [P_RETRY_WRITE]         = "RetryWrite",
                [P_RS_CANCEL]           = "RSCancel",
                [P_CONN_ST_CHG_REQ]     = "conn_st_chg_req",
                [P_CONN_ST_CHG_REPLY]   = "conn_st_chg_reply",
                [P_PROTOCOL_UPDATE]     = "protocol_update",
+               [P_TRIM]                = "Trim",
                [P_RS_THIN_REQ]         = "rs_thin_req",
                [P_RS_DEALLOCATED]      = "rs_deallocated",
+               [P_WSAME]               = "WriteSame",
+               [P_ZEROES]              = "Zeroes",
 
                /* enum drbd_packet, but not commands - obsoleted flags:
                 *      P_MAY_IGNORE
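
The inlined qlim assignments above use the bdev-based limit helpers, which take the struct block_device directly instead of a request_queue obtained via bdev_get_queue(). A hedged sketch of reading the same limits; show_limits() itself is hypothetical:

	static void show_limits(struct block_device *bdev)
	{
		pr_info("lbs=%u pbs=%u io_min=%u io_opt=%u discard_max=%u\n",
			bdev_logical_block_size(bdev),
			bdev_physical_block_size(bdev),
			bdev_io_min(bdev),
			bdev_io_opt(bdev),
			bdev_max_discard_sectors(bdev));
	}
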
index b7216c186ba4d731c6212f30751b435805a9fcb5..013d355a2033bd61f7e20650bf4e64e301ccd479 100644 (file)
@@ -770,6 +770,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
        struct set_role_parms parms;
        int err;
        enum drbd_ret_code retcode;
+       enum drbd_state_rv rv;
 
        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
        if (!adm_ctx.reply_skb)
@@ -790,14 +791,14 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
        mutex_lock(&adm_ctx.resource->adm_mutex);
 
        if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
-               retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
-                                               R_PRIMARY, parms.assume_uptodate);
+               rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
        else
-               retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
-                                               R_SECONDARY, 0);
+               rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
 
        mutex_unlock(&adm_ctx.resource->adm_mutex);
        genl_lock();
+       drbd_adm_finish(&adm_ctx, info, rv);
+       return 0;
 out:
        drbd_adm_finish(&adm_ctx, info, retcode);
        return 0;
@@ -1204,50 +1205,40 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
 }
 
 static void decide_on_discard_support(struct drbd_device *device,
-                       struct request_queue *q,
-                       struct request_queue *b,
-                       bool discard_zeroes_if_aligned)
+               struct drbd_backing_dev *bdev)
 {
-       /* q = drbd device queue (device->rq_queue)
-        * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue),
-        *     or NULL if diskless
-        */
-       struct drbd_connection *connection = first_peer_device(device)->connection;
-       bool can_do = b ? blk_queue_discard(b) : true;
-
-       if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
-               can_do = false;
-               drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
-       }
-       if (can_do) {
-               /* We don't care for the granularity, really.
-                * Stacking limits below should fix it for the local
-                * device.  Whether or not it is a suitable granularity
-                * on the remote device is not our problem, really. If
-                * you care, you need to use devices with similar
-                * topology on all peers. */
-               blk_queue_discard_granularity(q, 512);
-               q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
-               q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
-       } else {
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
-               blk_queue_discard_granularity(q, 0);
-               q->limits.max_discard_sectors = 0;
-               q->limits.max_write_zeroes_sectors = 0;
-       }
-}
+       struct drbd_connection *connection =
+               first_peer_device(device)->connection;
+       struct request_queue *q = device->rq_queue;
 
-static void fixup_discard_if_not_supported(struct request_queue *q)
-{
-       /* To avoid confusion, if this queue does not support discard, clear
-        * max_discard_sectors, which is what lsblk -D reports to the user.
-        * Older kernels got this wrong in "stack limits".
-        * */
-       if (!blk_queue_discard(q)) {
-               blk_queue_max_discard_sectors(q, 0);
-               blk_queue_discard_granularity(q, 0);
+       if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
+               goto not_supported;
+
+       if (connection->cstate >= C_CONNECTED &&
+           !(connection->agreed_features & DRBD_FF_TRIM)) {
+               drbd_info(connection,
+                       "peer DRBD too old, does not support TRIM: disabling discards\n");
+               goto not_supported;
        }
+
+       /*
+        * We don't care for the granularity, really.
+        *
+        * Stacking limits below should fix it for the local device.  Whether or
+        * not it is a suitable granularity on the remote device is not our
+        * problem, really. If you care, you need to use devices with similar
+        * topology on all peers.
+        */
+       blk_queue_discard_granularity(q, 512);
+       q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
+       q->limits.max_write_zeroes_sectors =
+               drbd_max_discard_sectors(connection);
+       return;
+
+not_supported:
+       blk_queue_discard_granularity(q, 0);
+       q->limits.max_discard_sectors = 0;
+       q->limits.max_write_zeroes_sectors = 0;
 }
 
 static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
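
Note the recurring idiom in this series: with the QUEUE_FLAG_DISCARD flag being retired, discard support is now tested through the limit itself, since a max_discard_sectors of zero means the device cannot discard:

	/* Replacement for the old blk_queue_discard(q) check. */
	if (!bdev_max_discard_sectors(bdev))
		return -EOPNOTSUPP;
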
@@ -1273,7 +1264,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
        unsigned int max_segments = 0;
        struct request_queue *b = NULL;
        struct disk_conf *dc;
-       bool discard_zeroes_if_aligned = true;
 
        if (bdev) {
                b = bdev->backing_bdev->bd_disk->queue;
@@ -1282,7 +1272,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
                rcu_read_lock();
                dc = rcu_dereference(device->ldev->disk_conf);
                max_segments = dc->max_bio_bvecs;
-               discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
                rcu_read_unlock();
 
                blk_set_stacking_limits(&q->limits);
@@ -1292,13 +1281,12 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
        /* This is the workaround for "bio would need to, but cannot, be split" */
        blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
        blk_queue_segment_boundary(q, PAGE_SIZE-1);
-       decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
+       decide_on_discard_support(device, bdev);
 
        if (b) {
                blk_stack_limits(&q->limits, &b->limits, 0);
                disk_update_readahead(device->vdisk);
        }
-       fixup_discard_if_not_supported(q);
        fixup_write_zeroes(device, q);
 }
 
@@ -1437,14 +1425,14 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
 static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
                               struct drbd_backing_dev *nbc)
 {
-       struct request_queue * const q = nbc->backing_bdev->bd_disk->queue;
+       struct block_device *bdev = nbc->backing_bdev;
 
        if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
                disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
        if (disk_conf->al_extents > drbd_al_extents_max(nbc))
                disk_conf->al_extents = drbd_al_extents_max(nbc);
 
-       if (!blk_queue_discard(q)) {
+       if (!bdev_max_discard_sectors(bdev)) {
                if (disk_conf->rs_discard_granularity) {
                        disk_conf->rs_discard_granularity = 0; /* disable feature */
                        drbd_info(device, "rs_discard_granularity feature disabled\n");
@@ -1453,16 +1441,19 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
 
        if (disk_conf->rs_discard_granularity) {
                int orig_value = disk_conf->rs_discard_granularity;
+               sector_t discard_size = bdev_max_discard_sectors(bdev) << 9;
+               unsigned int discard_granularity = bdev_discard_granularity(bdev);
                int remainder;
 
-               if (q->limits.discard_granularity > disk_conf->rs_discard_granularity)
-                       disk_conf->rs_discard_granularity = q->limits.discard_granularity;
+               if (discard_granularity > disk_conf->rs_discard_granularity)
+                       disk_conf->rs_discard_granularity = discard_granularity;
 
-               remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity;
+               remainder = disk_conf->rs_discard_granularity %
+                               discard_granularity;
                disk_conf->rs_discard_granularity += remainder;
 
-               if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9)
-                       disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9;
+               if (disk_conf->rs_discard_granularity > discard_size)
+                       disk_conf->rs_discard_granularity = discard_size;
 
                if (disk_conf->rs_discard_granularity != orig_value)
                        drbd_info(device, "rs_discard_granularity changed to %d\n",
@@ -1611,8 +1602,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
                        drbd_send_sync_param(peer_device);
        }
 
-       synchronize_rcu();
-       kfree(old_disk_conf);
+       kvfree_rcu(old_disk_conf);
        kfree(old_plan);
        mod_timer(&device->request_timer, jiffies + HZ);
        goto success;
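
kvfree_rcu() collapses the synchronize_rcu(); kfree() pair into one deferred free, so these update paths no longer block for a full grace period. The publish-then-retire shape, sketched with illustrative field and lock names:

	old_conf = rcu_dereference_protected(device->disk_conf,
					     lockdep_is_held(&conf_lock));
	rcu_assign_pointer(device->disk_conf, new_conf);
	kvfree_rcu(old_conf);	/* freed once a grace period has elapsed */
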
@@ -2443,8 +2433,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 
        mutex_unlock(&connection->resource->conf_update);
        mutex_unlock(&connection->data.mutex);
-       synchronize_rcu();
-       kfree(old_net_conf);
+       kvfree_rcu(old_net_conf);
 
        if (connection->cstate >= C_WF_REPORT_PARAMS) {
                struct drbd_peer_device *peer_device;
@@ -2502,6 +2491,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
        struct drbd_resource *resource;
        struct drbd_connection *connection;
        enum drbd_ret_code retcode;
+       enum drbd_state_rv rv;
        int i;
        int err;
 
@@ -2621,12 +2611,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
        }
        rcu_read_unlock();
 
-       retcode = (enum drbd_ret_code)conn_request_state(connection,
-                                       NS(conn, C_UNCONNECTED), CS_VERBOSE);
+       rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
 
        conn_reconfig_done(connection);
        mutex_unlock(&adm_ctx.resource->adm_mutex);
-       drbd_adm_finish(&adm_ctx, info, retcode);
+       drbd_adm_finish(&adm_ctx, info, rv);
        return 0;
 
 fail:
@@ -2734,11 +2723,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 
        mutex_lock(&adm_ctx.resource->adm_mutex);
        rv = conn_try_disconnect(connection, parms.force_disconnect);
-       if (rv < SS_SUCCESS)
-               retcode = (enum drbd_ret_code)rv;
-       else
-               retcode = NO_ERROR;
        mutex_unlock(&adm_ctx.resource->adm_mutex);
+       if (rv < SS_SUCCESS) {
+               drbd_adm_finish(&adm_ctx, info, rv);
+               return 0;
+       }
+       retcode = NO_ERROR;
  fail:
        drbd_adm_finish(&adm_ctx, info, retcode);
        return 0;
@@ -2857,8 +2847,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
                new_disk_conf->disk_size = (sector_t)rs.resize_size;
                rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
                mutex_unlock(&device->resource->conf_update);
-               synchronize_rcu();
-               kfree(old_disk_conf);
+               kvfree_rcu(old_disk_conf);
                new_disk_conf = NULL;
        }
 
index 08da922f81d1d75c69552dd07207108a8cd225e8..6762be53f40937b60e95f86dca9c904a47c0a7ba 100644 (file)
@@ -364,7 +364,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
        struct drbd_device *device = peer_device->device;
        struct drbd_peer_request *peer_req;
        struct page *page = NULL;
-       unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
+       unsigned int nr_pages = PFN_UP(payload_size);
 
        if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
                return NULL;
@@ -1511,7 +1511,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
 int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
 {
        struct block_device *bdev = device->ldev->backing_bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
        sector_t tmp, nr;
        unsigned int max_discard_sectors, granularity;
        int alignment;
@@ -1521,10 +1520,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
                goto zero_out;
 
        /* Zero-sector (unknown) and one-sector granularities are the same.  */
-       granularity = max(q->limits.discard_granularity >> 9, 1U);
+       granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
        alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
 
-       max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
+       max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
        max_discard_sectors -= max_discard_sectors % granularity;
        if (unlikely(!max_discard_sectors))
                goto zero_out;
@@ -1548,7 +1547,8 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
                start = tmp;
        }
        while (nr_sectors >= max_discard_sectors) {
-               err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
+               err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
+                                           GFP_NOIO);
                nr_sectors -= max_discard_sectors;
                start += max_discard_sectors;
        }
@@ -1560,7 +1560,7 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
                nr = nr_sectors;
                nr -= (unsigned int)nr % granularity;
                if (nr) {
-                       err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
+                       err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
                        nr_sectors -= nr;
                        start += nr;
                }
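
These hunks also pick up the new blkdev_issue_discard() prototype: the flags argument is gone now that secure erase has been split into its own helper, leaving simply:

	err = blkdev_issue_discard(bdev, start_sector, nr_sectors, GFP_NOIO);
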
@@ -1575,11 +1575,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
 
 static bool can_do_reliable_discards(struct drbd_device *device)
 {
-       struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
        struct disk_conf *dc;
        bool can_do;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
                return false;
 
        rcu_read_lock();
@@ -1629,9 +1628,9 @@ int drbd_submit_peer_request(struct drbd_device *device,
        struct bio *bio;
        struct page *page = peer_req->pages;
        sector_t sector = peer_req->i.sector;
-       unsigned data_size = peer_req->i.size;
-       unsigned n_bios = 0;
-       unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
+       unsigned int data_size = peer_req->i.size;
+       unsigned int n_bios = 0;
+       unsigned int nr_pages = PFN_UP(data_size);
 
        /* TRIM/DISCARD: for now, always use the helper function
         * blkdev_issue_zeroout(..., discard=true).
@@ -3751,8 +3750,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
                drbd_info(connection, "peer data-integrity-alg: %s\n",
                          integrity_alg[0] ? integrity_alg : "(none)");
 
-       synchronize_rcu();
-       kfree(old_net_conf);
+       kvfree_rcu(old_net_conf);
        return 0;
 
 disconnect_rcu_unlock:
@@ -3903,7 +3901,6 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
                                drbd_err(device, "verify-alg of wrong size, "
                                        "peer wants %u, accepting only up to %u byte\n",
                                        data_size, SHARED_SECRET_MAX);
-                               err = -EIO;
                                goto reconnect;
                        }
 
@@ -4121,8 +4118,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 
                        rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
                        mutex_unlock(&connection->resource->conf_update);
-                       synchronize_rcu();
-                       kfree(old_disk_conf);
+                       kvfree_rcu(old_disk_conf);
 
                        drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
                                 (unsigned long)p_usize, (unsigned long)my_usize);
index 75be0e16770a090f99e1573140520a30651b54ff..e64bcfba30ef371aa075ca94e05bda2a439e2ec8 100644 (file)
@@ -922,7 +922,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
 
        switch (rbm) {
        case RB_CONGESTED_REMOTE:
-               return 0;
+               return false;
        case RB_LEAST_PENDING:
                return atomic_read(&device->local_cnt) >
                        atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
index 4ee11aef6672b8992a19f262284043a912cf5157..3f7bf9f2d874c71ab771c4d0ff594a695df20815 100644 (file)
@@ -2071,8 +2071,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
                conn_free_crypto(connection);
                mutex_unlock(&connection->resource->conf_update);
 
-               synchronize_rcu();
-               kfree(old_conf);
+               kvfree_rcu(old_conf);
        }
 
        if (ns_max.susp_fen) {
index 0f9956f4e9c4233cc82f3e87affba0e2b46e0d9a..af3051dd8912d02e8ed9e69da6a413fdd732be2d 100644 (file)
@@ -1030,7 +1030,7 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_
 {
        if (drbd_peer_req_has_active_page(peer_req)) {
                /* This might happen if sendpage() has not finished */
-               int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
+               int i = PFN_UP(peer_req->i.size);
                atomic_add(i, &device->pp_in_use_by_net);
                atomic_sub(i, &device->pp_in_use);
                spin_lock_irq(&device->resource->req_lock);
index a58595f5ee2c8f450a03b651fd6650ddc79f5e0a..e2cb51810e89aa7455ff533d11fd32cf449e06e4 100644 (file)
@@ -1,54 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- *  linux/drivers/block/loop.c
- *
- *  Written by Theodore Ts'o, 3/29/93
- *
- * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
- * permitted under the GNU General Public License.
- *
- * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
- * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
- *
- * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
- * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
- *
- * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
- *
- * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
- *
- * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
- *
- * Loadable modules and other fixes by AK, 1998
- *
- * Make real block number available to downstream transfer functions, enables
- * CBC (and relatives) mode encryption requiring unique IVs per data block.
- * Reed H. Petty, rhp@draper.net
- *
- * Maximum number of loop devices now dynamic via max_loop module parameter.
- * Russell Kroll <rkroll@exploits.org> 19990701
- *
- * Maximum number of loop devices when compiled-in now selectable by passing
- * max_loop=<1-255> to the kernel on boot.
- * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
- *
- * Completely rewrite request handling to be make_request_fn style and
- * non blocking, pushing work to a helper thread. Lots of fixes from
- * Al Viro too.
- * Jens Axboe <axboe@suse.de>, Nov 2000
- *
- * Support up to 256 loop devices
- * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
- *
- * Support for falling back on the write file operation when the address space
- * operations write_begin is not available on the backing filesystem.
- * Anton Altaparmakov, 16 Feb 2005
- *
- * Still To Fix:
- * - Advisory locking is ignored here.
- * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
- *
+ * Copyright 1993 by Theodore Ts'o.
  */
-
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
@@ -59,7 +12,6 @@
 #include <linux/errno.h>
 #include <linux/major.h>
 #include <linux/wait.h>
-#include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/init.h>
 #include <linux/swap.h>
 #include <linux/blk-cgroup.h>
 #include <linux/sched/mm.h>
 #include <linux/statfs.h>
+#include <linux/uaccess.h>
+#include <linux/blk-mq.h>
+#include <linux/spinlock.h>
+#include <uapi/linux/loop.h>
+
+/* Possible states of device */
+enum {
+       Lo_unbound,
+       Lo_bound,
+       Lo_rundown,
+       Lo_deleting,
+};
 
-#include "loop.h"
+struct loop_func_table;
+
+struct loop_device {
+       int             lo_number;
+       loff_t          lo_offset;
+       loff_t          lo_sizelimit;
+       int             lo_flags;
+       char            lo_file_name[LO_NAME_SIZE];
+
+       struct file *   lo_backing_file;
+       struct block_device *lo_device;
+
+       gfp_t           old_gfp_mask;
+
+       spinlock_t              lo_lock;
+       int                     lo_state;
+       spinlock_t              lo_work_lock;
+       struct workqueue_struct *workqueue;
+       struct work_struct      rootcg_work;
+       struct list_head        rootcg_cmd_list;
+       struct list_head        idle_worker_list;
+       struct rb_root          worker_tree;
+       struct timer_list       timer;
+       bool                    use_dio;
+       bool                    sysfs_inited;
+
+       struct request_queue    *lo_queue;
+       struct blk_mq_tag_set   tag_set;
+       struct gendisk          *lo_disk;
+       struct mutex            lo_mutex;
+       bool                    idr_visible;
+};
 
-#include <linux/uaccess.h>
+struct loop_cmd {
+       struct list_head list_entry;
+       bool use_aio; /* use AIO interface to handle I/O */
+       atomic_t ref; /* only for aio */
+       long ret;
+       struct kiocb iocb;
+       struct bio_vec *bvec;
+       struct cgroup_subsys_state *blkcg_css;
+       struct cgroup_subsys_state *memcg_css;
+};
 
 #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
 #define LOOP_DEFAULT_HW_Q_DEPTH (128)
@@ -314,15 +318,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
 
        mode |= FALLOC_FL_KEEP_SIZE;
 
-       if (!blk_queue_discard(lo->lo_queue)) {
-               ret = -EOPNOTSUPP;
-               goto out;
-       }
+       if (!bdev_max_discard_sectors(lo->lo_device))
+               return -EOPNOTSUPP;
 
        ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
        if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
-               ret = -EIO;
- out:
+               return -EIO;
        return ret;
 }
 
@@ -572,6 +573,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 
        if (!file)
                return -EBADF;
+
+       /* suppress uevents while reconfiguring the device */
+       dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
        is_loop = is_loop_device(file);
        error = loop_global_lock_killable(lo, is_loop);
        if (error)
@@ -626,13 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
        fput(old_file);
        if (partscan)
                loop_reread_partitions(lo);
-       return 0;
+
+       error = 0;
+done:
+       /* enable and uncork uevent now that we are done */
+       dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+       return error;
 
 out_err:
        loop_global_unlock(lo, is_loop);
 out_putf:
        fput(file);
-       return error;
+       goto done;
 }
 
 /* loop sysfs attributes */
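
The dev_set_uevent_suppress() bracketing keeps udev from observing a half-reconfigured loop device: uevents are corked before the backing file is swapped and uncorked on every exit path once the device is consistent again. In outline:

	dev_set_uevent_suppress(disk_to_dev(disk), 1);	/* cork uevents */
	/* ... rebind the backing file ... */
	dev_set_uevent_suppress(disk_to_dev(disk), 0);	/* uncork again */
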
@@ -762,7 +772,7 @@ static void loop_config_discard(struct loop_device *lo)
                struct request_queue *backingq = bdev_get_queue(I_BDEV(inode));
 
                max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
-               granularity = backingq->limits.discard_granularity ?:
+               granularity = bdev_discard_granularity(I_BDEV(inode)) ?:
                        queue_physical_block_size(backingq);
 
        /*
@@ -787,14 +797,11 @@ static void loop_config_discard(struct loop_device *lo)
                q->limits.discard_granularity = granularity;
                blk_queue_max_discard_sectors(q, max_discard_sectors);
                blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
        } else {
                q->limits.discard_granularity = 0;
                blk_queue_max_discard_sectors(q, 0);
                blk_queue_max_write_zeroes_sectors(q, 0);
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
        }
-       q->limits.discard_alignment = 0;
 }
 
 struct loop_worker {
@@ -808,8 +815,6 @@ struct loop_worker {
 };
 
 static void loop_workfn(struct work_struct *work);
-static void loop_rootcg_workfn(struct work_struct *work);
-static void loop_free_idle_workers(struct timer_list *timer);
 
 #ifdef CONFIG_BLK_CGROUP
 static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
@@ -893,6 +898,39 @@ queue_work:
        spin_unlock_irq(&lo->lo_work_lock);
 }
 
+static void loop_set_timer(struct loop_device *lo)
+{
+       timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
+}
+
+static void loop_free_idle_workers(struct loop_device *lo, bool delete_all)
+{
+       struct loop_worker *pos, *worker;
+
+       spin_lock_irq(&lo->lo_work_lock);
+       list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
+                               idle_list) {
+               if (!delete_all &&
+                   time_is_after_jiffies(worker->last_ran_at +
+                                         LOOP_IDLE_WORKER_TIMEOUT))
+                       break;
+               list_del(&worker->idle_list);
+               rb_erase(&worker->rb_node, &lo->worker_tree);
+               css_put(worker->blkcg_css);
+               kfree(worker);
+       }
+       if (!list_empty(&lo->idle_worker_list))
+               loop_set_timer(lo);
+       spin_unlock_irq(&lo->lo_work_lock);
+}
+
+static void loop_free_idle_workers_timer(struct timer_list *timer)
+{
+       struct loop_device *lo = container_of(timer, struct loop_device, timer);
+
+       return loop_free_idle_workers(lo, false);
+}
+
 static void loop_update_rotational(struct loop_device *lo)
 {
        struct file *file = lo->lo_backing_file;
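
The reaper refactor above lets the timer callback and the final teardown share one helper, with delete_all deciding whether still-fresh workers survive; timer_reduce() only ever shortens a pending expiry, so re-arming on every pass stays cheap. Sketched with hypothetical names:

	static void my_timer_fn(struct timer_list *t)
	{
		struct my_dev *d = container_of(t, struct my_dev, timer);

		my_reap_idle(d, false);	/* keep recently used workers */
	}

	timer_setup(&d->timer, my_timer_fn, TIMER_DEFERRABLE);
	timer_reduce(&d->timer, jiffies + MY_IDLE_TIMEOUT);
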
@@ -903,7 +941,7 @@ static void loop_update_rotational(struct loop_device *lo)
 
        /* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
        if (file_bdev)
-               nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
+               nonrot = bdev_nonrot(file_bdev);
 
        if (nonrot)
                blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
@@ -967,6 +1005,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
        /* This is safe, since we have a reference from open(). */
        __module_get(THIS_MODULE);
 
+       /* suppress uevents while reconfiguring the device */
+       dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
        /*
         * If we don't hold exclusive handle for the device, upgrade to it
         * here to avoid changing device under exclusive owner.
@@ -1011,24 +1052,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
            !file->f_op->write_iter)
                lo->lo_flags |= LO_FLAGS_READ_ONLY;
 
-       lo->workqueue = alloc_workqueue("loop%d",
-                                       WQ_UNBOUND | WQ_FREEZABLE,
-                                       0,
-                                       lo->lo_number);
        if (!lo->workqueue) {
-               error = -ENOMEM;
-               goto out_unlock;
+               lo->workqueue = alloc_workqueue("loop%d",
+                                               WQ_UNBOUND | WQ_FREEZABLE,
+                                               0, lo->lo_number);
+               if (!lo->workqueue) {
+                       error = -ENOMEM;
+                       goto out_unlock;
+               }
        }
 
        disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
        set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
-       INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
-       INIT_LIST_HEAD(&lo->rootcg_cmd_list);
-       INIT_LIST_HEAD(&lo->idle_worker_list);
-       lo->worker_tree = RB_ROOT;
-       timer_setup(&lo->timer, loop_free_idle_workers,
-               TIMER_DEFERRABLE);
        lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
        lo->lo_device = bdev;
        lo->lo_backing_file = file;
@@ -1073,7 +1109,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
                loop_reread_partitions(lo);
        if (!(mode & FMODE_EXCL))
                bd_abort_claiming(bdev, loop_configure);
-       return 0;
+
+       error = 0;
+done:
+       /* enable and uncork uevent now that we are done */
+       dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+       return error;
 
 out_unlock:
        loop_global_unlock(lo, is_loop);
@@ -1084,53 +1125,24 @@ out_putf:
        fput(file);
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
-       return error;
+       goto done;
 }
 
 static void __loop_clr_fd(struct loop_device *lo, bool release)
 {
        struct file *filp;
        gfp_t gfp = lo->old_gfp_mask;
-       struct loop_worker *pos, *worker;
-
-       /*
-        * Flush loop_configure() and loop_change_fd(). It is acceptable for
-        * loop_validate_file() to succeed, for actual clear operation has not
-        * started yet.
-        */
-       mutex_lock(&loop_validate_mutex);
-       mutex_unlock(&loop_validate_mutex);
-       /*
-        * loop_validate_file() now fails because l->lo_state != Lo_bound
-        * became visible.
-        */
-
-       /*
-        * Since this function is called upon "ioctl(LOOP_CLR_FD)" xor "close()
-        * after ioctl(LOOP_CLR_FD)", it is a sign of something going wrong if
-        * lo->lo_state has changed while waiting for lo->lo_mutex.
-        */
-       mutex_lock(&lo->lo_mutex);
-       BUG_ON(lo->lo_state != Lo_rundown);
-       mutex_unlock(&lo->lo_mutex);
 
        if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
                blk_queue_write_cache(lo->lo_queue, false, false);
 
-       /* freeze request queue during the transition */
-       blk_mq_freeze_queue(lo->lo_queue);
-
-       destroy_workqueue(lo->workqueue);
-       spin_lock_irq(&lo->lo_work_lock);
-       list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
-                               idle_list) {
-               list_del(&worker->idle_list);
-               rb_erase(&worker->rb_node, &lo->worker_tree);
-               css_put(worker->blkcg_css);
-               kfree(worker);
-       }
-       spin_unlock_irq(&lo->lo_work_lock);
-       del_timer_sync(&lo->timer);
+       /*
+        * Freeze the request queue when unbinding on a live file descriptor and
+        * thus an open device.  When called from ->release we are guaranteed
+        * that there is no I/O in progress already.
+        */
+       if (!release)
+               blk_mq_freeze_queue(lo->lo_queue);
 
        spin_lock_irq(&lo->lo_lock);
        filp = lo->lo_backing_file;
@@ -1151,7 +1163,8 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
        mapping_set_gfp_mask(filp->f_mapping, gfp);
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
-       blk_mq_unfreeze_queue(lo->lo_queue);
+       if (!release)
+               blk_mq_unfreeze_queue(lo->lo_queue);
 
        disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
 
@@ -1202,11 +1215,20 @@ static int loop_clr_fd(struct loop_device *lo)
 {
        int err;
 
-       err = mutex_lock_killable(&lo->lo_mutex);
+       /*
+        * Since lo_ioctl() is called without locks held, it is possible that
+        * loop_configure()/loop_change_fd() and loop_clr_fd() run in parallel.
+        *
+        * Therefore, use global lock when setting Lo_rundown state in order to
+        * make sure that loop_validate_file() will fail if the "struct file"
+        * which loop_configure()/loop_change_fd() found via fget() was this
+        * loop device.
+        */
+       err = loop_global_lock_killable(lo, true);
        if (err)
                return err;
        if (lo->lo_state != Lo_bound) {
-               mutex_unlock(&lo->lo_mutex);
+               loop_global_unlock(lo, true);
                return -ENXIO;
        }
        /*
@@ -1219,13 +1241,13 @@ static int loop_clr_fd(struct loop_device *lo)
         * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
         * command to fail with EBUSY.
         */
-       if (atomic_read(&lo->lo_refcnt) > 1) {
+       if (disk_openers(lo->lo_disk) > 1) {
                lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
-               mutex_unlock(&lo->lo_mutex);
+               loop_global_unlock(lo, true);
                return 0;
        }
        lo->lo_state = Lo_rundown;
-       mutex_unlock(&lo->lo_mutex);
+       loop_global_unlock(lo, true);
 
        __loop_clr_fd(lo, false);
        return 0;
@@ -1257,15 +1279,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
        /* I/O need to be drained during transfer transition */
        blk_mq_freeze_queue(lo->lo_queue);
 
-       if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
-               /* If any pages were dirtied after invalidate_bdev(), try again */
-               err = -EAGAIN;
-               pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
-                       __func__, lo->lo_number, lo->lo_file_name,
-                       lo->lo_device->bd_inode->i_mapping->nrpages);
-               goto out_unfreeze;
-       }
-
        prev_lo_flags = lo->lo_flags;
 
        err = loop_set_status_from_info(lo, info);
@@ -1476,21 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
        invalidate_bdev(lo->lo_device);
 
        blk_mq_freeze_queue(lo->lo_queue);
-
-       /* invalidate_bdev should have truncated all the pages */
-       if (lo->lo_device->bd_inode->i_mapping->nrpages) {
-               err = -EAGAIN;
-               pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
-                       __func__, lo->lo_number, lo->lo_file_name,
-                       lo->lo_device->bd_inode->i_mapping->nrpages);
-               goto out_unfreeze;
-       }
-
        blk_queue_logical_block_size(lo->lo_queue, arg);
        blk_queue_physical_block_size(lo->lo_queue, arg);
        blk_queue_io_min(lo->lo_queue, arg);
        loop_update_dio(lo);
-out_unfreeze:
        blk_mq_unfreeze_queue(lo->lo_queue);
 
        return err;
@@ -1720,33 +1722,15 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 }
 #endif
 
-static int lo_open(struct block_device *bdev, fmode_t mode)
-{
-       struct loop_device *lo = bdev->bd_disk->private_data;
-       int err;
-
-       err = mutex_lock_killable(&lo->lo_mutex);
-       if (err)
-               return err;
-       if (lo->lo_state == Lo_deleting)
-               err = -ENXIO;
-       else
-               atomic_inc(&lo->lo_refcnt);
-       mutex_unlock(&lo->lo_mutex);
-       return err;
-}
-
 static void lo_release(struct gendisk *disk, fmode_t mode)
 {
        struct loop_device *lo = disk->private_data;
 
-       mutex_lock(&lo->lo_mutex);
-       if (atomic_dec_return(&lo->lo_refcnt))
-               goto out_unlock;
+       if (disk_openers(disk) > 0)
+               return;
 
-       if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
-               if (lo->lo_state != Lo_bound)
-                       goto out_unlock;
+       mutex_lock(&lo->lo_mutex);
+       if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
                lo->lo_state = Lo_rundown;
                mutex_unlock(&lo->lo_mutex);
                /*
@@ -1755,27 +1739,30 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
                 */
                __loop_clr_fd(lo, true);
                return;
-       } else if (lo->lo_state == Lo_bound) {
-               /*
-                * Otherwise keep thread (if running) and config,
-                * but flush possible ongoing bios in thread.
-                */
-               blk_mq_freeze_queue(lo->lo_queue);
-               blk_mq_unfreeze_queue(lo->lo_queue);
        }
-
-out_unlock:
        mutex_unlock(&lo->lo_mutex);
 }
 
+static void lo_free_disk(struct gendisk *disk)
+{
+       struct loop_device *lo = disk->private_data;
+
+       if (lo->workqueue)
+               destroy_workqueue(lo->workqueue);
+       loop_free_idle_workers(lo, true);
+       del_timer_sync(&lo->timer);
+       mutex_destroy(&lo->lo_mutex);
+       kfree(lo);
+}
+
 static const struct block_device_operations lo_fops = {
        .owner =        THIS_MODULE,
-       .open =         lo_open,
        .release =      lo_release,
        .ioctl =        lo_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl = lo_compat_ioctl,
 #endif
+       .free_disk =    lo_free_disk,
 };
 
 /*
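
Dropping lo_open() and the private refcount works because disk_openers() exposes the open count the block layer already maintains, and the new ->free_disk hook runs exactly once, after the last reference to the gendisk is dropped. A minimal sketch of such ops, names hypothetical:

	static void my_release(struct gendisk *disk, fmode_t mode)
	{
		if (disk_openers(disk) > 0)
			return;			/* not the last close */
		/* ... last-close teardown ... */
	}

	static void my_free_disk(struct gendisk *disk)
	{
		kfree(disk->private_data);	/* gendisk is going away */
	}

	static const struct block_device_operations my_fops = {
		.owner		= THIS_MODULE,
		.release	= my_release,
		.free_disk	= my_free_disk,
	};
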
@@ -1834,12 +1821,14 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
        cmd->blkcg_css = NULL;
        cmd->memcg_css = NULL;
 #ifdef CONFIG_BLK_CGROUP
-       if (rq->bio && rq->bio->bi_blkg) {
-               cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
+       if (rq->bio) {
+               cmd->blkcg_css = bio_blkcg_css(rq->bio);
 #ifdef CONFIG_MEMCG
-               cmd->memcg_css =
-                       cgroup_get_e_css(cmd->blkcg_css->cgroup,
-                                       &memory_cgrp_subsys);
+               if (cmd->blkcg_css) {
+                       cmd->memcg_css =
+                               cgroup_get_e_css(cmd->blkcg_css->cgroup,
+                                               &memory_cgrp_subsys);
+               }
 #endif
        }
 #endif
@@ -1888,11 +1877,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
        }
 }
 
-static void loop_set_timer(struct loop_device *lo)
-{
-       timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
-}
-
 static void loop_process_work(struct loop_worker *worker,
                        struct list_head *cmd_list, struct loop_device *lo)
 {
@@ -1941,27 +1925,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
        loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
 }
 
-static void loop_free_idle_workers(struct timer_list *timer)
-{
-       struct loop_device *lo = container_of(timer, struct loop_device, timer);
-       struct loop_worker *pos, *worker;
-
-       spin_lock_irq(&lo->lo_work_lock);
-       list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
-                               idle_list) {
-               if (time_is_after_jiffies(worker->last_ran_at +
-                                               LOOP_IDLE_WORKER_TIMEOUT))
-                       break;
-               list_del(&worker->idle_list);
-               rb_erase(&worker->rb_node, &lo->worker_tree);
-               css_put(worker->blkcg_css);
-               kfree(worker);
-       }
-       if (!list_empty(&lo->idle_worker_list))
-               loop_set_timer(lo);
-       spin_unlock_irq(&lo->lo_work_lock);
-}
-
 static const struct blk_mq_ops loop_mq_ops = {
        .queue_rq       = loop_queue_rq,
        .complete       = lo_complete_rq,
@@ -1977,6 +1940,9 @@ static int loop_add(int i)
        lo = kzalloc(sizeof(*lo), GFP_KERNEL);
        if (!lo)
                goto out;
+       lo->worker_tree = RB_ROOT;
+       INIT_LIST_HEAD(&lo->idle_worker_list);
+       timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
        lo->lo_state = Lo_unbound;
 
        err = mutex_lock_killable(&loop_ctl_mutex);
@@ -2046,11 +2012,12 @@ static int loop_add(int i)
         */
        if (!part_shift)
                disk->flags |= GENHD_FL_NO_PART;
-       atomic_set(&lo->lo_refcnt, 0);
        mutex_init(&lo->lo_mutex);
        lo->lo_number           = i;
        spin_lock_init(&lo->lo_lock);
        spin_lock_init(&lo->lo_work_lock);
+       INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
+       INIT_LIST_HEAD(&lo->rootcg_cmd_list);
        disk->major             = LOOP_MAJOR;
        disk->first_minor       = i << part_shift;
        disk->minors            = 1 << part_shift;
@@ -2090,15 +2057,14 @@ static void loop_remove(struct loop_device *lo)
 {
        /* Make this loop device unreachable from pathname. */
        del_gendisk(lo->lo_disk);
-       blk_cleanup_disk(lo->lo_disk);
+       blk_cleanup_queue(lo->lo_disk->queue);
        blk_mq_free_tag_set(&lo->tag_set);
 
        mutex_lock(&loop_ctl_mutex);
        idr_remove(&loop_index_idr, lo->lo_number);
        mutex_unlock(&loop_ctl_mutex);
-       /* There is no route which can find this loop device. */
-       mutex_destroy(&lo->lo_mutex);
-       kfree(lo);
+
+       put_disk(lo->lo_disk);
 }
 
 static void loop_probe(dev_t dev)
@@ -2137,13 +2103,12 @@ static int loop_control_remove(int idx)
        ret = mutex_lock_killable(&lo->lo_mutex);
        if (ret)
                goto mark_visible;
-       if (lo->lo_state != Lo_unbound ||
-           atomic_read(&lo->lo_refcnt) > 0) {
+       if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
                mutex_unlock(&lo->lo_mutex);
                ret = -EBUSY;
                goto mark_visible;
        }
-       /* Mark this loop device no longer open()-able. */
+       /* Mark this loop device as no longer bound, but not quite unbound yet */
        lo->lo_state = Lo_deleting;
        mutex_unlock(&lo->lo_mutex);
 
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
deleted file mode 100644 (file)
index 082d4b6..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * loop.h
- *
- * Written by Theodore Ts'o, 3/29/93.
- *
- * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
- * permitted under the GNU General Public License.
- */
-#ifndef _LINUX_LOOP_H
-#define _LINUX_LOOP_H
-
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/blk-mq.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <uapi/linux/loop.h>
-
-/* Possible states of device */
-enum {
-       Lo_unbound,
-       Lo_bound,
-       Lo_rundown,
-       Lo_deleting,
-};
-
-struct loop_func_table;
-
-struct loop_device {
-       int             lo_number;
-       atomic_t        lo_refcnt;
-       loff_t          lo_offset;
-       loff_t          lo_sizelimit;
-       int             lo_flags;
-       char            lo_file_name[LO_NAME_SIZE];
-
-       struct file *   lo_backing_file;
-       struct block_device *lo_device;
-
-       gfp_t           old_gfp_mask;
-
-       spinlock_t              lo_lock;
-       int                     lo_state;
-       spinlock_t              lo_work_lock;
-       struct workqueue_struct *workqueue;
-       struct work_struct      rootcg_work;
-       struct list_head        rootcg_cmd_list;
-       struct list_head        idle_worker_list;
-       struct rb_root          worker_tree;
-       struct timer_list       timer;
-       bool                    use_dio;
-       bool                    sysfs_inited;
-
-       struct request_queue    *lo_queue;
-       struct blk_mq_tag_set   tag_set;
-       struct gendisk          *lo_disk;
-       struct mutex            lo_mutex;
-       bool                    idr_visible;
-};
-
-struct loop_cmd {
-       struct list_head list_entry;
-       bool use_aio; /* use AIO interface to handle I/O */
-       atomic_t ref; /* only for aio */
-       long ret;
-       struct kiocb iocb;
-       struct bio_vec *bvec;
-       struct cgroup_subsys_state *blkcg_css;
-       struct cgroup_subsys_state *memcg_css;
-};
-
-#endif
index 4fbaf0b4958b7931ef27d0e6c2e2a16a7cbac827..27386a572ba490c3417489e1d3fb279543fbaed3 100644 (file)
@@ -2729,7 +2729,7 @@ static int mtip_dma_alloc(struct driver_data *dd)
 {
        struct mtip_port *port = dd->port;
 
-       /* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
+       /* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
        port->block1 =
                dma_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
                                        &port->block1_dma, GFP_KERNEL);
index 5a1f98494dddf9e24cc5d92d6b8f0ae618c5c516..ac8b045c777c00b5889a650e7287274a1742b5b1 100644 (file)
@@ -333,7 +333,6 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
 
        if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
                nbd->disk->queue->limits.discard_granularity = blksize;
-               nbd->disk->queue->limits.discard_alignment = blksize;
                blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
        }
        blk_queue_logical_block_size(nbd->disk->queue, blksize);
@@ -947,11 +946,15 @@ static int wait_for_reconnect(struct nbd_device *nbd)
        struct nbd_config *config = nbd->config;
        if (!config->dead_conn_timeout)
                return 0;
-       if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
+
+       if (!wait_event_timeout(config->conn_wait,
+                               test_bit(NBD_RT_DISCONNECTED,
+                                        &config->runtime_flags) ||
+                               atomic_read(&config->live_connections) > 0,
+                               config->dead_conn_timeout))
                return 0;
-       return wait_event_timeout(config->conn_wait,
-                                 atomic_read(&config->live_connections) > 0,
-                                 config->dead_conn_timeout) > 0;
+
+       return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
 }
 
 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
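
The rewrite relies on wait_event_timeout() returning 0 only when the timeout expires with the condition still false, so one wait covers both wake-up reasons and the final test tells them apart. Schematically, with disconnected()/has_live_conn() as stand-in predicates:

	if (!wait_event_timeout(config->conn_wait,
				disconnected(config) || has_live_conn(config),
				config->dead_conn_timeout))
		return 0;	/* timed out: no reconnect, give up */

	return !disconnected(config);	/* true only for a live reconnect */
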
@@ -1217,11 +1220,11 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
        return -ENOSPC;
 }
 
-static void nbd_bdev_reset(struct block_device *bdev)
+static void nbd_bdev_reset(struct nbd_device *nbd)
 {
-       if (bdev->bd_openers > 1)
+       if (disk_openers(nbd->disk) > 1)
                return;
-       set_capacity(bdev->bd_disk, 0);
+       set_capacity(nbd->disk, 0);
 }
 
 static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1231,8 +1234,6 @@ static void nbd_parse_flags(struct nbd_device *nbd)
                set_disk_ro(nbd->disk, true);
        else
                set_disk_ro(nbd->disk, false);
-       if (config->flags & NBD_FLAG_SEND_TRIM)
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
        if (config->flags & NBD_FLAG_SEND_FLUSH) {
                if (config->flags & NBD_FLAG_SEND_FUA)
                        blk_queue_write_cache(nbd->disk->queue, true, true);
@@ -1318,9 +1319,7 @@ static void nbd_config_put(struct nbd_device *nbd)
 
                nbd->tag_set.timeout = 0;
                nbd->disk->queue->limits.discard_granularity = 0;
-               nbd->disk->queue->limits.discard_alignment = 0;
-               blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+               blk_queue_max_discard_sectors(nbd->disk->queue, 0);
 
                mutex_unlock(&nbd->config_lock);
                nbd_put(nbd);
@@ -1389,7 +1388,7 @@ static int nbd_start_device(struct nbd_device *nbd)
        return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
 }
 
-static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
+static int nbd_start_device_ioctl(struct nbd_device *nbd)
 {
        struct nbd_config *config = nbd->config;
        int ret;
@@ -1408,7 +1407,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
        flush_workqueue(nbd->recv_workq);
 
        mutex_lock(&nbd->config_lock);
-       nbd_bdev_reset(bdev);
+       nbd_bdev_reset(nbd);
        /* user requested, ignore socket errors */
        if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
                ret = 0;
@@ -1422,7 +1421,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
 {
        sock_shutdown(nbd);
        __invalidate_device(bdev, true);
-       nbd_bdev_reset(bdev);
+       nbd_bdev_reset(nbd);
        if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
                nbd_config_put(nbd);
@@ -1468,7 +1467,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
                config->flags = arg;
                return 0;
        case NBD_DO_IT:
-               return nbd_start_device_ioctl(nbd, bdev);
+               return nbd_start_device_ioctl(nbd);
        case NBD_CLEAR_QUE:
                /*
                 * This is for compatibility only.  The queue is always cleared
@@ -1579,7 +1578,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
        struct nbd_device *nbd = disk->private_data;
 
        if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
-                       disk->part0->bd_openers == 0)
+                       disk_openers(disk) == 0)
                nbd_disconnect_and_put(nbd);
 
        nbd_config_put(nbd);
@@ -1784,7 +1783,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
        blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
        disk->queue->limits.discard_granularity = 0;
-       disk->queue->limits.discard_alignment = 0;
        blk_queue_max_discard_sectors(disk->queue, 0);
        blk_queue_max_segment_size(disk->queue, UINT_MAX);
        blk_queue_max_segments(disk->queue, USHRT_MAX);
@@ -2082,6 +2080,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
        mutex_lock(&nbd->config_lock);
        nbd_disconnect(nbd);
        sock_shutdown(nbd);
+       wake_up(&nbd->config->conn_wait);
        /*
         * Make sure recv thread has finished, we can safely call nbd_clear_que()
         * to cancel the inflight I/Os.
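
One theme running through the block updates in this series: QUEUE_FLAG_DISCARD is gone. A driver now advertises discard support purely by setting a non-zero max_discard_sectors limit, and withdraws it by setting the limit back to zero, exactly as the nbd hunks above do. A minimal sketch of the new pattern, using a hypothetical driver and only the block-layer helpers visible in this patch:

    #include <linux/blkdev.h>

    /* Hypothetical helper: enable or disable discard on a queue. */
    static void foo_config_discard(struct request_queue *q, bool supported,
                                   unsigned int granularity)
    {
            if (!supported) {
                    /* A zero limit now means "no discard"; there is no
                     * queue flag left to clear. */
                    blk_queue_max_discard_sectors(q, 0);
                    return;
            }
            q->limits.discard_granularity = granularity;
            blk_queue_max_discard_sectors(q, UINT_MAX);
    }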
index c441a4972064e8b5995cc9d35f79e1d8c377d0e2..539cfeac263dbe1f49e0a7c1f9f77037554b65cc 100644 (file)
@@ -11,6 +11,9 @@
 #include <linux/init.h>
 #include "null_blk.h"
 
+#undef pr_fmt
+#define pr_fmt(fmt)    "null_blk: " fmt
+
 #define FREE_BATCH             16
 
 #define TICKS_PER_SEC          50ULL
@@ -232,6 +235,7 @@ static struct nullb_device *null_alloc_dev(void);
 static void null_free_dev(struct nullb_device *dev);
 static void null_del_dev(struct nullb *nullb);
 static int null_add_dev(struct nullb_device *dev);
+static struct nullb *null_find_dev_by_name(const char *name);
 static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
 
 static inline struct nullb_device *to_nullb_device(struct config_item *item)
@@ -560,6 +564,9 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
 {
        struct nullb_device *dev;
 
+       if (null_find_dev_by_name(name))
+               return ERR_PTR(-EEXIST);
+
        dev = null_alloc_dev();
        if (!dev)
                return ERR_PTR(-ENOMEM);
@@ -1765,9 +1772,7 @@ static void null_config_discard(struct nullb *nullb)
        }
 
        nullb->q->limits.discard_granularity = nullb->dev->blocksize;
-       nullb->q->limits.discard_alignment = nullb->dev->blocksize;
        blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
 }
 
 static const struct block_device_operations null_bio_ops = {
@@ -2061,7 +2066,13 @@ static int null_add_dev(struct nullb_device *dev)
 
        null_config_discard(nullb);
 
-       sprintf(nullb->disk_name, "nullb%d", nullb->index);
+       if (config_item_name(&dev->item)) {
+               /* Use configfs dir name as the device name */
+               snprintf(nullb->disk_name, sizeof(nullb->disk_name),
+                        "%s", config_item_name(&dev->item));
+       } else {
+               sprintf(nullb->disk_name, "nullb%d", nullb->index);
+       }
 
        rv = null_gendisk_register(nullb);
        if (rv)
@@ -2071,6 +2082,8 @@ static int null_add_dev(struct nullb_device *dev)
        list_add_tail(&nullb->list, &nullb_list);
        mutex_unlock(&lock);
 
+       pr_info("disk %s created\n", nullb->disk_name);
+
        return 0;
 out_cleanup_zone:
        null_free_zoned_dev(dev);
@@ -2088,12 +2101,53 @@ out:
        return rv;
 }
 
+static struct nullb *null_find_dev_by_name(const char *name)
+{
+       struct nullb *nullb = NULL, *nb;
+
+       mutex_lock(&lock);
+       list_for_each_entry(nb, &nullb_list, list) {
+               if (strcmp(nb->disk_name, name) == 0) {
+                       nullb = nb;
+                       break;
+               }
+       }
+       mutex_unlock(&lock);
+
+       return nullb;
+}
+
+static int null_create_dev(void)
+{
+       struct nullb_device *dev;
+       int ret;
+
+       dev = null_alloc_dev();
+       if (!dev)
+               return -ENOMEM;
+
+       ret = null_add_dev(dev);
+       if (ret) {
+               null_free_dev(dev);
+               return ret;
+       }
+
+       return 0;
+}
+
+static void null_destroy_dev(struct nullb *nullb)
+{
+       struct nullb_device *dev = nullb->dev;
+
+       null_del_dev(nullb);
+       null_free_dev(dev);
+}
+
 static int __init null_init(void)
 {
        int ret = 0;
        unsigned int i;
        struct nullb *nullb;
-       struct nullb_device *dev;
 
        if (g_bs > PAGE_SIZE) {
                pr_warn("invalid block size\n");
@@ -2113,19 +2167,21 @@ static int __init null_init(void)
        }
 
        if (g_queue_mode == NULL_Q_RQ) {
-               pr_err("legacy IO path no longer available\n");
+               pr_err("legacy IO path is no longer available\n");
                return -EINVAL;
        }
+
        if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
                if (g_submit_queues != nr_online_nodes) {
                        pr_warn("submit_queues param is set to %u.\n",
-                                                       nr_online_nodes);
+                               nr_online_nodes);
                        g_submit_queues = nr_online_nodes;
                }
-       } else if (g_submit_queues > nr_cpu_ids)
+       } else if (g_submit_queues > nr_cpu_ids) {
                g_submit_queues = nr_cpu_ids;
-       else if (g_submit_queues <= 0)
+       } else if (g_submit_queues <= 0) {
                g_submit_queues = 1;
+       }
 
        if (g_queue_mode == NULL_Q_MQ && shared_tags) {
                ret = null_init_tag_set(NULL, &tag_set);
@@ -2149,16 +2205,9 @@ static int __init null_init(void)
        }
 
        for (i = 0; i < nr_devices; i++) {
-               dev = null_alloc_dev();
-               if (!dev) {
-                       ret = -ENOMEM;
-                       goto err_dev;
-               }
-               ret = null_add_dev(dev);
-               if (ret) {
-                       null_free_dev(dev);
+               ret = null_create_dev();
+               if (ret)
                        goto err_dev;
-               }
        }
 
        pr_info("module loaded\n");
@@ -2167,9 +2216,7 @@ static int __init null_init(void)
 err_dev:
        while (!list_empty(&nullb_list)) {
                nullb = list_entry(nullb_list.next, struct nullb, list);
-               dev = nullb->dev;
-               null_del_dev(nullb);
-               null_free_dev(dev);
+               null_destroy_dev(nullb);
        }
        unregister_blkdev(null_major, "nullb");
 err_conf:
@@ -2190,12 +2237,8 @@ static void __exit null_exit(void)
 
        mutex_lock(&lock);
        while (!list_empty(&nullb_list)) {
-               struct nullb_device *dev;
-
                nullb = list_entry(nullb_list.next, struct nullb, list);
-               dev = nullb->dev;
-               null_del_dev(nullb);
-               null_free_dev(dev);
+               null_destroy_dev(nullb);
        }
        mutex_unlock(&lock);
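
Two usability changes land in null_blk here: every pr_*() message gains a "null_blk: " prefix via pr_fmt, and a configfs-created device now takes its disk name from its configfs directory, with duplicate names rejected up front (mkdir of an existing name fails with EEXIST). A short sketch of the prefix mechanism, assuming nothing beyond the standard printk machinery:

    /* Defining pr_fmt before any pr_*() call prepends the driver tag. */
    #undef pr_fmt
    #define pr_fmt(fmt)     "null_blk: " fmt

    #include <linux/printk.h>

    static void example(void)
    {
            pr_info("disk %s created\n", "foo");
            /* emits: "null_blk: disk foo created" */
    }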
 
index 78eb56b0ca55f5b0dd6917983ec0acbaa54f1255..4525a65e1b23d91bd6a756901c0d6d5064b99558 100644 (file)
 #include <linux/mutex.h>
 
 struct nullb_cmd {
-       struct request *rq;
-       struct bio *bio;
+       union {
+               struct request *rq;
+               struct bio *bio;
+       };
        unsigned int tag;
        blk_status_t error;
+       bool fake_timeout;
        struct nullb_queue *nq;
        struct hrtimer timer;
-       bool fake_timeout;
 };
 
 struct nullb_queue {
index dae54dd1aeac31df0a4757a1c52fd1df70faeadf..ed158ea4fdd1adf5f5abde17d5e7f216e9e0ca20 100644 (file)
@@ -6,6 +6,9 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+#undef pr_fmt
+#define pr_fmt(fmt)    "null_blk: " fmt
+
 static inline sector_t mb_to_sects(unsigned long mb)
 {
        return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
@@ -75,8 +78,8 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
                dev->zone_capacity = dev->zone_size;
 
        if (dev->zone_capacity > dev->zone_size) {
-               pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
-                                       dev->zone_capacity, dev->zone_size);
+               pr_err("zone capacity (%lu MB) larger than zone size (%lu MB)\n",
+                      dev->zone_capacity, dev->zone_size);
                return -EINVAL;
        }
 
index 86c8794ede415c0476f6eaddf32c63c2c946fb0e..789093375344310e882c4314cdecd0bac1637d7a 100644 (file)
@@ -12,7 +12,7 @@
  * Theory of operation:
  *
  * At the lowest level, there is the standard driver for the CD/DVD device,
- * typically ide-cd.c or sr.c. This driver can handle read and write requests,
+ * such as drivers/scsi/sr.c. This driver can handle read and write requests,
  * but it doesn't know anything about the special restrictions that apply to
  * packet writing. One restriction is that write requests must be aligned to
  * packet boundaries on the physical media, and the size of a write request
@@ -522,7 +522,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
                goto no_pkt;
 
        pkt->frames = frames;
-       pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames);
+       pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL);
        if (!pkt->w_bio)
                goto no_bio;
 
@@ -536,27 +536,21 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
        bio_list_init(&pkt->orig_bios);
 
        for (i = 0; i < frames; i++) {
-               struct bio *bio = bio_kmalloc(GFP_KERNEL, 1);
-               if (!bio)
+               pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL);
+               if (!pkt->r_bios[i])
                        goto no_rd_bio;
-
-               pkt->r_bios[i] = bio;
        }
 
        return pkt;
 
 no_rd_bio:
-       for (i = 0; i < frames; i++) {
-               struct bio *bio = pkt->r_bios[i];
-               if (bio)
-                       bio_put(bio);
-       }
-
+       for (i = 0; i < frames; i++)
+               kfree(pkt->r_bios[i]);
 no_page:
        for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
                if (pkt->pages[i])
                        __free_page(pkt->pages[i]);
-       bio_put(pkt->w_bio);
+       kfree(pkt->w_bio);
 no_bio:
        kfree(pkt);
 no_pkt:
@@ -570,14 +564,11 @@ static void pkt_free_packet_data(struct packet_data *pkt)
 {
        int i;
 
-       for (i = 0; i < pkt->frames; i++) {
-               struct bio *bio = pkt->r_bios[i];
-               if (bio)
-                       bio_put(bio);
-       }
+       for (i = 0; i < pkt->frames; i++)
+               kfree(pkt->r_bios[i]);
        for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
                __free_page(pkt->pages[i]);
-       bio_put(pkt->w_bio);
+       kfree(pkt->w_bio);
        kfree(pkt);
 }
 
@@ -951,6 +942,7 @@ static void pkt_end_io_read(struct bio *bio)
 
        if (bio->bi_status)
                atomic_inc(&pkt->io_errors);
+       bio_uninit(bio);
        if (atomic_dec_and_test(&pkt->io_wait)) {
                atomic_inc(&pkt->run_sm);
                wake_up(&pd->wqueue);
@@ -968,6 +960,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
 
        pd->stats.pkt_ended++;
 
+       bio_uninit(bio);
        pkt_bio_finished(pd);
        atomic_dec(&pkt->io_wait);
        atomic_inc(&pkt->run_sm);
@@ -1022,7 +1015,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
                        continue;
 
                bio = pkt->r_bios[f];
-               bio_reset(bio, pd->bdev, REQ_OP_READ);
+               bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
                bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
                bio->bi_end_io = pkt_end_io_read;
                bio->bi_private = pkt;
@@ -1235,7 +1228,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 {
        int f;
 
-       bio_reset(pkt->w_bio, pd->bdev, REQ_OP_WRITE);
+       bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
+                REQ_OP_WRITE);
        pkt->w_bio->bi_iter.bi_sector = pkt->sector;
        pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
        pkt->w_bio->bi_private = pkt;
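
The pktcdvd conversion tracks the 5.19 rework of bio_kmalloc(): the argument order is now (nr_vecs, gfp), the returned bio is uninitialized, and the caller pairs bio_init() against the inline vecs with bio_uninit() plus a plain kfree() rather than bio_put(). A sketch of the new lifecycle under those assumptions:

    #include <linux/bio.h>

    static int example_read(struct block_device *bdev, sector_t sector)
    {
            struct bio *bio;

            bio = bio_kmalloc(1, GFP_KERNEL);       /* nr_vecs first now */
            if (!bio)
                    return -ENOMEM;
            bio_init(bio, bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
            bio->bi_iter.bi_sector = sector;
            /* ... add pages, submit, wait for completion ... */
            bio_uninit(bio);        /* pairs with bio_init() */
            kfree(bio);             /* not bio_put() */
            return 0;
    }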
index b844432bad20b4b07b7fdc89424ee0095428ec28..2b21f717cce1a71d6a2f0fc2dd912b8958fae5d4 100644 (file)
@@ -4942,7 +4942,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
        blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
 
        if (rbd_dev->opts->trim) {
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
                q->limits.discard_granularity = rbd_dev->opts->alloc_size;
                blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
                blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
index b66e8840b94b88400e0cba0ea1af01b815a390e3..409c76b81aed4613f57cb3f5cb14d4db3a331ae3 100644 (file)
@@ -25,6 +25,7 @@ static int rnbd_client_major;
 static DEFINE_IDA(index_ida);
 static DEFINE_MUTEX(sess_lock);
 static LIST_HEAD(sess_list);
+static struct workqueue_struct *rnbd_clt_wq;
 
 /*
  * Maximum number of partitions an instance can have.
@@ -1364,11 +1365,9 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
        blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
        dev->queue->limits.discard_granularity  = dev->discard_granularity;
        dev->queue->limits.discard_alignment    = dev->discard_alignment;
-       if (dev->max_discard_sectors)
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, dev->queue);
        if (dev->secure_discard)
-               blk_queue_flag_set(QUEUE_FLAG_SECERASE, dev->queue);
-
+               blk_queue_max_secure_erase_sectors(dev->queue,
+                               dev->max_discard_sectors);
        blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
        blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
        blk_queue_max_segments(dev->queue, dev->max_segments);
@@ -1761,12 +1760,12 @@ static void rnbd_destroy_sessions(void)
                         * procedure takes minutes.
                         */
                        INIT_WORK(&dev->unmap_on_rmmod_work, unmap_device_work);
-                       queue_work(system_long_wq, &dev->unmap_on_rmmod_work);
+                       queue_work(rnbd_clt_wq, &dev->unmap_on_rmmod_work);
                }
                rnbd_clt_put_sess(sess);
        }
        /* Wait for all scheduled unmap works */
-       flush_workqueue(system_long_wq);
+       flush_workqueue(rnbd_clt_wq);
        WARN_ON(!list_empty(&sess_list));
 }
 
@@ -1791,6 +1790,14 @@ static int __init rnbd_client_init(void)
                pr_err("Failed to load module, creating sysfs device files failed, err: %d\n",
                       err);
                unregister_blkdev(rnbd_client_major, "rnbd");
+               return err;
+       }
+       rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0);
+       if (!rnbd_clt_wq) {
+               pr_err("Failed to load module, alloc_workqueue failed.\n");
+               rnbd_clt_destroy_sysfs_files();
+               unregister_blkdev(rnbd_client_major, "rnbd");
+               err = -ENOMEM;
        }
 
        return err;
@@ -1801,6 +1808,7 @@ static void __exit rnbd_client_exit(void)
        rnbd_destroy_sessions();
        unregister_blkdev(rnbd_client_major, "rnbd");
        ida_destroy(&index_ida);
+       destroy_workqueue(rnbd_clt_wq);
 }
 
 module_init(rnbd_client_init);
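
rnbd-clt stops borrowing system_long_wq for its slow unmap-on-rmmod work and owns a dedicated queue instead, so flushing it cannot stall unrelated users of the shared queue. A sketch of the lifecycle, with hypothetical names:

    #include <linux/workqueue.h>

    static struct workqueue_struct *foo_wq;

    static int __init foo_init(void)
    {
            foo_wq = alloc_workqueue("foo_wq", 0, 0);
            if (!foo_wq)
                    return -ENOMEM;
            return 0;
    }

    static void __exit foo_exit(void)
    {
            flush_workqueue(foo_wq);        /* wait for queued unmap work */
            destroy_workqueue(foo_wq);
    }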
index 2c3df02b5e8ec3aa96bd34f32aa29e9a94570607..4309e52524691b9df4df0399d99a0d98060f53fc 100644 (file)
@@ -44,16 +44,12 @@ static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev)
 
 static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev)
 {
-       return blk_queue_secure_erase(bdev_get_queue(dev->bdev));
+       return bdev_max_secure_erase_sectors(dev->bdev);
 }
 
 static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev)
 {
-       if (!blk_queue_discard(bdev_get_queue(dev->bdev)))
-               return 0;
-
-       return blk_queue_get_max_sectors(bdev_get_queue(dev->bdev),
-                                        REQ_OP_DISCARD);
+       return bdev_max_discard_sectors(dev->bdev);
 }
 
 static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
@@ -63,7 +59,7 @@ static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
 
 static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev)
 {
-       return bdev_get_queue(dev->bdev)->limits.discard_alignment;
+       return bdev_discard_alignment(dev->bdev);
 }
 
 #endif /* RNBD_SRV_DEV_H */
index f04df6294650b6f31b4009eb790b2ace72427b6d..beaef43a67b9deee8a6bbd7303de602f8be6939f 100644 (file)
@@ -533,7 +533,6 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
                                        struct rnbd_srv_sess_dev *sess_dev)
 {
        struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev;
-       struct request_queue *q = bdev_get_queue(rnbd_dev->bdev);
 
        rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
        rsp->device_id =
@@ -558,9 +557,9 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
        rsp->secure_discard =
                cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev));
        rsp->cache_policy = 0;
-       if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+       if (bdev_write_cache(rnbd_dev->bdev))
                rsp->cache_policy |= RNBD_WRITEBACK;
-       if (blk_queue_fua(q))
+       if (bdev_fua(rnbd_dev->bdev))
                rsp->cache_policy |= RNBD_FUA;
 }
 
index a8bcf3f664af1526be4e5cb2914d0d1771a9c1dd..d624cc8eddc3c766eca2546aaff883a6f6cdf5d2 100644 (file)
@@ -867,11 +867,12 @@ static int virtblk_probe(struct virtio_device *vdev)
                blk_queue_io_opt(q, blk_size * opt_io_size);
 
        if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
-               q->limits.discard_granularity = blk_size;
-
                virtio_cread(vdev, struct virtio_blk_config,
                             discard_sector_alignment, &v);
-               q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
+               if (v)
+                       q->limits.discard_granularity = v << SECTOR_SHIFT;
+               else
+                       q->limits.discard_granularity = blk_size;
 
                virtio_cread(vdev, struct virtio_blk_config,
                             max_discard_sectors, &v);
@@ -888,8 +889,6 @@ static int virtblk_probe(struct virtio_device *vdev)
                        v = sg_elems;
                blk_queue_max_discard_segments(q,
                                               min(v, MAX_DISCARD_SEGMENTS));
-
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
        }
 
        if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
index de42458195bc1c8990faf8477168ea3b0d604e20..a97f2bf5b01b96febefd0c5637897fd744eb2918 100644 (file)
@@ -970,7 +970,6 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
        int status = BLKIF_RSP_OKAY;
        struct xen_blkif *blkif = ring->blkif;
        struct block_device *bdev = blkif->vbd.bdev;
-       unsigned long secure;
        struct phys_req preq;
 
        xen_blkif_get(blkif);
@@ -987,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
        }
        ring->st_ds_req++;
 
-       secure = (blkif->vbd.discard_secure &&
-                (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
-                BLKDEV_DISCARD_SECURE : 0;
+       if (blkif->vbd.discard_secure &&
+           (req->u.discard.flag & BLKIF_DISCARD_SECURE))
+               err = blkdev_issue_secure_erase(bdev,
+                               req->u.discard.sector_number,
+                               req->u.discard.nr_sectors, GFP_KERNEL);
+       else
+               err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
+                               req->u.discard.nr_sectors, GFP_KERNEL);
 
-       err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
-                                  req->u.discard.nr_sectors,
-                                  GFP_KERNEL, secure);
 fail_response:
        if (err == -EOPNOTSUPP) {
                pr_debug("discard op failed, not supported\n");
index f09040435e2e541730e05748e08622ad57d0cbc1..97de13b14175eb8ced14d1649a3be461a464ee8a 100644 (file)
@@ -484,7 +484,6 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
 {
        struct xen_vbd *vbd;
        struct block_device *bdev;
-       struct request_queue *q;
 
        vbd = &blkif->vbd;
        vbd->handle   = handle;
@@ -516,11 +515,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
        if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
                vbd->type |= VDISK_REMOVABLE;
 
-       q = bdev_get_queue(bdev);
-       if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+       if (bdev_write_cache(bdev))
                vbd->flush_support = true;
-
-       if (q && blk_queue_secure_erase(q))
+       if (bdev_max_secure_erase_sectors(bdev))
                vbd->discard_secure = true;
 
        vbd->feature_gnt_persistent = feature_persistent;
@@ -578,22 +575,21 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
        int err;
        int state = 0;
        struct block_device *bdev = be->blkif->vbd.bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
 
        if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
                return;
 
-       if (blk_queue_discard(q)) {
+       if (bdev_max_discard_sectors(bdev)) {
                err = xenbus_printf(xbt, dev->nodename,
                        "discard-granularity", "%u",
-                       q->limits.discard_granularity);
+                       bdev_discard_granularity(bdev));
                if (err) {
                        dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
                        return;
                }
                err = xenbus_printf(xbt, dev->nodename,
                        "discard-alignment", "%u",
-                       q->limits.discard_alignment);
+                       bdev_discard_alignment(bdev));
                if (err) {
                        dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
                        return;
index 003056d4f7f5f078b9dfda5c48833cc2a125a9a8..55e004d03ced0411394953e0f3272992025dde34 100644 (file)
@@ -229,8 +229,6 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);
 
-#define GRANT_INVALID_REF      0
-
 #define PARTS_PER_DISK         16
 #define PARTS_PER_EXT_DISK      256
 
@@ -321,7 +319,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
                        gnt_list_entry->page = granted_page;
                }
 
-               gnt_list_entry->gref = GRANT_INVALID_REF;
+               gnt_list_entry->gref = INVALID_GRANT_REF;
                list_add(&gnt_list_entry->node, &rinfo->grants);
                i++;
        }
@@ -350,7 +348,7 @@ static struct grant *get_free_grant(struct blkfront_ring_info *rinfo)
                                          node);
        list_del(&gnt_list_entry->node);
 
-       if (gnt_list_entry->gref != GRANT_INVALID_REF)
+       if (gnt_list_entry->gref != INVALID_GRANT_REF)
                rinfo->persistent_gnts_c--;
 
        return gnt_list_entry;
@@ -372,7 +370,7 @@ static struct grant *get_grant(grant_ref_t *gref_head,
        struct grant *gnt_list_entry = get_free_grant(rinfo);
        struct blkfront_info *info = rinfo->dev_info;
 
-       if (gnt_list_entry->gref != GRANT_INVALID_REF)
+       if (gnt_list_entry->gref != INVALID_GRANT_REF)
                return gnt_list_entry;
 
        /* Assign a gref to this page */
@@ -396,7 +394,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
        struct grant *gnt_list_entry = get_free_grant(rinfo);
        struct blkfront_info *info = rinfo->dev_info;
 
-       if (gnt_list_entry->gref != GRANT_INVALID_REF)
+       if (gnt_list_entry->gref != INVALID_GRANT_REF)
                return gnt_list_entry;
 
        /* Assign a gref to this page */
@@ -944,13 +942,13 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
        blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
 
        if (info->feature_discard) {
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
                blk_queue_max_discard_sectors(rq, get_capacity(gd));
                rq->limits.discard_granularity = info->discard_granularity ?:
                                                 info->physical_sector_size;
                rq->limits.discard_alignment = info->discard_alignment;
                if (info->feature_secdiscard)
-                       blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
+                       blk_queue_max_secure_erase_sectors(rq,
+                                                          get_capacity(gd));
        }
 
        /* Hard sector size and max sectors impersonate the equiv. hardware. */
@@ -1221,7 +1219,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
                list_for_each_entry_safe(persistent_gnt, n,
                                         &rinfo->grants, node) {
                        list_del(&persistent_gnt->node);
-                       if (persistent_gnt->gref != GRANT_INVALID_REF) {
+                       if (persistent_gnt->gref != INVALID_GRANT_REF) {
                                gnttab_end_foreign_access(persistent_gnt->gref,
                                                          0UL);
                                rinfo->persistent_gnts_c--;
@@ -1282,15 +1280,8 @@ free_shadow:
        flush_work(&rinfo->work);
 
        /* Free resources associated with old device channel. */
-       for (i = 0; i < info->nr_ring_pages; i++) {
-               if (rinfo->ring_ref[i] != GRANT_INVALID_REF) {
-                       gnttab_end_foreign_access(rinfo->ring_ref[i], 0);
-                       rinfo->ring_ref[i] = GRANT_INVALID_REF;
-               }
-       }
-       free_pages_exact(rinfo->ring.sring,
-                        info->nr_ring_pages * XEN_PAGE_SIZE);
-       rinfo->ring.sring = NULL;
+       xenbus_teardown_ring((void **)&rinfo->ring.sring, info->nr_ring_pages,
+                            rinfo->ring_ref);
 
        if (rinfo->irq)
                unbind_from_irqhandler(rinfo->irq, rinfo);
@@ -1475,7 +1466,7 @@ static int blkif_completion(unsigned long *id,
                         * to the tail of the list, so it will not be picked
                         * again unless we run out of persistent grants.
                         */
-                       s->grants_used[i]->gref = GRANT_INVALID_REF;
+                       s->grants_used[i]->gref = INVALID_GRANT_REF;
                        list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
                }
        }
@@ -1500,7 +1491,7 @@ static int blkif_completion(unsigned long *id,
                                        indirect_page = s->indirect_grants[i]->page;
                                        list_add(&indirect_page->lru, &rinfo->indirect_pages);
                                }
-                               s->indirect_grants[i]->gref = GRANT_INVALID_REF;
+                               s->indirect_grants[i]->gref = INVALID_GRANT_REF;
                                list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants);
                        }
                }
@@ -1606,8 +1597,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                                blkif_req(req)->error = BLK_STS_NOTSUPP;
                                info->feature_discard = 0;
                                info->feature_secdiscard = 0;
-                               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
-                               blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
+                               blk_queue_max_discard_sectors(rq, 0);
+                               blk_queue_max_secure_erase_sectors(rq, 0);
                        }
                        break;
                case BLKIF_OP_FLUSH_DISKCACHE:
@@ -1681,30 +1672,16 @@ static int setup_blkring(struct xenbus_device *dev,
                         struct blkfront_ring_info *rinfo)
 {
        struct blkif_sring *sring;
-       int err, i;
+       int err;
        struct blkfront_info *info = rinfo->dev_info;
        unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
-       grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
-
-       for (i = 0; i < info->nr_ring_pages; i++)
-               rinfo->ring_ref[i] = GRANT_INVALID_REF;
 
-       sring = alloc_pages_exact(ring_size, GFP_NOIO);
-       if (!sring) {
-               xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
-               return -ENOMEM;
-       }
-       SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
-
-       err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
-       if (err < 0) {
-               free_pages_exact(sring, ring_size);
-               rinfo->ring.sring = NULL;
+       err = xenbus_setup_ring(dev, GFP_NOIO, (void **)&sring,
+                               info->nr_ring_pages, rinfo->ring_ref);
+       if (err)
                goto fail;
-       }
-       for (i = 0; i < info->nr_ring_pages; i++)
-               rinfo->ring_ref[i] = gref[i];
+
+       XEN_FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
 
        err = xenbus_alloc_evtchn(dev, &rinfo->evtchn);
        if (err)
@@ -2544,13 +2521,13 @@ static void purge_persistent_grants(struct blkfront_info *info)
 
                list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
                                         node) {
-                       if (gnt_list_entry->gref == GRANT_INVALID_REF ||
+                       if (gnt_list_entry->gref == INVALID_GRANT_REF ||
                            !gnttab_try_end_foreign_access(gnt_list_entry->gref))
                                continue;
 
                        list_del(&gnt_list_entry->node);
                        rinfo->persistent_gnts_c--;
-                       gnt_list_entry->gref = GRANT_INVALID_REF;
+                       gnt_list_entry->gref = INVALID_GRANT_REF;
                        list_add_tail(&gnt_list_entry->node, &grants);
                }
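
blkfront's open-coded ring setup and teardown collapse into the new xenbus helpers, which allocate (or free) the shared pages and handle the grant references in one call. A sketch using only the interfaces visible above:

    static int example_setup(struct xenbus_device *dev,
                             struct blkfront_ring_info *rinfo,
                             unsigned int nr_ring_pages)
    {
            struct blkif_sring *sring;
            int err;

            /* Allocates the pages and grants them to the backend. */
            err = xenbus_setup_ring(dev, GFP_NOIO, (void **)&sring,
                                    nr_ring_pages, rinfo->ring_ref);
            if (err)
                    return err;
            /* Replaces the old SHARED_RING_INIT() + FRONT_RING_INIT() pair.
             * On disconnect, xenbus_teardown_ring() undoes all of it. */
            XEN_FRONT_RING_INIT(&rinfo->ring, sring,
                                nr_ring_pages * XEN_PAGE_SIZE);
            return 0;
    }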
 
index e9474b02012deb758f55df23859a986072a8dd98..6853dd3c7d3a2e4c2d51d75baf1f688ccdca155c 100644 (file)
@@ -1675,9 +1675,10 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
        bv.bv_len = PAGE_SIZE;
        bv.bv_offset = 0;
 
-       start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
+       start_time = bdev_start_io_acct(bdev->bd_disk->part0,
+                       SECTORS_PER_PAGE, op, jiffies);
        ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
-       disk_end_io_acct(bdev->bd_disk, op, start_time);
+       bdev_end_io_acct(bdev->bd_disk->part0, op, start_time);
 out:
        /*
         * If I/O fails, just return error(ie, non-zero) without
@@ -1786,7 +1787,7 @@ static ssize_t reset_store(struct device *dev,
        int ret;
        unsigned short do_reset;
        struct zram *zram;
-       struct block_device *bdev;
+       struct gendisk *disk;
 
        ret = kstrtou16(buf, 10, &do_reset);
        if (ret)
@@ -1796,26 +1797,26 @@ static ssize_t reset_store(struct device *dev,
                return -EINVAL;
 
        zram = dev_to_zram(dev);
-       bdev = zram->disk->part0;
+       disk = zram->disk;
 
-       mutex_lock(&bdev->bd_disk->open_mutex);
+       mutex_lock(&disk->open_mutex);
        /* Do not reset an active device or claimed device */
-       if (bdev->bd_openers || zram->claim) {
-               mutex_unlock(&bdev->bd_disk->open_mutex);
+       if (disk_openers(disk) || zram->claim) {
+               mutex_unlock(&disk->open_mutex);
                return -EBUSY;
        }
 
        /* From now on, no one can open /dev/zram[0-9] */
        zram->claim = true;
-       mutex_unlock(&bdev->bd_disk->open_mutex);
+       mutex_unlock(&disk->open_mutex);
 
        /* Make sure all the pending I/O are finished */
-       sync_blockdev(bdev);
+       sync_blockdev(disk->part0);
        zram_reset_device(zram);
 
-       mutex_lock(&bdev->bd_disk->open_mutex);
+       mutex_lock(&disk->open_mutex);
        zram->claim = false;
-       mutex_unlock(&bdev->bd_disk->open_mutex);
+       mutex_unlock(&disk->open_mutex);
 
        return len;
 }
@@ -1952,7 +1953,6 @@ static int zram_add(void)
        blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
        zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
        blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
 
        /*
         * zram_bio_discard() will clear all logical blocks if logical block
@@ -1987,19 +1987,18 @@ out_free_dev:
 
 static int zram_remove(struct zram *zram)
 {
-       struct block_device *bdev = zram->disk->part0;
        bool claimed;
 
-       mutex_lock(&bdev->bd_disk->open_mutex);
-       if (bdev->bd_openers) {
-               mutex_unlock(&bdev->bd_disk->open_mutex);
+       mutex_lock(&zram->disk->open_mutex);
+       if (disk_openers(zram->disk)) {
+               mutex_unlock(&zram->disk->open_mutex);
                return -EBUSY;
        }
 
        claimed = zram->claim;
        if (!claimed)
                zram->claim = true;
-       mutex_unlock(&bdev->bd_disk->open_mutex);
+       mutex_unlock(&zram->disk->open_mutex);
 
        zram_debugfs_unregister(zram);
 
@@ -2011,7 +2010,7 @@ static int zram_remove(struct zram *zram)
                ;
        } else {
                /* Make sure all the pending I/O are finished */
-               sync_blockdev(bdev);
+               sync_blockdev(zram->disk->part0);
                zram_reset_device(zram);
        }
 
index 2dc9da683a13e30fe830aa3965b8290eb7129a53..416f723a2dbb337508222aac17f29177029c60d1 100644 (file)
    actually talk to the hardware. Suggestions are welcome.
    Patches that work are more welcome though.  ;-)
 
- To Do List:
- ----------------------------------
-
- -- Modify sysctl/proc interface. I plan on having one directory per
- drive, with entries for outputing general drive information, and sysctl
- based tunable parameters such as whether the tray should auto-close for
- that drive. Suggestions (or patches) for this welcome!
-
-
  Revision History
  ----------------------------------
  1.00  Date Unknown -- David van Leeuwen <david@tm.tno.nl>
@@ -648,6 +639,7 @@ int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
        mutex_unlock(&cdrom_mutex);
        return 0;
 }
+EXPORT_SYMBOL(register_cdrom);
 #undef ENSURE
 
 void unregister_cdrom(struct cdrom_device_info *cdi)
@@ -663,6 +655,7 @@ void unregister_cdrom(struct cdrom_device_info *cdi)
 
        cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name);
 }
+EXPORT_SYMBOL(unregister_cdrom);
 
 int cdrom_get_media_event(struct cdrom_device_info *cdi,
                          struct media_event_desc *med)
@@ -690,6 +683,7 @@ int cdrom_get_media_event(struct cdrom_device_info *cdi,
        memcpy(med, &buffer[sizeof(*eh)], sizeof(*med));
        return 0;
 }
+EXPORT_SYMBOL(cdrom_get_media_event);
 
 static int cdrom_get_random_writable(struct cdrom_device_info *cdi,
                              struct rwrt_feature_desc *rfd)
@@ -1206,6 +1200,7 @@ err:
        cdi->use_count--;
        return ret;
 }
+EXPORT_SYMBOL(cdrom_open);
 
 /* This code is similar to that in open_for_data. The routine is called
    whenever an audio play operation is requested.
@@ -1301,6 +1296,7 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
                        cdo->tray_move(cdi, 1);
        }
 }
+EXPORT_SYMBOL(cdrom_release);
 
 static int cdrom_read_mech_status(struct cdrom_device_info *cdi, 
                                  struct cdrom_changer_info *buf)
@@ -1382,6 +1378,7 @@ int cdrom_number_of_slots(struct cdrom_device_info *cdi)
        kfree(info);
        return nslots;
 }
+EXPORT_SYMBOL(cdrom_number_of_slots);
 
 
 /* If SLOT < 0, unload the current slot.  Otherwise, try to load SLOT. */
@@ -1581,6 +1578,7 @@ void init_cdrom_command(struct packet_command *cgc, void *buf, int len,
        cgc->data_direction = type;
        cgc->timeout = CDROM_DEF_TIMEOUT;
 }
+EXPORT_SYMBOL(init_cdrom_command);
 
 /* DVD handling */
 
@@ -1999,6 +1997,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi,
        cgc->data_direction = CGC_DATA_READ;
        return cdo->generic_packet(cdi, cgc);
 }
+EXPORT_SYMBOL(cdrom_mode_sense);
 
 int cdrom_mode_select(struct cdrom_device_info *cdi,
                      struct packet_command *cgc)
@@ -2014,6 +2013,7 @@ int cdrom_mode_select(struct cdrom_device_info *cdi,
        cgc->data_direction = CGC_DATA_WRITE;
        return cdo->generic_packet(cdi, cgc);
 }
+EXPORT_SYMBOL(cdrom_mode_select);
 
 static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
                                 struct cdrom_subchnl *subchnl, int mcn)
@@ -2443,14 +2443,6 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi,
                        return -EINVAL;
        }
 
-       /*
-        * ->select_disc is a hook to allow a driver-specific way of
-        * seleting disc.  However, since there is no equivalent hook for
-        * cdrom_slot_status this may not actually be useful...
-        */
-       if (cdi->ops->select_disc)
-               return cdi->ops->select_disc(cdi, arg);
-
        cd_dbg(CD_CHANGER, "Using generic cdrom_select_disc()\n");
        return cdrom_select_disc(cdi, arg);
 }
@@ -2892,6 +2884,7 @@ use_toc:
        *last_written = toc.cdte_addr.lba;
        return 0;
 }
+EXPORT_SYMBOL(cdrom_get_last_written);
 
 /* return the next writable block. also for udf file system. */
 static int cdrom_get_next_writable(struct cdrom_device_info *cdi,
@@ -3429,18 +3422,7 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
 
        return -ENOSYS;
 }
-
-EXPORT_SYMBOL(cdrom_get_last_written);
-EXPORT_SYMBOL(register_cdrom);
-EXPORT_SYMBOL(unregister_cdrom);
-EXPORT_SYMBOL(cdrom_open);
-EXPORT_SYMBOL(cdrom_release);
 EXPORT_SYMBOL(cdrom_ioctl);
-EXPORT_SYMBOL(cdrom_number_of_slots);
-EXPORT_SYMBOL(cdrom_mode_select);
-EXPORT_SYMBOL(cdrom_mode_sense);
-EXPORT_SYMBOL(init_cdrom_command);
-EXPORT_SYMBOL(cdrom_get_media_event);
 
 #ifdef CONFIG_SYSCTL
 
index dc78a4fb879eb6a0fea8d17c880156567c398105..84a4aa9312cf8d845990f7b06ebc328b41064c7f 100644 (file)
@@ -327,7 +327,7 @@ static int cache_nbs(struct pci_dev *pdev, u32 cap_ptr)
 {
        int i;
 
-       if (amd_cache_northbridges() < 0)
+       if (!amd_nb_num())
                return -ENODEV;
 
        if (!amd_nb_has_feature(AMD_NB_GART))
index 49b8f22fdcf03b8883e378d28e07ce13172df854..db40037eb3470f1fe88f846454a50133183c597f 100644 (file)
@@ -299,8 +299,7 @@ static int ipmb_slave_cb(struct i2c_client *client,
        return 0;
 }
 
-static int ipmb_probe(struct i2c_client *client,
-                       const struct i2c_device_id *id)
+static int ipmb_probe(struct i2c_client *client)
 {
        struct ipmb_dev *ipmb_dev;
        int ret;
@@ -369,7 +368,7 @@ static struct i2c_driver ipmb_driver = {
                .name = "ipmb-dev",
                .acpi_match_table = ACPI_PTR(acpi_ipmb_id),
        },
-       .probe = ipmb_probe,
+       .probe_new = ipmb_probe,
        .remove = ipmb_remove,
        .id_table = ipmb_id,
 };
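
This is the first of three i2c IPMI drivers in this section (ipmb-dev, ipmi_ipmb, ssif below) converted from .probe to .probe_new, which drops the unused i2c_device_id argument. A minimal sketch of the conversion, with hypothetical names:

    #include <linux/i2c.h>

    /* New-style probe: no i2c_device_id parameter. */
    static int foo_probe(struct i2c_client *client)
    {
            /* If the matched id were still needed, i2c_match_id() could
             * recover it from the driver's own id table (assumption). */
            return 0;
    }

    static struct i2c_driver foo_driver = {
            .driver         = { .name = "foo" },
            .probe_new      = foo_probe,    /* was: .probe */
    };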
index b81b862532fb01b3b851f69f7d246e3c46a7ad82..ab19b4b3317ecdf082a2f36cfd835368e2353936 100644 (file)
@@ -442,8 +442,7 @@ static int ipmi_ipmb_remove(struct i2c_client *client)
        return 0;
 }
 
-static int ipmi_ipmb_probe(struct i2c_client *client,
-                          const struct i2c_device_id *id)
+static int ipmi_ipmb_probe(struct i2c_client *client)
 {
        struct device *dev = &client->dev;
        struct ipmi_ipmb_dev *iidev;
@@ -476,6 +475,7 @@ static int ipmi_ipmb_probe(struct i2c_client *client,
        slave_np = of_parse_phandle(dev->of_node, "slave-dev", 0);
        if (slave_np) {
                slave_adap = of_get_i2c_adapter_by_node(slave_np);
+               of_node_put(slave_np);
                if (!slave_adap) {
                        dev_notice(&client->dev,
                                   "Could not find slave adapter\n");
@@ -570,7 +570,7 @@ static struct i2c_driver ipmi_ipmb_driver = {
                .name = DEVICE_NAME,
                .of_match_table = of_ipmi_ipmb_match,
        },
-       .probe          = ipmi_ipmb_probe,
+       .probe_new      = ipmi_ipmb_probe,
        .remove         = ipmi_ipmb_remove,
        .id_table       = ipmi_ipmb_id,
 };
index f1827257ef0e08ecca1d62f26562b7e80c3f5f08..703433493c8520fc4eb031225eba37cb87e1fb9c 100644 (file)
@@ -11,8 +11,8 @@
  * Copyright 2002 MontaVista Software Inc.
  */
 
-#define pr_fmt(fmt) "%s" fmt, "IPMI message handler: "
-#define dev_fmt pr_fmt
+#define pr_fmt(fmt) "IPMI message handler: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
 
 #include <linux/module.h>
 #include <linux/errno.h>
@@ -145,6 +145,18 @@ module_param(default_max_retries, uint, 0644);
 MODULE_PARM_DESC(default_max_retries,
                 "The time (milliseconds) between retry sends in maintenance mode");
 
+/* The default maximum number of users that may register. */
+static unsigned int max_users = 30;
+module_param(max_users, uint, 0644);
+MODULE_PARM_DESC(max_users,
+                "The most users that may use the IPMI stack at one time.");
+
+/* The default maximum number of messages a user may have outstanding. */
+static unsigned int max_msgs_per_user = 100;
+module_param(max_msgs_per_user, uint, 0644);
+MODULE_PARM_DESC(max_msgs_per_user,
+                "The most message a user may have outstanding.");
+
 /* Call every ~1000 ms. */
 #define IPMI_TIMEOUT_TIME      1000
 
@@ -187,6 +199,8 @@ struct ipmi_user {
        /* Does this interface receive IPMI events? */
        bool gets_events;
 
+       atomic_t nr_msgs;
+
        /* Free must run in process context for RCU cleanup. */
        struct work_struct remove_work;
 };
@@ -442,6 +456,10 @@ struct ipmi_smi {
         */
        struct list_head users;
        struct srcu_struct users_srcu;
+       atomic_t nr_users;
+       struct device_attribute nr_users_devattr;
+       struct device_attribute nr_msgs_devattr;
+
 
        /* Used for wake ups at startup. */
        wait_queue_head_t waitq;
@@ -927,11 +945,13 @@ static int deliver_response(struct ipmi_smi *intf, struct ipmi_recv_msg *msg)
                 * risk.  At this moment, simply skip it in that case.
                 */
                ipmi_free_recv_msg(msg);
+               atomic_dec(&msg->user->nr_msgs);
        } else {
                int index;
                struct ipmi_user *user = acquire_ipmi_user(msg->user, &index);
 
                if (user) {
+                       atomic_dec(&user->nr_msgs);
                        user->handler->ipmi_recv_hndl(msg, user->handler_data);
                        release_ipmi_user(user, index);
                } else {
@@ -1230,6 +1250,11 @@ int ipmi_create_user(unsigned int          if_num,
        goto out_kfree;
 
  found:
+       if (atomic_add_return(1, &intf->nr_users) > max_users) {
+               rv = -EBUSY;
+               goto out_kfree;
+       }
+
        INIT_WORK(&new_user->remove_work, free_user_work);
 
        rv = init_srcu_struct(&new_user->release_barrier);
@@ -1244,6 +1269,7 @@ int ipmi_create_user(unsigned int          if_num,
        /* Note that each existing user holds a refcount to the interface. */
        kref_get(&intf->refcount);
 
+       atomic_set(&new_user->nr_msgs, 0);
        kref_init(&new_user->refcount);
        new_user->handler = handler;
        new_user->handler_data = handler_data;
@@ -1262,6 +1288,7 @@ int ipmi_create_user(unsigned int          if_num,
        return 0;
 
 out_kfree:
+       atomic_dec(&intf->nr_users);
        srcu_read_unlock(&ipmi_interfaces_srcu, index);
        vfree(new_user);
        return rv;
@@ -1336,6 +1363,7 @@ static void _ipmi_destroy_user(struct ipmi_user *user)
        /* Remove the user from the interface's sequence table. */
        spin_lock_irqsave(&intf->seq_lock, flags);
        list_del_rcu(&user->link);
+       atomic_dec(&intf->nr_users);
 
        for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) {
                if (intf->seq_table[i].inuse
@@ -2284,6 +2312,14 @@ static int i_ipmi_request(struct ipmi_user     *user,
        struct ipmi_recv_msg *recv_msg;
        int rv = 0;
 
+       if (user) {
+               if (atomic_add_return(1, &user->nr_msgs) > max_msgs_per_user) {
+                       /* Decrement will happen at the end of the routine. */
+                       rv = -EBUSY;
+                       goto out;
+               }
+       }
+
        if (supplied_recv)
                recv_msg = supplied_recv;
        else {
@@ -2296,7 +2332,7 @@ static int i_ipmi_request(struct ipmi_user     *user,
        recv_msg->user_msg_data = user_msg_data;
 
        if (supplied_smi)
-               smi_msg = (struct ipmi_smi_msg *) supplied_smi;
+               smi_msg = supplied_smi;
        else {
                smi_msg = ipmi_alloc_smi_msg();
                if (smi_msg == NULL) {
@@ -2348,13 +2384,16 @@ out_err:
                ipmi_free_smi_msg(smi_msg);
                ipmi_free_recv_msg(recv_msg);
        } else {
-               pr_debug("Send: %*ph\n", smi_msg->data_size, smi_msg->data);
+               dev_dbg(intf->si_dev, "Send: %*ph\n",
+                       smi_msg->data_size, smi_msg->data);
 
                smi_send(intf, intf->handlers, smi_msg, priority);
        }
        rcu_read_unlock();
 
 out:
+       if (rv && user)
+               atomic_dec(&user->nr_msgs);
        return rv;
 }
 
@@ -3471,6 +3510,36 @@ void ipmi_poll_interface(struct ipmi_user *user)
 }
 EXPORT_SYMBOL(ipmi_poll_interface);
 
+static ssize_t nr_users_show(struct device *dev,
+                            struct device_attribute *attr,
+                            char *buf)
+{
+       struct ipmi_smi *intf = container_of(attr,
+                        struct ipmi_smi, nr_users_devattr);
+
+       return sysfs_emit(buf, "%d\n", atomic_read(&intf->nr_users));
+}
+static DEVICE_ATTR_RO(nr_users);
+
+static ssize_t nr_msgs_show(struct device *dev,
+                           struct device_attribute *attr,
+                           char *buf)
+{
+       struct ipmi_smi *intf = container_of(attr,
+                        struct ipmi_smi, nr_msgs_devattr);
+       struct ipmi_user *user;
+       int index;
+       unsigned int count = 0;
+
+       index = srcu_read_lock(&intf->users_srcu);
+       list_for_each_entry_rcu(user, &intf->users, link)
+               count += atomic_read(&user->nr_msgs);
+       srcu_read_unlock(&intf->users_srcu, index);
+
+       return sysfs_emit(buf, "%u\n", count);
+}
+static DEVICE_ATTR_RO(nr_msgs);
+
 static void redo_bmc_reg(struct work_struct *work)
 {
        struct ipmi_smi *intf = container_of(work, struct ipmi_smi,
@@ -3529,6 +3598,7 @@ int ipmi_add_smi(struct module         *owner,
        if (slave_addr != 0)
                intf->addrinfo[0].address = slave_addr;
        INIT_LIST_HEAD(&intf->users);
+       atomic_set(&intf->nr_users, 0);
        intf->handlers = handlers;
        intf->send_info = send_info;
        spin_lock_init(&intf->seq_lock);
@@ -3592,6 +3662,20 @@ int ipmi_add_smi(struct module         *owner,
        if (rv)
                goto out_err_bmc_reg;
 
+       intf->nr_users_devattr = dev_attr_nr_users;
+       sysfs_attr_init(&intf->nr_users_devattr.attr);
+       rv = device_create_file(intf->si_dev, &intf->nr_users_devattr);
+       if (rv)
+               goto out_err_bmc_reg;
+
+       intf->nr_msgs_devattr = dev_attr_nr_msgs;
+       sysfs_attr_init(&intf->nr_msgs_devattr.attr);
+       rv = device_create_file(intf->si_dev, &intf->nr_msgs_devattr);
+       if (rv) {
+               device_remove_file(intf->si_dev, &intf->nr_users_devattr);
+               goto out_err_bmc_reg;
+       }
+
        /*
         * Keep memory order straight for RCU readers.  Make
         * sure everything else is committed to memory before
@@ -3691,6 +3775,9 @@ void ipmi_unregister_smi(struct ipmi_smi *intf)
 
        /* At this point no users can be added to the interface. */
 
+       device_remove_file(intf->si_dev, &intf->nr_msgs_devattr);
+       device_remove_file(intf->si_dev, &intf->nr_users_devattr);
+
        /*
         * Call all the watcher interfaces to tell them that
         * an interface is going away.
@@ -3839,7 +3926,8 @@ static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf,
                msg->data[10] = ipmb_checksum(&msg->data[6], 4);
                msg->data_size = 11;
 
-               pr_debug("Invalid command: %*ph\n", msg->data_size, msg->data);
+               dev_dbg(intf->si_dev, "Invalid command: %*ph\n",
+                       msg->data_size, msg->data);
 
                rcu_read_lock();
                if (!intf->in_shutdown) {
@@ -3992,10 +4080,10 @@ static int handle_ipmb_direct_rcv_rsp(struct ipmi_smi *intf,
        struct ipmi_recv_msg *recv_msg;
        struct ipmi_ipmb_direct_addr *daddr;
 
-       recv_msg = (struct ipmi_recv_msg *) msg->user_data;
+       recv_msg = msg->user_data;
        if (recv_msg == NULL) {
                dev_warn(intf->si_dev,
-                        "IPMI message received with no owner. This could be because of a malformed message, or because of a hardware error.  Contact your hardware vendor for assistance.\n");
+                        "IPMI direct message received with no owner. This could be because of a malformed message, or because of a hardware error.  Contact your hardware vendor for assistance.\n");
                return 0;
        }
 
@@ -4410,10 +4498,10 @@ static int handle_bmc_rsp(struct ipmi_smi *intf,
        struct ipmi_recv_msg *recv_msg;
        struct ipmi_system_interface_addr *smi_addr;
 
-       recv_msg = (struct ipmi_recv_msg *) msg->user_data;
+       recv_msg = msg->user_data;
        if (recv_msg == NULL) {
                dev_warn(intf->si_dev,
-                        "IPMI message received with no owner. This could be because of a malformed message, or because of a hardware error.  Contact your hardware vendor for assistance.\n");
+                        "IPMI SMI message received with no owner. This could be because of a malformed message, or because of a hardware error.  Contact your hardware vendor for assistance.\n");
                return 0;
        }
 
@@ -4447,7 +4535,7 @@ static int handle_one_recv_msg(struct ipmi_smi *intf,
        unsigned char cc;
        bool is_cmd = !((msg->rsp[0] >> 2) & 1);
 
-       pr_debug("Recv: %*ph\n", msg->rsp_size, msg->rsp);
+       dev_dbg(intf->si_dev, "Recv: %*ph\n", msg->rsp_size, msg->rsp);
 
        if (msg->rsp_size < 2) {
                /* Message is too small to be correct. */
@@ -4831,7 +4919,8 @@ smi_from_recv_msg(struct ipmi_smi *intf, struct ipmi_recv_msg *recv_msg,
        smi_msg->data_size = recv_msg->msg.data_len;
        smi_msg->msgid = STORE_SEQ_IN_MSGID(seq, seqid);
 
-       pr_debug("Resend: %*ph\n", smi_msg->data_size, smi_msg->data);
+       dev_dbg(intf->si_dev, "Resend: %*ph\n",
+               smi_msg->data_size, smi_msg->data);
 
        return smi_msg;
 }
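
The message-handler changes above add two tunable ceilings (max_users and max_msgs_per_user) enforced with atomic counters, plus read-only nr_users and nr_msgs sysfs files to observe them. The reserve-then-roll-back counting pattern, sketched with hypothetical names:

    #include <linux/atomic.h>
    #include <linux/errno.h>

    static atomic_t nr_things = ATOMIC_INIT(0);
    static unsigned int max_things = 30;

    static int thing_create(void)
    {
            if (atomic_add_return(1, &nr_things) > max_things) {
                    atomic_dec(&nr_things); /* roll the reservation back */
                    return -EBUSY;
            }
            /* ... create the object; atomic_dec() again on destroy ... */
            return 0;
    }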
index bc3a18daf97a6857ce95ae3b7a868669d14339d9..163ec9749e557c67ef95a3a6c293d286ea026314 100644 (file)
@@ -94,12 +94,8 @@ static void dummy_recv_free(struct ipmi_recv_msg *msg)
 {
        atomic_dec(&dummy_count);
 }
-static struct ipmi_smi_msg halt_smi_msg = {
-       .done = dummy_smi_free
-};
-static struct ipmi_recv_msg halt_recv_msg = {
-       .done = dummy_recv_free
-};
+static struct ipmi_smi_msg halt_smi_msg = INIT_IPMI_SMI_MSG(dummy_smi_free);
+static struct ipmi_recv_msg halt_recv_msg = INIT_IPMI_RECV_MSG(dummy_recv_free);
 
 
 /*
index 5604a810fb3d2d7d40d463bb2a3e4d52940bae64..6e357ad76f2eba3a29513b4d69794d3660ac5b2a 100644 (file)
@@ -264,15 +264,16 @@ static void cleanup_one_si(struct smi_info *smi_info);
 static void cleanup_ipmi_si(void);
 
 #ifdef DEBUG_TIMING
-void debug_timestamp(char *msg)
+void debug_timestamp(struct smi_info *smi_info, char *msg)
 {
        struct timespec64 t;
 
        ktime_get_ts64(&t);
-       pr_debug("**%s: %lld.%9.9ld\n", msg, t.tv_sec, t.tv_nsec);
+       dev_dbg(smi_info->io.dev, "**%s: %lld.%9.9ld\n",
+               msg, t.tv_sec, t.tv_nsec);
 }
 #else
-#define debug_timestamp(x)
+#define debug_timestamp(smi_info, x)
 #endif
 
 static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list);
@@ -318,7 +319,7 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
 
                smi_info->curr_msg = smi_info->waiting_msg;
                smi_info->waiting_msg = NULL;
-               debug_timestamp("Start2");
+               debug_timestamp(smi_info, "Start2");
                err = atomic_notifier_call_chain(&xaction_notifier_list,
                                0, smi_info);
                if (err & NOTIFY_STOP_MASK) {
@@ -538,7 +539,7 @@ static void handle_transaction_done(struct smi_info *smi_info)
 {
        struct ipmi_smi_msg *msg;
 
-       debug_timestamp("Done");
+       debug_timestamp(smi_info, "Done");
        switch (smi_info->si_state) {
        case SI_NORMAL:
                if (!smi_info->curr_msg)
@@ -901,7 +902,7 @@ static void sender(void                *send_info,
        struct smi_info   *smi_info = send_info;
        unsigned long     flags;
 
-       debug_timestamp("Enqueue");
+       debug_timestamp(smi_info, "Enqueue");
 
        if (smi_info->run_to_completion) {
                /*
@@ -1079,7 +1080,7 @@ static void smi_timeout(struct timer_list *t)
        long              timeout;
 
        spin_lock_irqsave(&(smi_info->si_lock), flags);
-       debug_timestamp("Timer");
+       debug_timestamp(smi_info, "Timer");
 
        jiffies_now = jiffies;
        time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies)
@@ -1128,7 +1129,7 @@ irqreturn_t ipmi_si_irq_handler(int irq, void *data)
 
        smi_inc_stat(smi_info, interrupts);
 
-       debug_timestamp("Interrupt");
+       debug_timestamp(smi_info, "Interrupt");
 
        smi_event_handler(smi_info, 0);
        spin_unlock_irqrestore(&(smi_info->si_lock), flags);
index f199cc19484462ba020adfd820ff4510e791a73f..fc742ee9c0468000bcda6d566ba323bc2f7d861b 100644 (file)
@@ -814,6 +814,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
                break;
 
        case SSIF_GETTING_EVENTS:
+               if (!msg) {
+                       /* Should never happen, but just in case. */
+                       dev_warn(&ssif_info->client->dev,
+                                "No message set while getting events\n");
+                       ipmi_ssif_unlock_cond(ssif_info, flags);
+                       break;
+               }
+
                if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) {
                        /* Error getting event, probably done. */
                        msg->done(msg);
@@ -838,6 +846,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
                break;
 
        case SSIF_GETTING_MESSAGES:
+               if (!msg) {
+                       /* Should never happen, but just in case. */
+                       dev_warn(&ssif_info->client->dev,
+                                "No message set while getting messages\n");
+                       ipmi_ssif_unlock_cond(ssif_info, flags);
+                       break;
+               }
+
                if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) {
                        /* Error getting message, probably done. */
                        msg->done(msg);
@@ -861,6 +877,13 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
                        deliver_recv_msg(ssif_info, msg);
                }
                break;
+
+       default:
+               /* Should never happen, but just in case. */
+               dev_warn(&ssif_info->client->dev,
+                        "Invalid state in message done handling: %d\n",
+                        ssif_info->ssif_state);
+               ipmi_ssif_unlock_cond(ssif_info, flags);
        }
 
        flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
@@ -1053,7 +1076,7 @@ static void start_next_msg(struct ssif_info *ssif_info, unsigned long *flags)
 static void sender(void                *send_info,
                   struct ipmi_smi_msg *msg)
 {
-       struct ssif_info *ssif_info = (struct ssif_info *) send_info;
+       struct ssif_info *ssif_info = send_info;
        unsigned long oflags, *flags;
 
        BUG_ON(ssif_info->waiting_msg);
@@ -1090,7 +1113,7 @@ static int get_smi_info(void *send_info, struct ipmi_smi_info *data)
  */
 static void request_events(void *send_info)
 {
-       struct ssif_info *ssif_info = (struct ssif_info *) send_info;
+       struct ssif_info *ssif_info = send_info;
        unsigned long oflags, *flags;
 
        if (!ssif_info->has_event_buffer)
@@ -1107,7 +1130,7 @@ static void request_events(void *send_info)
  */
 static void ssif_set_need_watch(void *send_info, unsigned int watch_mask)
 {
-       struct ssif_info *ssif_info = (struct ssif_info *) send_info;
+       struct ssif_info *ssif_info = send_info;
        unsigned long oflags, *flags;
        long timeout = 0;
 
@@ -1619,7 +1642,7 @@ static int ssif_check_and_remove(struct i2c_client *client,
        return 0;
 }
 
-static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int ssif_probe(struct i2c_client *client)
 {
        unsigned char     msg[3];
        unsigned char     *resp;
@@ -2037,7 +2060,7 @@ static struct i2c_driver ssif_i2c_driver = {
        .driver         = {
                .name                   = DEVICE_NAME
        },
-       .probe          = ssif_probe,
+       .probe_new      = ssif_probe,
        .remove         = ssif_remove,
        .alert          = ssif_alert,
        .id_table       = ssif_id,
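
The switch from .probe to .probe_new reflects that ssif_probe() never used the struct i2c_device_id argument. Drivers converted this way that still need the matched id can recover it with i2c_match_id(); a minimal sketch, where the example_* names are made up for illustration:

    static int example_probe(struct i2c_client *client)
    {
            const struct i2c_device_id *id =
                    i2c_match_id(example_id_table, client);

            if (id)
                    dev_info(&client->dev, "matched %s\n", id->name);
            return 0;
    }
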
index 0604abdd249a14bda5f3c7388582e58a2c548735..5b4e677929cabaaa8256e34427226941b581f1fd 100644 (file)
@@ -354,12 +354,8 @@ static void msg_free_recv(struct ipmi_recv_msg *msg)
                        complete(&msg_wait);
        }
 }
-static struct ipmi_smi_msg smi_msg = {
-       .done = msg_free_smi
-};
-static struct ipmi_recv_msg recv_msg = {
-       .done = msg_free_recv
-};
+static struct ipmi_smi_msg smi_msg = INIT_IPMI_SMI_MSG(msg_free_smi);
+static struct ipmi_recv_msg recv_msg = INIT_IPMI_RECV_MSG(msg_free_recv);
 
 static int __ipmi_set_timeout(struct ipmi_smi_msg  *smi_msg,
                              struct ipmi_recv_msg *recv_msg,
@@ -475,12 +471,10 @@ static void panic_recv_free(struct ipmi_recv_msg *msg)
        atomic_dec(&panic_done_count);
 }
 
-static struct ipmi_smi_msg panic_halt_heartbeat_smi_msg = {
-       .done = panic_smi_free
-};
-static struct ipmi_recv_msg panic_halt_heartbeat_recv_msg = {
-       .done = panic_recv_free
-};
+static struct ipmi_smi_msg panic_halt_heartbeat_smi_msg =
+       INIT_IPMI_SMI_MSG(panic_smi_free);
+static struct ipmi_recv_msg panic_halt_heartbeat_recv_msg =
+       INIT_IPMI_RECV_MSG(panic_recv_free);
 
 static void panic_halt_ipmi_heartbeat(void)
 {
@@ -516,12 +510,10 @@ static void panic_halt_ipmi_heartbeat(void)
                atomic_sub(2, &panic_done_count);
 }
 
-static struct ipmi_smi_msg panic_halt_smi_msg = {
-       .done = panic_smi_free
-};
-static struct ipmi_recv_msg panic_halt_recv_msg = {
-       .done = panic_recv_free
-};
+static struct ipmi_smi_msg panic_halt_smi_msg =
+       INIT_IPMI_SMI_MSG(panic_smi_free);
+static struct ipmi_recv_msg panic_halt_recv_msg =
+       INIT_IPMI_RECV_MSG(panic_recv_free);
 
 /*
  * Special call, doesn't claim any locks.  This is only to be called
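
These hunks, together with the matching ones in the other IPMI files above, replace open-coded designated initializers with INIT_IPMI_SMI_MSG()/INIT_IPMI_RECV_MSG(). Centralizing the initializer means every static message picks up sane defaults for any fields added later, instead of each call site having to be patched individually. A plausible shape for the macros, shown here only as a hypothetical reconstruction (the real definitions live in the IPMI headers):

    /* Hypothetical reconstruction for illustration only. */
    #define INIT_IPMI_SMI_MSG(done_handler) {       \
            .done = (done_handler),                 \
            .type = IPMI_SM_MSG_TYPE_NORMAL         \
    }

    #define INIT_IPMI_RECV_MSG(done_handler) {      \
            .done = (done_handler)                  \
    }
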
index 4c9adb4f3d5d73c98bd5c1ff800b01bfde61709c..b691b9d5950331257af811d7c6f50fe113b77c51 100644 (file)
  *   - Sysctl interface.
  *
  * The high level overview is that there is one input pool, into which
- * various pieces of data are hashed. Some of that data is then "credited" as
- * having a certain number of bits of entropy. When enough bits of entropy are
- * available, the hash is finalized and handed as a key to a stream cipher that
- * expands it indefinitely for various consumers. This key is periodically
- * refreshed as the various entropy collectors, described below, add data to the
- * input pool and credit it. There is currently no Fortuna-like scheduler
- * involved, which can lead to malicious entropy sources causing a premature
- * reseed, and the entropy estimates are, at best, conservative guesses.
+ * various pieces of data are hashed. Prior to initialization, some of that
+ * data is then "credited" as having a certain number of bits of entropy.
+ * When enough bits of entropy are available, the hash is finalized and
+ * handed as a key to a stream cipher that expands it indefinitely for
+ * various consumers. This key is periodically refreshed as the various
+ * entropy collectors, described below, add data to the input pool.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -53,6 +51,8 @@
 #include <linux/completion.h>
 #include <linux/uuid.h>
 #include <linux/uaccess.h>
+#include <linux/suspend.h>
+#include <linux/siphash.h>
 #include <crypto/chacha.h>
 #include <crypto/blake2s.h>
 #include <asm/processor.h>
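
The reworked header comment above boils the RNG down to three verbs: mix bytes into one input pool, credit the pool with entropy bits until initialized, and extract a key once enough bits have accumulated. A toy, userspace-runnable model of that flow (the stand-in mixer below is not BLAKE2s, and the thresholds merely echo POOL_EARLY_BITS/POOL_READY_BITS):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t pool;          /* stand-in for the BLAKE2s state */
    static unsigned int init_bits; /* stand-in for input_pool.init_bits */

    static void mix_pool_bytes(const void *buf, unsigned long len)
    {
            const uint8_t *p = buf;

            while (len--)  /* toy mixer, NOT a cryptographic hash */
                    pool = (pool << 7 | pool >> 57) ^ *p++;
    }

    static void credit_init_bits(unsigned int bits)
    {
            init_bits += bits;
            if (init_bits >= 256)  /* POOL_READY_BITS analogue */
                    printf("ready; key material = %016llx\n",
                           (unsigned long long)pool);
    }

    int main(void)
    {
            mix_pool_bytes("boot-time data", 14);
            credit_init_bits(128); /* roughly CRNG_EARLY */
            mix_pool_bytes("hwrng bytes", 11);
            credit_init_bits(128); /* crosses the ready threshold */
            return 0;
    }
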
  *********************************************************************/
 
 /*
- * crng_init =  0 --> Uninitialized
- *             1 --> Initialized
- *             2 --> Initialized from input_pool
- *
  * crng_init is protected by base_crng->lock, and only increases
- * its value (from 0->1->2).
+ * its value (from empty->early->ready).
  */
-static int crng_init = 0;
-#define crng_ready() (likely(crng_init > 1))
-/* Various types of waiters for crng_init->2 transition. */
+static enum {
+       CRNG_EMPTY = 0, /* Little to no entropy collected */
+       CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */
+       CRNG_READY = 2  /* Fully initialized with POOL_READY_BITS collected */
+} crng_init __read_mostly = CRNG_EMPTY;
+static DEFINE_STATIC_KEY_FALSE(crng_is_ready);
+#define crng_ready() (static_branch_likely(&crng_is_ready) || crng_init >= CRNG_READY)
+/* Various types of waiters for crng_init->CRNG_READY transition. */
 static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
 static struct fasync_struct *fasync;
-static DEFINE_SPINLOCK(random_ready_chain_lock);
-static RAW_NOTIFIER_HEAD(random_ready_chain);
 
 /* Control how we warn userspace. */
-static struct ratelimit_state unseeded_warning =
-       RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
 static struct ratelimit_state urandom_warning =
        RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
-static int ratelimit_disable __read_mostly;
+static int ratelimit_disable __read_mostly =
+       IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM);
 module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
 MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
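
crng_ready() is now gated by a static key: until crng_set_ready() runs, the check falls back to reading crng_init; afterwards the branch site itself is patched, so the hot path costs no memory load at all. The underlying pattern, as a minimal kernel-style sketch:

    #include <linux/jump_label.h>

    static DEFINE_STATIC_KEY_FALSE(feature_ready);

    static bool feature_is_ready(void)
    {
            /* Compiles to a straight-line fast path once enabled. */
            return static_branch_likely(&feature_ready);
    }

    static void feature_mark_ready(void)
    {
            static_branch_enable(&feature_ready); /* patches all branch sites */
    }

Enabling a static key does code patching and may sleep, so it cannot happen in atomic context; that is exactly why crng_reseed() defers the flip through execute_in_process_context() into crng_set_ready() below.
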
 
@@ -110,6 +108,11 @@ bool rng_is_initialized(void)
 }
 EXPORT_SYMBOL(rng_is_initialized);
 
+static void __cold crng_set_ready(struct work_struct *work)
+{
+       static_branch_enable(&crng_is_ready);
+}
+
 /* Used by wait_for_random_bytes(), and considered an entropy collector, below. */
 static void try_to_generate_entropy(void);
 
@@ -137,73 +140,10 @@ int wait_for_random_bytes(void)
 }
 EXPORT_SYMBOL(wait_for_random_bytes);
 
-/*
- * Add a callback function that will be invoked when the input
- * pool is initialised.
- *
- * returns: 0 if callback is successfully added
- *         -EALREADY if pool is already initialised (callback not called)
- */
-int register_random_ready_notifier(struct notifier_block *nb)
-{
-       unsigned long flags;
-       int ret = -EALREADY;
-
-       if (crng_ready())
-               return ret;
-
-       spin_lock_irqsave(&random_ready_chain_lock, flags);
-       if (!crng_ready())
-               ret = raw_notifier_chain_register(&random_ready_chain, nb);
-       spin_unlock_irqrestore(&random_ready_chain_lock, flags);
-       return ret;
-}
-
-/*
- * Delete a previously registered readiness callback function.
- */
-int unregister_random_ready_notifier(struct notifier_block *nb)
-{
-       unsigned long flags;
-       int ret;
-
-       spin_lock_irqsave(&random_ready_chain_lock, flags);
-       ret = raw_notifier_chain_unregister(&random_ready_chain, nb);
-       spin_unlock_irqrestore(&random_ready_chain_lock, flags);
-       return ret;
-}
-
-static void process_random_ready_list(void)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&random_ready_chain_lock, flags);
-       raw_notifier_call_chain(&random_ready_chain, 0, NULL);
-       spin_unlock_irqrestore(&random_ready_chain_lock, flags);
-}
-
-#define warn_unseeded_randomness(previous) \
-       _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
-
-static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous)
-{
-#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
-       const bool print_once = false;
-#else
-       static bool print_once __read_mostly;
-#endif
-
-       if (print_once || crng_ready() ||
-           (previous && (caller == READ_ONCE(*previous))))
-               return;
-       WRITE_ONCE(*previous, caller);
-#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
-       print_once = true;
-#endif
-       if (__ratelimit(&unseeded_warning))
-               printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n",
-                               func_name, caller, crng_init);
-}
+#define warn_unseeded_randomness() \
+       if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \
+               printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \
+                               __func__, (void *)_RET_IP_, crng_init)
 
 
 /*********************************************************************
@@ -216,7 +156,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void
  *
  * There are a few exported interfaces for use by other drivers:
  *
- *     void get_random_bytes(void *buf, size_t nbytes)
+ *     void get_random_bytes(void *buf, size_t len)
  *     u32 get_random_u32()
  *     u64 get_random_u64()
  *     unsigned int get_random_int()
@@ -232,8 +172,8 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void
  *********************************************************************/
 
 enum {
-       CRNG_RESEED_INTERVAL = 300 * HZ,
-       CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE
+       CRNG_RESEED_START_INTERVAL = HZ,
+       CRNG_RESEED_INTERVAL = 60 * HZ
 };
 
 static struct {
@@ -256,24 +196,17 @@ static DEFINE_PER_CPU(struct crng, crngs) = {
        .lock = INIT_LOCAL_LOCK(crngs.lock),
 };
 
-/* Used by crng_reseed() to extract a new seed from the input pool. */
-static bool drain_entropy(void *buf, size_t nbytes, bool force);
+/* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */
+static void extract_entropy(void *buf, size_t len);
 
-/*
- * This extracts a new crng key from the input pool, but only if there is a
- * sufficient amount of entropy available or force is true, in order to
- * mitigate bruteforcing of newly added bits.
- */
-static void crng_reseed(bool force)
+/* This extracts a new crng key from the input pool. */
+static void crng_reseed(void)
 {
        unsigned long flags;
        unsigned long next_gen;
        u8 key[CHACHA_KEY_SIZE];
-       bool finalize_init = false;
 
-       /* Only reseed if we can, to prevent brute forcing a small amount of new bits. */
-       if (!drain_entropy(key, sizeof(key), force))
-               return;
+       extract_entropy(key, sizeof(key));
 
        /*
         * We copy the new key into the base_crng, overwriting the old one,
@@ -288,28 +221,10 @@ static void crng_reseed(bool force)
                ++next_gen;
        WRITE_ONCE(base_crng.generation, next_gen);
        WRITE_ONCE(base_crng.birth, jiffies);
-       if (!crng_ready()) {
-               crng_init = 2;
-               finalize_init = true;
-       }
+       if (!static_branch_likely(&crng_is_ready))
+               crng_init = CRNG_READY;
        spin_unlock_irqrestore(&base_crng.lock, flags);
        memzero_explicit(key, sizeof(key));
-       if (finalize_init) {
-               process_random_ready_list();
-               wake_up_interruptible(&crng_init_wait);
-               kill_fasync(&fasync, SIGIO, POLL_IN);
-               pr_notice("crng init done\n");
-               if (unseeded_warning.missed) {
-                       pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n",
-                                 unseeded_warning.missed);
-                       unseeded_warning.missed = 0;
-               }
-               if (urandom_warning.missed) {
-                       pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
-                                 urandom_warning.missed);
-                       urandom_warning.missed = 0;
-               }
-       }
 }
 
 /*
@@ -345,10 +260,10 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
 }
 
 /*
- * Return whether the crng seed is considered to be sufficiently
- * old that a reseeding might be attempted. This happens if the last
- * reseeding was CRNG_RESEED_INTERVAL ago, or during early boot, at
- * an interval proportional to the uptime.
+ * Return whether the crng seed is considered to be sufficiently old
+ * that a reseeding is needed. This happens if the last reseeding
+ * was CRNG_RESEED_INTERVAL ago, or during early boot, at an interval
+ * proportional to the uptime.
  */
 static bool crng_has_old_seed(void)
 {
@@ -360,10 +275,10 @@ static bool crng_has_old_seed(void)
                if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2)
                        WRITE_ONCE(early_boot, false);
                else
-                       interval = max_t(unsigned int, 5 * HZ,
+                       interval = max_t(unsigned int, CRNG_RESEED_START_INTERVAL,
                                         (unsigned int)uptime / 2 * HZ);
        }
-       return time_after(jiffies, READ_ONCE(base_crng.birth) + interval);
+       return time_is_before_jiffies(READ_ONCE(base_crng.birth) + interval);
 }
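
Concretely, the early-boot schedule reseeds after one second, then at intervals of about half the current uptime, until two full CRNG_RESEED_INTERVAL periods have elapsed and the steady 60-second cadence takes over. A runnable toy that mirrors the arithmetic above:

    #include <stdio.h>

    #define HZ 100
    #define CRNG_RESEED_START_INTERVAL (1 * HZ)
    #define CRNG_RESEED_INTERVAL (60 * HZ)

    static unsigned int max_u(unsigned int a, unsigned int b)
    {
            return a > b ? a : b;
    }

    /* Mirrors the early-boot branch of crng_has_old_seed() above. */
    static unsigned int reseed_interval_jiffies(unsigned long uptime_sec)
    {
            if (uptime_sec >= CRNG_RESEED_INTERVAL / HZ * 2)
                    return CRNG_RESEED_INTERVAL;
            return max_u(CRNG_RESEED_START_INTERVAL,
                         (unsigned int)(uptime_sec / 2) * HZ);
    }

    int main(void)
    {
            for (unsigned long t = 1; t <= 128; t *= 2)
                    printf("uptime %3lus -> reseed interval %2us\n",
                           t, reseed_interval_jiffies(t) / HZ);
            return 0;
    }
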
 
 /*
@@ -382,28 +297,31 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
        /*
         * For the fast path, we check whether we're ready, unlocked first, and
         * then re-check once locked later. In the case where we're really not
-        * ready, we do fast key erasure with the base_crng directly, because
-        * this is what crng_pre_init_inject() mutates during early init.
+        * ready, we do fast key erasure with the base_crng directly, first
+        * extracting from the input pool when crng_init is CRNG_EMPTY.
         */
        if (!crng_ready()) {
                bool ready;
 
                spin_lock_irqsave(&base_crng.lock, flags);
                ready = crng_ready();
-               if (!ready)
+               if (!ready) {
+                       if (crng_init == CRNG_EMPTY)
+                               extract_entropy(base_crng.key, sizeof(base_crng.key));
                        crng_fast_key_erasure(base_crng.key, chacha_state,
                                              random_data, random_data_len);
+               }
                spin_unlock_irqrestore(&base_crng.lock, flags);
                if (!ready)
                        return;
        }
 
        /*
-        * If the base_crng is old enough, we try to reseed, which in turn
-        * bumps the generation counter that we check below.
+        * If the base_crng is old enough, we reseed, which in turn bumps the
+        * generation counter that we check below.
         */
        if (unlikely(crng_has_old_seed()))
-               crng_reseed(false);
+               crng_reseed();
 
        local_lock_irqsave(&crngs.lock, flags);
        crng = raw_cpu_ptr(&crngs);
@@ -433,68 +351,24 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
        local_unlock_irqrestore(&crngs.lock, flags);
 }
 
-/*
- * This function is for crng_init == 0 only. It loads entropy directly
- * into the crng's key, without going through the input pool. It is,
- * generally speaking, not very safe, but we use this only at early
- * boot time when it's better to have something there rather than
- * nothing.
- *
- * If account is set, then the crng_init_cnt counter is incremented.
- * This shouldn't be set by functions like add_device_randomness(),
- * where we can't trust the buffer passed to it is guaranteed to be
- * unpredictable (so it might not have any entropy at all).
- */
-static void crng_pre_init_inject(const void *input, size_t len, bool account)
-{
-       static int crng_init_cnt = 0;
-       struct blake2s_state hash;
-       unsigned long flags;
-
-       blake2s_init(&hash, sizeof(base_crng.key));
-
-       spin_lock_irqsave(&base_crng.lock, flags);
-       if (crng_init != 0) {
-               spin_unlock_irqrestore(&base_crng.lock, flags);
-               return;
-       }
-
-       blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
-       blake2s_update(&hash, input, len);
-       blake2s_final(&hash, base_crng.key);
-
-       if (account) {
-               crng_init_cnt += min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt);
-               if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
-                       ++base_crng.generation;
-                       crng_init = 1;
-               }
-       }
-
-       spin_unlock_irqrestore(&base_crng.lock, flags);
-
-       if (crng_init == 1)
-               pr_notice("fast init done\n");
-}
-
-static void _get_random_bytes(void *buf, size_t nbytes)
+static void _get_random_bytes(void *buf, size_t len)
 {
        u32 chacha_state[CHACHA_STATE_WORDS];
        u8 tmp[CHACHA_BLOCK_SIZE];
-       size_t len;
+       size_t first_block_len;
 
-       if (!nbytes)
+       if (!len)
                return;
 
-       len = min_t(size_t, 32, nbytes);
-       crng_make_state(chacha_state, buf, len);
-       nbytes -= len;
-       buf += len;
+       first_block_len = min_t(size_t, 32, len);
+       crng_make_state(chacha_state, buf, first_block_len);
+       len -= first_block_len;
+       buf += first_block_len;
 
-       while (nbytes) {
-               if (nbytes < CHACHA_BLOCK_SIZE) {
+       while (len) {
+               if (len < CHACHA_BLOCK_SIZE) {
                        chacha20_block(chacha_state, tmp);
-                       memcpy(buf, tmp, nbytes);
+                       memcpy(buf, tmp, len);
                        memzero_explicit(tmp, sizeof(tmp));
                        break;
                }
@@ -502,7 +376,7 @@ static void _get_random_bytes(void *buf, size_t nbytes)
                chacha20_block(chacha_state, buf);
                if (unlikely(chacha_state[12] == 0))
                        ++chacha_state[13];
-               nbytes -= CHACHA_BLOCK_SIZE;
+               len -= CHACHA_BLOCK_SIZE;
                buf += CHACHA_BLOCK_SIZE;
        }
 
@@ -512,29 +386,24 @@ static void _get_random_bytes(void *buf, size_t nbytes)
 /*
  * This function is the exported kernel interface.  It returns some
  * number of good random numbers, suitable for key generation, seeding
- * TCP sequence numbers, etc.  It does not rely on the hardware random
- * number generator.  For random bytes direct from the hardware RNG
- * (when available), use get_random_bytes_arch(). In order to ensure
- * that the randomness provided by this function is okay, the function
- * wait_for_random_bytes() should be called and return 0 at least once
- * at any point prior.
+ * TCP sequence numbers, etc. In order to ensure that the randomness
+ * provided by this function is okay, the function wait_for_random_bytes()
+ * should be called and return 0 at least once at any point prior.
  */
-void get_random_bytes(void *buf, size_t nbytes)
+void get_random_bytes(void *buf, size_t len)
 {
-       static void *previous;
-
-       warn_unseeded_randomness(&previous);
-       _get_random_bytes(buf, nbytes);
+       warn_unseeded_randomness();
+       _get_random_bytes(buf, len);
 }
 EXPORT_SYMBOL(get_random_bytes);
 
-static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
+static ssize_t get_random_bytes_user(struct iov_iter *iter)
 {
-       size_t len, left, ret = 0;
        u32 chacha_state[CHACHA_STATE_WORDS];
-       u8 output[CHACHA_BLOCK_SIZE];
+       u8 block[CHACHA_BLOCK_SIZE];
+       size_t ret = 0, copied;
 
-       if (!nbytes)
+       if (unlikely(!iov_iter_count(iter)))
                return 0;
 
        /*
@@ -548,30 +417,22 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
         * use chacha_state after, so we can simply return those bytes to
         * the user directly.
         */
-       if (nbytes <= CHACHA_KEY_SIZE) {
-               ret = nbytes - copy_to_user(buf, &chacha_state[4], nbytes);
+       if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) {
+               ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter);
                goto out_zero_chacha;
        }
 
        for (;;) {
-               chacha20_block(chacha_state, output);
+               chacha20_block(chacha_state, block);
                if (unlikely(chacha_state[12] == 0))
                        ++chacha_state[13];
 
-               len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE);
-               left = copy_to_user(buf, output, len);
-               if (left) {
-                       ret += len - left;
-                       break;
-               }
-
-               buf += len;
-               ret += len;
-               nbytes -= len;
-               if (!nbytes)
+               copied = copy_to_iter(block, sizeof(block), iter);
+               ret += copied;
+               if (!iov_iter_count(iter) || copied != sizeof(block))
                        break;
 
-               BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0);
+               BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0);
                if (ret % PAGE_SIZE == 0) {
                        if (signal_pending(current))
                                break;
@@ -579,7 +440,7 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
                }
        }
 
-       memzero_explicit(output, sizeof(output));
+       memzero_explicit(block, sizeof(block));
 out_zero_chacha:
        memzero_explicit(chacha_state, sizeof(chacha_state));
        return ret ? ret : -EFAULT;
@@ -591,98 +452,69 @@ out_zero_chacha:
  * provided by this function is okay, the function wait_for_random_bytes()
  * should be called and return 0 at least once at any point prior.
  */
-struct batched_entropy {
-       union {
-               /*
-                * We make this 1.5x a ChaCha block, so that we get the
-                * remaining 32 bytes from fast key erasure, plus one full
-                * block from the detached ChaCha state. We can increase
-                * the size of this later if needed so long as we keep the
-                * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE.
-                */
-               u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))];
-               u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))];
-       };
-       local_lock_t lock;
-       unsigned long generation;
-       unsigned int position;
-};
-
 
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
-       .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock),
-       .position = UINT_MAX
-};
-
-u64 get_random_u64(void)
-{
-       u64 ret;
-       unsigned long flags;
-       struct batched_entropy *batch;
-       static void *previous;
-       unsigned long next_gen;
-
-       warn_unseeded_randomness(&previous);
-
-       local_lock_irqsave(&batched_entropy_u64.lock, flags);
-       batch = raw_cpu_ptr(&batched_entropy_u64);
-
-       next_gen = READ_ONCE(base_crng.generation);
-       if (batch->position >= ARRAY_SIZE(batch->entropy_u64) ||
-           next_gen != batch->generation) {
-               _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64));
-               batch->position = 0;
-               batch->generation = next_gen;
-       }
-
-       ret = batch->entropy_u64[batch->position];
-       batch->entropy_u64[batch->position] = 0;
-       ++batch->position;
-       local_unlock_irqrestore(&batched_entropy_u64.lock, flags);
-       return ret;
-}
-EXPORT_SYMBOL(get_random_u64);
-
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
-       .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock),
-       .position = UINT_MAX
-};
-
-u32 get_random_u32(void)
-{
-       u32 ret;
-       unsigned long flags;
-       struct batched_entropy *batch;
-       static void *previous;
-       unsigned long next_gen;
-
-       warn_unseeded_randomness(&previous);
-
-       local_lock_irqsave(&batched_entropy_u32.lock, flags);
-       batch = raw_cpu_ptr(&batched_entropy_u32);
-
-       next_gen = READ_ONCE(base_crng.generation);
-       if (batch->position >= ARRAY_SIZE(batch->entropy_u32) ||
-           next_gen != batch->generation) {
-               _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32));
-               batch->position = 0;
-               batch->generation = next_gen;
-       }
-
-       ret = batch->entropy_u32[batch->position];
-       batch->entropy_u32[batch->position] = 0;
-       ++batch->position;
-       local_unlock_irqrestore(&batched_entropy_u32.lock, flags);
-       return ret;
-}
-EXPORT_SYMBOL(get_random_u32);
+#define DEFINE_BATCHED_ENTROPY(type)                                           \
+struct batch_ ##type {                                                         \
+       /*                                                                      \
+        * We make this 1.5x a ChaCha block, so that we get the                 \
+        * remaining 32 bytes from fast key erasure, plus one full              \
+        * block from the detached ChaCha state. We can increase                \
+        * the size of this later if needed so long as we keep the              \
+        * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE.               \
+        */                                                                     \
+       type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))];               \
+       local_lock_t lock;                                                      \
+       unsigned long generation;                                               \
+       unsigned int position;                                                  \
+};                                                                             \
+                                                                               \
+static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = {       \
+       .lock = INIT_LOCAL_LOCK(batched_entropy_ ##type.lock),                  \
+       .position = UINT_MAX                                                    \
+};                                                                             \
+                                                                               \
+type get_random_ ##type(void)                                                  \
+{                                                                              \
+       type ret;                                                               \
+       unsigned long flags;                                                    \
+       struct batch_ ##type *batch;                                            \
+       unsigned long next_gen;                                                 \
+                                                                               \
+       warn_unseeded_randomness();                                             \
+                                                                               \
+       if (!crng_ready()) {                                                    \
+               _get_random_bytes(&ret, sizeof(ret));                           \
+               return ret;                                                     \
+       }                                                                       \
+                                                                               \
+       local_lock_irqsave(&batched_entropy_ ##type.lock, flags);               \
+       batch = raw_cpu_ptr(&batched_entropy_##type);                           \
+                                                                               \
+       next_gen = READ_ONCE(base_crng.generation);                             \
+       if (batch->position >= ARRAY_SIZE(batch->entropy) ||                    \
+           next_gen != batch->generation) {                                    \
+               _get_random_bytes(batch->entropy, sizeof(batch->entropy));      \
+               batch->position = 0;                                            \
+               batch->generation = next_gen;                                   \
+       }                                                                       \
+                                                                               \
+       ret = batch->entropy[batch->position];                                  \
+       batch->entropy[batch->position] = 0;                                    \
+       ++batch->position;                                                      \
+       local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags);          \
+       return ret;                                                             \
+}                                                                              \
+EXPORT_SYMBOL(get_random_ ##type);
+
+DEFINE_BATCHED_ENTROPY(u64)
+DEFINE_BATCHED_ENTROPY(u32)
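
With CHACHA_BLOCK_SIZE being 64 bytes, the "1.5x a ChaCha block" sizing in the macro works out to 96 bytes per refill: 12 entries for the u64 batch and 24 for the u32 batch. A one-liner check of that arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    #define CHACHA_BLOCK_SIZE 64 /* ChaCha20 output block, in bytes */

    int main(void)
    {
            printf("u64 entries: %d\n",
                   CHACHA_BLOCK_SIZE * 3 / (2 * (int)sizeof(uint64_t)));
            printf("u32 entries: %d\n",
                   CHACHA_BLOCK_SIZE * 3 / (2 * (int)sizeof(uint32_t)));
            return 0;
    }
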
 
 #ifdef CONFIG_SMP
 /*
  * This function is called when the CPU is coming up, with entry
  * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP.
  */
-int random_prepare_cpu(unsigned int cpu)
+int __cold random_prepare_cpu(unsigned int cpu)
 {
        /*
         * When the cpu comes back online, immediately invalidate both
@@ -696,65 +528,6 @@ int random_prepare_cpu(unsigned int cpu)
 }
 #endif
 
-/**
- * randomize_page - Generate a random, page aligned address
- * @start:     The smallest acceptable address the caller will take.
- * @range:     The size of the area, starting at @start, within which the
- *             random address must fall.
- *
- * If @start + @range would overflow, @range is capped.
- *
- * NOTE: Historical use of randomize_range, which this replaces, presumed that
- * @start was already page aligned.  We now align it regardless.
- *
- * Return: A page aligned address within [start, start + range).  On error,
- * @start is returned.
- */
-unsigned long randomize_page(unsigned long start, unsigned long range)
-{
-       if (!PAGE_ALIGNED(start)) {
-               range -= PAGE_ALIGN(start) - start;
-               start = PAGE_ALIGN(start);
-       }
-
-       if (start > ULONG_MAX - range)
-               range = ULONG_MAX - start;
-
-       range >>= PAGE_SHIFT;
-
-       if (range == 0)
-               return start;
-
-       return start + (get_random_long() % range << PAGE_SHIFT);
-}
-
-/*
- * This function will use the architecture-specific hardware random
- * number generator if it is available. It is not recommended for
- * use. Use get_random_bytes() instead. It returns the number of
- * bytes filled in.
- */
-size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes)
-{
-       size_t left = nbytes;
-       u8 *p = buf;
-
-       while (left) {
-               unsigned long v;
-               size_t chunk = min_t(size_t, left, sizeof(unsigned long));
-
-               if (!arch_get_random_long(&v))
-                       break;
-
-               memcpy(p, &v, chunk);
-               p += chunk;
-               left -= chunk;
-       }
-
-       return nbytes - left;
-}
-EXPORT_SYMBOL(get_random_bytes_arch);
-
 
 /**********************************************************************
  *
@@ -762,33 +535,28 @@ EXPORT_SYMBOL(get_random_bytes_arch);
  *
  * Callers may add entropy via:
  *
- *     static void mix_pool_bytes(const void *in, size_t nbytes)
+ *     static void mix_pool_bytes(const void *buf, size_t len)
  *
  * After which, if added entropy should be credited:
  *
- *     static void credit_entropy_bits(size_t nbits)
+ *     static void credit_init_bits(size_t bits)
  *
- * Finally, extract entropy via these two, with the latter one
- * setting the entropy count to zero and extracting only if there
- * is POOL_MIN_BITS entropy credited prior or force is true:
+ * Finally, extract entropy via:
  *
- *     static void extract_entropy(void *buf, size_t nbytes)
- *     static bool drain_entropy(void *buf, size_t nbytes, bool force)
+ *     static void extract_entropy(void *buf, size_t len)
  *
  **********************************************************************/
 
 enum {
        POOL_BITS = BLAKE2S_HASH_SIZE * 8,
-       POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
+       POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */
+       POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */
 };
 
-/* For notifying userspace should write into /dev/random. */
-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-
 static struct {
        struct blake2s_state hash;
        spinlock_t lock;
-       unsigned int entropy_count;
+       unsigned int init_bits;
 } input_pool = {
        .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
                    BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
@@ -797,48 +565,30 @@ static struct {
        .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
 };
 
-static void _mix_pool_bytes(const void *in, size_t nbytes)
+static void _mix_pool_bytes(const void *buf, size_t len)
 {
-       blake2s_update(&input_pool.hash, in, nbytes);
+       blake2s_update(&input_pool.hash, buf, len);
 }
 
 /*
- * This function adds bytes into the entropy "pool".  It does not
- * update the entropy estimate.  The caller should call
- * credit_entropy_bits if this is appropriate.
+ * This function adds bytes into the input pool. It does not
+ * update the initialization bit counter; the caller should call
+ * credit_init_bits if this is appropriate.
  */
-static void mix_pool_bytes(const void *in, size_t nbytes)
+static void mix_pool_bytes(const void *buf, size_t len)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&input_pool.lock, flags);
-       _mix_pool_bytes(in, nbytes);
+       _mix_pool_bytes(buf, len);
        spin_unlock_irqrestore(&input_pool.lock, flags);
 }
 
-static void credit_entropy_bits(size_t nbits)
-{
-       unsigned int entropy_count, orig, add;
-
-       if (!nbits)
-               return;
-
-       add = min_t(size_t, nbits, POOL_BITS);
-
-       do {
-               orig = READ_ONCE(input_pool.entropy_count);
-               entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
-       } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
-
-       if (!crng_ready() && entropy_count >= POOL_MIN_BITS)
-               crng_reseed(false);
-}
-
 /*
  * This is an HKDF-like construction for using the hashed collected entropy
  * as a PRF key, that's then expanded block-by-block.
  */
-static void extract_entropy(void *buf, size_t nbytes)
+static void extract_entropy(void *buf, size_t len)
 {
        unsigned long flags;
        u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
@@ -867,12 +617,12 @@ static void extract_entropy(void *buf, size_t nbytes)
        spin_unlock_irqrestore(&input_pool.lock, flags);
        memzero_explicit(next_key, sizeof(next_key));
 
-       while (nbytes) {
-               i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE);
+       while (len) {
+               i = min_t(size_t, len, BLAKE2S_HASH_SIZE);
                /* output = HASHPRF(seed, RDSEED || ++counter) */
                ++block.counter;
                blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
-               nbytes -= i;
+               len -= i;
                buf += i;
        }
 
@@ -880,23 +630,42 @@ static void extract_entropy(void *buf, size_t nbytes)
        memzero_explicit(&block, sizeof(block));
 }
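
The expand loop above is the "HKDF-like" part: a fixed seed keys a PRF, and successive output blocks are PRF(seed, ++counter). A toy with a stand-in mixer in place of BLAKE2s, just to show the shape of the loop:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Stand-in PRF (splitmix-style mixer), NOT a cryptographic hash. */
    static uint64_t toy_prf(uint64_t seed, uint64_t counter)
    {
            uint64_t x = seed ^ (counter * 0x9e3779b97f4a7c15ULL);

            x ^= x >> 31;
            x *= 0xbf58476d1ce4e5b9ULL;
            return x ^ (x >> 27);
    }

    static void toy_extract(uint64_t seed, uint8_t *buf, size_t len)
    {
            uint64_t counter = 0;

            while (len) {
                    uint64_t block = toy_prf(seed, ++counter);
                    size_t i = len < sizeof(block) ? len : sizeof(block);

                    memcpy(buf, &block, i);
                    buf += i;
                    len -= i;
            }
    }

    int main(void)
    {
            uint8_t out[20];

            toy_extract(0x1234, out, sizeof(out));
            for (size_t i = 0; i < sizeof(out); i++)
                    printf("%02x", out[i]);
            printf("\n");
            return 0;
    }
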
 
-/*
- * First we make sure we have POOL_MIN_BITS of entropy in the pool unless force
- * is true, and then we set the entropy count to zero (but don't actually touch
- * any data). Only then can we extract a new key with extract_entropy().
- */
-static bool drain_entropy(void *buf, size_t nbytes, bool force)
+#define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits)
+
+static void __cold _credit_init_bits(size_t bits)
 {
-       unsigned int entropy_count;
+       static struct execute_work set_ready;
+       unsigned int new, orig, add;
+       unsigned long flags;
+
+       if (!bits)
+               return;
+
+       add = min_t(size_t, bits, POOL_BITS);
+
        do {
-               entropy_count = READ_ONCE(input_pool.entropy_count);
-               if (!force && entropy_count < POOL_MIN_BITS)
-                       return false;
-       } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
-       extract_entropy(buf, nbytes);
-       wake_up_interruptible(&random_write_wait);
-       kill_fasync(&fasync, SIGIO, POLL_OUT);
-       return true;
+               orig = READ_ONCE(input_pool.init_bits);
+               new = min_t(unsigned int, POOL_BITS, orig + add);
+       } while (cmpxchg(&input_pool.init_bits, orig, new) != orig);
+
+       if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
+               crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */
+               execute_in_process_context(crng_set_ready, &set_ready);
+               wake_up_interruptible(&crng_init_wait);
+               kill_fasync(&fasync, SIGIO, POLL_IN);
+               pr_notice("crng init done\n");
+               if (urandom_warning.missed)
+                       pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
+                                 urandom_warning.missed);
+       } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) {
+               spin_lock_irqsave(&base_crng.lock, flags);
+               /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */
+               if (crng_init == CRNG_EMPTY) {
+                       extract_entropy(base_crng.key, sizeof(base_crng.key));
+                       crng_init = CRNG_EARLY;
+               }
+               spin_unlock_irqrestore(&base_crng.lock, flags);
+       }
 }
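
The cmpxchg loop in _credit_init_bits() is a lock-free saturating add: recompute the capped sum, and retry if another CPU changed init_bits in the meantime. The same shape in portable C11 atomics:

    #include <stdatomic.h>
    #include <stdio.h>

    #define POOL_BITS 256

    static _Atomic unsigned int init_bits;

    /* Lock-free saturating add, same shape as the loop above. */
    static unsigned int credit_bits(unsigned int add)
    {
            unsigned int orig, new;

            orig = atomic_load(&init_bits);
            do {
                    new = orig + add;
                    if (new > POOL_BITS)
                            new = POOL_BITS;
                    /* On failure, orig is reloaded with the current value. */
            } while (!atomic_compare_exchange_weak(&init_bits, &orig, new));
            return new;
    }

    int main(void)
    {
            printf("%u\n", credit_bits(128)); /* 128 */
            printf("%u\n", credit_bits(200)); /* saturates at 256 */
            return 0;
    }
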
 
 
@@ -907,15 +676,13 @@ static bool drain_entropy(void *buf, size_t nbytes, bool force)
  * The following exported functions are used for pushing entropy into
  * the above entropy accumulation routines:
  *
- *     void add_device_randomness(const void *buf, size_t size);
- *     void add_input_randomness(unsigned int type, unsigned int code,
- *                               unsigned int value);
- *     void add_disk_randomness(struct gendisk *disk);
- *     void add_hwgenerator_randomness(const void *buffer, size_t count,
- *                                     size_t entropy);
- *     void add_bootloader_randomness(const void *buf, size_t size);
- *     void add_vmfork_randomness(const void *unique_vm_id, size_t size);
+ *     void add_device_randomness(const void *buf, size_t len);
+ *     void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy);
+ *     void add_bootloader_randomness(const void *buf, size_t len);
+ *     void add_vmfork_randomness(const void *unique_vm_id, size_t len);
  *     void add_interrupt_randomness(int irq);
+ *     void add_input_randomness(unsigned int type, unsigned int code, unsigned int value);
+ *     void add_disk_randomness(struct gendisk *disk);
  *
  * add_device_randomness() adds data to the input pool that
  * is likely to differ between two devices (or possibly even per boot).
@@ -925,26 +692,13 @@ static bool drain_entropy(void *buf, size_t nbytes, bool force)
  * that might otherwise be identical and have very little entropy
  * available to them (particularly common in the embedded world).
  *
- * add_input_randomness() uses the input layer interrupt timing, as well
- * as the event type information from the hardware.
- *
- * add_disk_randomness() uses what amounts to the seek time of block
- * layer request events, on a per-disk_devt basis, as input to the
- * entropy pool. Note that high-speed solid state drives with very low
- * seek times do not make for good sources of entropy, as their seek
- * times are usually fairly consistent.
- *
- * The above two routines try to estimate how many bits of entropy
- * to credit. They do this by keeping track of the first and second
- * order deltas of the event timings.
- *
  * add_hwgenerator_randomness() is for true hardware RNGs, and will credit
  * entropy as specified by the caller. If the entropy pool is full it will
  * block until more entropy is needed.
  *
- * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or
- * add_device_randomness(), depending on whether or not the configuration
- * option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
+ * add_bootloader_randomness() is called by bootloader drivers, such as EFI
+ * and device tree, and credits its input depending on whether or not the
+ * configuration option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
  *
  * add_vmfork_randomness() adds a unique (but not necessarily secret) ID
  * representing the current instance of a VM to the pool, without crediting,
@@ -955,8 +709,22 @@ static bool drain_entropy(void *buf, size_t nbytes, bool force)
  * as inputs, it feeds the input pool roughly once a second or after 64
  * interrupts, crediting 1 bit of entropy for whichever comes first.
  *
+ * add_input_randomness() uses the input layer interrupt timing, as well
+ * as the event type information from the hardware.
+ *
+ * add_disk_randomness() uses what amounts to the seek time of block
+ * layer request events, on a per-disk_devt basis, as input to the
+ * entropy pool. Note that high-speed solid state drives with very low
+ * seek times do not make for good sources of entropy, as their seek
+ * times are usually fairly consistent.
+ *
+ * The last two routines try to estimate how many bits of entropy
+ * to credit. They do this by keeping track of the first and second
+ * order deltas of the event timings.
+ *
  **********************************************************************/
 
+static bool used_arch_random;
 static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
 static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER);
 static int __init parse_trust_cpu(char *arg)
@@ -970,51 +738,90 @@ static int __init parse_trust_bootloader(char *arg)
 early_param("random.trust_cpu", parse_trust_cpu);
 early_param("random.trust_bootloader", parse_trust_bootloader);
 
+static int random_pm_notification(struct notifier_block *nb, unsigned long action, void *data)
+{
+       unsigned long flags, entropy = random_get_entropy();
+
+       /*
+        * Encode a representation of how long the system has been suspended,
+        * in a way that is distinct from prior system suspends.
+        */
+       ktime_t stamps[] = { ktime_get(), ktime_get_boottime(), ktime_get_real() };
+
+       spin_lock_irqsave(&input_pool.lock, flags);
+       _mix_pool_bytes(&action, sizeof(action));
+       _mix_pool_bytes(stamps, sizeof(stamps));
+       _mix_pool_bytes(&entropy, sizeof(entropy));
+       spin_unlock_irqrestore(&input_pool.lock, flags);
+
+       if (crng_ready() && (action == PM_RESTORE_PREPARE ||
+           (action == PM_POST_SUSPEND &&
+            !IS_ENABLED(CONFIG_PM_AUTOSLEEP) && !IS_ENABLED(CONFIG_ANDROID)))) {
+               crng_reseed();
+               pr_notice("crng reseeded on system resumption\n");
+       }
+       return 0;
+}
+
+static struct notifier_block pm_notifier = { .notifier_call = random_pm_notification };
+
 /*
  * The first collection of entropy occurs at system boot while interrupts
- * are still turned off. Here we push in RDSEED, a timestamp, and utsname().
- * Depending on the above configuration knob, RDSEED may be considered
- * sufficient for initialization. Note that much earlier setup may already
- * have pushed entropy into the input pool by the time we get here.
+ * are still turned off. Here we push in latent entropy, RDSEED, a timestamp,
+ * utsname(), and the command line. Depending on the above configuration knob,
+ * RDSEED may be considered sufficient for initialization. Note that much
+ * earlier setup may already have pushed entropy into the input pool by the
+ * time we get here.
  */
-int __init rand_initialize(void)
+int __init random_init(const char *command_line)
 {
-       size_t i;
        ktime_t now = ktime_get_real();
-       bool arch_init = true;
-       unsigned long rv;
+       unsigned int i, arch_bytes;
+       unsigned long entropy;
 
 #if defined(LATENT_ENTROPY_PLUGIN)
        static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy;
        _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed));
 #endif
 
-       for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) {
-               if (!arch_get_random_seed_long_early(&rv) &&
-                   !arch_get_random_long_early(&rv)) {
-                       rv = random_get_entropy();
-                       arch_init = false;
+       for (i = 0, arch_bytes = BLAKE2S_BLOCK_SIZE;
+            i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) {
+               if (!arch_get_random_seed_long_early(&entropy) &&
+                   !arch_get_random_long_early(&entropy)) {
+                       entropy = random_get_entropy();
+                       arch_bytes -= sizeof(entropy);
                }
-               _mix_pool_bytes(&rv, sizeof(rv));
+               _mix_pool_bytes(&entropy, sizeof(entropy));
        }
        _mix_pool_bytes(&now, sizeof(now));
        _mix_pool_bytes(utsname(), sizeof(*(utsname())));
+       _mix_pool_bytes(command_line, strlen(command_line));
+       add_latent_entropy();
 
-       extract_entropy(base_crng.key, sizeof(base_crng.key));
-       ++base_crng.generation;
+       if (crng_ready())
+               crng_reseed();
+       else if (trust_cpu)
+               credit_init_bits(arch_bytes * 8);
+       used_arch_random = arch_bytes * 8 >= POOL_READY_BITS;
 
-       if (arch_init && trust_cpu && !crng_ready()) {
-               crng_init = 2;
-               pr_notice("crng init done (trusting CPU's manufacturer)\n");
-       }
+       WARN_ON(register_pm_notifier(&pm_notifier));
 
-       if (ratelimit_disable) {
-               urandom_warning.interval = 0;
-               unseeded_warning.interval = 0;
-       }
+       WARN(!random_get_entropy(), "Missing cycle counter and fallback timer; RNG "
+                                   "entropy collection will consequently suffer.");
        return 0;
 }
 
+/*
+ * Returns whether arch randomness has been mixed into the initial
+ * state of the RNG, regardless of whether or not that randomness
+ * was credited. Knowing this is only good for a very limited set
+ * of uses, such as early init printk pointer obfuscation.
+ */
+bool rng_has_arch_random(void)
+{
+       return used_arch_random;
+}
+
 /*
  * Add device- or boot-specific data to the input pool to help
  * initialize it.
@@ -1023,164 +830,46 @@ int __init rand_initialize(void)
  * the entropy pool having similar initial state across largely
  * identical devices.
  */
-void add_device_randomness(const void *buf, size_t size)
+void add_device_randomness(const void *buf, size_t len)
 {
-       unsigned long cycles = random_get_entropy();
-       unsigned long flags, now = jiffies;
-
-       if (crng_init == 0 && size)
-               crng_pre_init_inject(buf, size, false);
+       unsigned long entropy = random_get_entropy();
+       unsigned long flags;
 
        spin_lock_irqsave(&input_pool.lock, flags);
-       _mix_pool_bytes(&cycles, sizeof(cycles));
-       _mix_pool_bytes(&now, sizeof(now));
-       _mix_pool_bytes(buf, size);
+       _mix_pool_bytes(&entropy, sizeof(entropy));
+       _mix_pool_bytes(buf, len);
        spin_unlock_irqrestore(&input_pool.lock, flags);
 }
 EXPORT_SYMBOL(add_device_randomness);
 
-/* There is one of these per entropy source */
-struct timer_rand_state {
-       unsigned long last_time;
-       long last_delta, last_delta2;
-};
-
-/*
- * This function adds entropy to the entropy "pool" by using timing
- * delays.  It uses the timer_rand_state structure to make an estimate
- * of how many bits of entropy this call has added to the pool.
- *
- * The number "num" is also added to the pool - it should somehow describe
- * the type of event which just happened.  This is currently 0-255 for
- * keyboard scan codes, and 256 upwards for interrupts.
- */
-static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
-{
-       unsigned long cycles = random_get_entropy(), now = jiffies, flags;
-       long delta, delta2, delta3;
-
-       spin_lock_irqsave(&input_pool.lock, flags);
-       _mix_pool_bytes(&cycles, sizeof(cycles));
-       _mix_pool_bytes(&now, sizeof(now));
-       _mix_pool_bytes(&num, sizeof(num));
-       spin_unlock_irqrestore(&input_pool.lock, flags);
-
-       /*
-        * Calculate number of bits of randomness we probably added.
-        * We take into account the first, second and third-order deltas
-        * in order to make our estimate.
-        */
-       delta = now - READ_ONCE(state->last_time);
-       WRITE_ONCE(state->last_time, now);
-
-       delta2 = delta - READ_ONCE(state->last_delta);
-       WRITE_ONCE(state->last_delta, delta);
-
-       delta3 = delta2 - READ_ONCE(state->last_delta2);
-       WRITE_ONCE(state->last_delta2, delta2);
-
-       if (delta < 0)
-               delta = -delta;
-       if (delta2 < 0)
-               delta2 = -delta2;
-       if (delta3 < 0)
-               delta3 = -delta3;
-       if (delta > delta2)
-               delta = delta2;
-       if (delta > delta3)
-               delta = delta3;
-
-       /*
-        * delta is now minimum absolute delta.
-        * Round down by 1 bit on general principles,
-        * and limit entropy estimate to 12 bits.
-        */
-       credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11));
-}
-
-void add_input_randomness(unsigned int type, unsigned int code,
-                         unsigned int value)
-{
-       static unsigned char last_value;
-       static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES };
-
-       /* Ignore autorepeat and the like. */
-       if (value == last_value)
-               return;
-
-       last_value = value;
-       add_timer_randomness(&input_timer_state,
-                            (type << 4) ^ code ^ (code >> 4) ^ value);
-}
-EXPORT_SYMBOL_GPL(add_input_randomness);
-
-#ifdef CONFIG_BLOCK
-void add_disk_randomness(struct gendisk *disk)
-{
-       if (!disk || !disk->random)
-               return;
-       /* First major is 1, so we get >= 0x200 here. */
-       add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
-}
-EXPORT_SYMBOL_GPL(add_disk_randomness);
-
-void rand_initialize_disk(struct gendisk *disk)
-{
-       struct timer_rand_state *state;
-
-       /*
-        * If kzalloc returns null, we just won't use that entropy
-        * source.
-        */
-       state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
-       if (state) {
-               state->last_time = INITIAL_JIFFIES;
-               disk->random = state;
-       }
-}
-#endif
-
 /*
  * Interface for in-kernel drivers of true hardware RNGs.
  * Those devices may produce endless random bits and will be throttled
  * when our pool is full.
  */
-void add_hwgenerator_randomness(const void *buffer, size_t count,
-                               size_t entropy)
+void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy)
 {
-       if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) {
-               crng_pre_init_inject(buffer, count, true);
-               mix_pool_bytes(buffer, count);
-               return;
-       }
+       mix_pool_bytes(buf, len);
+       credit_init_bits(entropy);
 
        /*
-        * Throttle writing if we're above the trickle threshold.
-        * We'll be woken up again once below POOL_MIN_BITS, when
-        * the calling thread is about to terminate, or once
-        * CRNG_RESEED_INTERVAL has elapsed.
+        * Throttle writing to once every CRNG_RESEED_INTERVAL, unless
+        * we're not yet initialized.
         */
-       wait_event_interruptible_timeout(random_write_wait,
-                       !system_wq || kthread_should_stop() ||
-                       input_pool.entropy_count < POOL_MIN_BITS,
-                       CRNG_RESEED_INTERVAL);
-       mix_pool_bytes(buffer, count);
-       credit_entropy_bits(entropy);
+       if (!kthread_should_stop() && crng_ready())
+               schedule_timeout_interruptible(CRNG_RESEED_INTERVAL);
 }
 EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
 
 /*
- * Handle random seed passed by bootloader.
- * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
- * it would be regarded as device data.
- * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
+ * Handle random seed passed by bootloader, and credit it if
+ * CONFIG_RANDOM_TRUST_BOOTLOADER is set.
  */
-void add_bootloader_randomness(const void *buf, size_t size)
+void __cold add_bootloader_randomness(const void *buf, size_t len)
 {
+       mix_pool_bytes(buf, len);
        if (trust_bootloader)
-               add_hwgenerator_randomness(buf, size, size * 8);
-       else
-               add_device_randomness(buf, size);
+               credit_init_bits(len * 8);
 }
 EXPORT_SYMBOL_GPL(add_bootloader_randomness);
 
@@ -1192,11 +881,11 @@ static BLOCKING_NOTIFIER_HEAD(vmfork_chain);
  * don't credit it, but we do immediately force a reseed after so
  * that it's used by the crng posthaste.
  */
-void add_vmfork_randomness(const void *unique_vm_id, size_t size)
+void __cold add_vmfork_randomness(const void *unique_vm_id, size_t len)
 {
-       add_device_randomness(unique_vm_id, size);
+       add_device_randomness(unique_vm_id, len);
        if (crng_ready()) {
-               crng_reseed(true);
+               crng_reseed();
                pr_notice("crng reseeded due to virtual machine fork\n");
        }
        blocking_notifier_call_chain(&vmfork_chain, 0, NULL);
@@ -1205,13 +894,13 @@ void add_vmfork_randomness(const void *unique_vm_id, size_t size)
 EXPORT_SYMBOL_GPL(add_vmfork_randomness);
 #endif
 
-int register_random_vmfork_notifier(struct notifier_block *nb)
+int __cold register_random_vmfork_notifier(struct notifier_block *nb)
 {
        return blocking_notifier_chain_register(&vmfork_chain, nb);
 }
 EXPORT_SYMBOL_GPL(register_random_vmfork_notifier);
 
-int unregister_random_vmfork_notifier(struct notifier_block *nb)
+int __cold unregister_random_vmfork_notifier(struct notifier_block *nb)
 {
        return blocking_notifier_chain_unregister(&vmfork_chain, nb);
 }
@@ -1223,17 +912,15 @@ struct fast_pool {
        unsigned long pool[4];
        unsigned long last;
        unsigned int count;
-       u16 reg_idx;
 };
 
 static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = {
 #ifdef CONFIG_64BIT
-       /* SipHash constants */
-       .pool = { 0x736f6d6570736575UL, 0x646f72616e646f6dUL,
-                 0x6c7967656e657261UL, 0x7465646279746573UL }
+#define FASTMIX_PERM SIPHASH_PERMUTATION
+       .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 }
 #else
-       /* HalfSipHash constants */
-       .pool = { 0, 0, 0x6c796765U, 0x74656462U }
+#define FASTMIX_PERM HSIPHASH_PERMUTATION
+       .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 }
 #endif
 };
 
@@ -1241,27 +928,16 @@ static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = {
  * This is [Half]SipHash-1-x, starting from an empty key. Because
  * the key is fixed, it assumes that its inputs are non-malicious,
  * and therefore this has no security on its own. s represents the
- * 128 or 256-bit SipHash state, while v represents a 128-bit input.
+ * four-word SipHash state, while v represents a two-word input.
  */
-static void fast_mix(unsigned long s[4], const unsigned long *v)
+static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2)
 {
-       size_t i;
-
-       for (i = 0; i < 16 / sizeof(long); ++i) {
-               s[3] ^= v[i];
-#ifdef CONFIG_64BIT
-               s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32);
-               s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2];
-               s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0];
-               s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32);
-#else
-               s[0] += s[1]; s[1] = rol32(s[1],  5); s[1] ^= s[0]; s[0] = rol32(s[0], 16);
-               s[2] += s[3]; s[3] = rol32(s[3],  8); s[3] ^= s[2];
-               s[0] += s[3]; s[3] = rol32(s[3],  7); s[3] ^= s[0];
-               s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16);
-#endif
-               s[0] ^= v[i];
-       }
+       s[3] ^= v1;
+       FASTMIX_PERM(s[0], s[1], s[2], s[3]);
+       s[0] ^= v1;
+       s[3] ^= v2;
+       FASTMIX_PERM(s[0], s[1], s[2], s[3]);
+       s[0] ^= v2;
 }
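
FASTMIX_PERM above is assumed to expand to the same SipHash round that the removed open-coded 64-bit loop performed; a sketch of that expansion for reference (see include/linux/siphash.h for the authoritative SIPHASH_PERMUTATION definition):

    #define SIPHASH_PERMUTATION_SKETCH(a, b, c, d) do {                         \
            (a) += (b); (b) = rol64((b), 13); (b) ^= (a); (a) = rol64((a), 32); \
            (c) += (d); (d) = rol64((d), 16); (d) ^= (c);                       \
            (a) += (d); (d) = rol64((d), 21); (d) ^= (a);                       \
            (c) += (b); (b) = rol64((b), 17); (b) ^= (c); (c) = rol64((c), 32); \
    } while (0)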
 
 #ifdef CONFIG_SMP
@@ -1269,7 +945,7 @@ static void fast_mix(unsigned long s[4], const unsigned long *v)
  * This function is called when the CPU has just come online, with
  * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE.
  */
-int random_online_cpu(unsigned int cpu)
+int __cold random_online_cpu(unsigned int cpu)
 {
        /*
         * During CPU shutdown and before CPU onlining, add_interrupt_
@@ -1287,33 +963,18 @@ int random_online_cpu(unsigned int cpu)
 }
 #endif
 
-static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs)
-{
-       unsigned long *ptr = (unsigned long *)regs;
-       unsigned int idx;
-
-       if (regs == NULL)
-               return 0;
-       idx = READ_ONCE(f->reg_idx);
-       if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long))
-               idx = 0;
-       ptr += idx++;
-       WRITE_ONCE(f->reg_idx, idx);
-       return *ptr;
-}
-
 static void mix_interrupt_randomness(struct work_struct *work)
 {
        struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix);
        /*
-        * The size of the copied stack pool is explicitly 16 bytes so that we
-        * tax mix_pool_byte()'s compression function the same amount on all
-        * platforms. This means on 64-bit we copy half the pool into this,
-        * while on 32-bit we copy all of it. The entropy is supposed to be
-        * sufficiently dispersed between bits that in the sponge-like
-        * half case, on average we don't wind up "losing" some.
+        * The size of the copied stack pool is explicitly 2 longs so that we
+        * only ever ingest half of the siphash output each time, retaining
+        * the other half as the next "key" that carries over. The entropy is
+        * supposed to be sufficiently dispersed between bits so on average
+        * we don't wind up "losing" some.
         */
-       u8 pool[16];
+       unsigned long pool[2];
+       unsigned int count;
 
        /* Check to see if we're running on the wrong CPU due to hotplug. */
        local_irq_disable();
@@ -1327,17 +988,13 @@ static void mix_interrupt_randomness(struct work_struct *work)
         * consistent view, before we reenable irqs again.
         */
        memcpy(pool, fast_pool->pool, sizeof(pool));
+       count = fast_pool->count;
        fast_pool->count = 0;
        fast_pool->last = jiffies;
        local_irq_enable();
 
-       if (unlikely(crng_init == 0)) {
-               crng_pre_init_inject(pool, sizeof(pool), true);
-               mix_pool_bytes(pool, sizeof(pool));
-       } else {
-               mix_pool_bytes(pool, sizeof(pool));
-               credit_entropy_bits(1);
-       }
+       mix_pool_bytes(pool, sizeof(pool));
+       credit_init_bits(max(1u, (count & U16_MAX) / 64));
 
        memzero_explicit(pool, sizeof(pool));
 }
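
A worked example of the crediting above: MIX_INFLIGHT lives in bit 31, so count & U16_MAX strips it before dividing. The minimum batch of 64 interrupts credits max(1u, 64 / 64) = 1 bit, a CPU that accumulated 256 events before the worker ran credits 4 bits, and a count below 64 (reachable via the once-per-HZ path in add_interrupt_randomness()) still credits the 1-bit floor.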
@@ -1345,37 +1002,19 @@ static void mix_interrupt_randomness(struct work_struct *work)
 void add_interrupt_randomness(int irq)
 {
        enum { MIX_INFLIGHT = 1U << 31 };
-       unsigned long cycles = random_get_entropy(), now = jiffies;
+       unsigned long entropy = random_get_entropy();
        struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
        struct pt_regs *regs = get_irq_regs();
        unsigned int new_count;
-       union {
-               u32 u32[4];
-               u64 u64[2];
-               unsigned long longs[16 / sizeof(long)];
-       } irq_data;
-
-       if (cycles == 0)
-               cycles = get_reg(fast_pool, regs);
-
-       if (sizeof(unsigned long) == 8) {
-               irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq;
-               irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_;
-       } else {
-               irq_data.u32[0] = cycles ^ irq;
-               irq_data.u32[1] = now;
-               irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_;
-               irq_data.u32[3] = get_reg(fast_pool, regs);
-       }
 
-       fast_mix(fast_pool->pool, irq_data.longs);
+       fast_mix(fast_pool->pool, entropy,
+                (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq));
        new_count = ++fast_pool->count;
 
        if (new_count & MIX_INFLIGHT)
                return;
 
-       if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) ||
-                              unlikely(crng_init == 0)))
+       if (new_count < 64 && !time_is_before_jiffies(fast_pool->last + HZ))
                return;
 
        if (unlikely(!fast_pool->mix.func))
@@ -1385,6 +1024,132 @@ void add_interrupt_randomness(int irq)
 }
 EXPORT_SYMBOL_GPL(add_interrupt_randomness);
 
+/* There is one of these per entropy source */
+struct timer_rand_state {
+       unsigned long last_time;
+       long last_delta, last_delta2;
+};
+
+/*
+ * This function adds entropy to the entropy "pool" by using timing
+ * delays. It uses the timer_rand_state structure to make an estimate
+ * of how many bits of entropy this call has added to the pool. The
+ * value "num" is also added to the pool; it should somehow describe
+ * the type of event that just happened.
+ */
+static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
+{
+       unsigned long entropy = random_get_entropy(), now = jiffies, flags;
+       long delta, delta2, delta3;
+       unsigned int bits;
+
+       /*
+        * If we're in a hard IRQ, add_interrupt_randomness() will be called
+        * sometime after, so mix into the fast pool.
+        */
+       if (in_hardirq()) {
+               fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num);
+       } else {
+               spin_lock_irqsave(&input_pool.lock, flags);
+               _mix_pool_bytes(&entropy, sizeof(entropy));
+               _mix_pool_bytes(&num, sizeof(num));
+               spin_unlock_irqrestore(&input_pool.lock, flags);
+       }
+
+       if (crng_ready())
+               return;
+
+       /*
+        * Calculate number of bits of randomness we probably added.
+        * We take into account the first, second and third-order deltas
+        * in order to make our estimate.
+        */
+       delta = now - READ_ONCE(state->last_time);
+       WRITE_ONCE(state->last_time, now);
+
+       delta2 = delta - READ_ONCE(state->last_delta);
+       WRITE_ONCE(state->last_delta, delta);
+
+       delta3 = delta2 - READ_ONCE(state->last_delta2);
+       WRITE_ONCE(state->last_delta2, delta2);
+
+       if (delta < 0)
+               delta = -delta;
+       if (delta2 < 0)
+               delta2 = -delta2;
+       if (delta3 < 0)
+               delta3 = -delta3;
+       if (delta > delta2)
+               delta = delta2;
+       if (delta > delta3)
+               delta = delta3;
+
+       /*
+        * delta is now minimum absolute delta. Round down by 1 bit
+        * on general principles, and limit entropy estimate to 11 bits.
+        */
+       bits = min(fls(delta >> 1), 11);
+
+       /*
+        * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness()
+        * will run after this, which uses a different crediting scheme of 1 bit
+        * per every 64 interrupts. In order to let that function do accounting
+        * close to the one in this function, we credit a full 64/64 bit per bit,
+        * and then subtract one to account for the extra one added.
+        */
+       if (in_hardirq())
+               this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1;
+       else
+               _credit_init_bits(bits);
+}
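
A worked example of the hard-IRQ branch above: if the deltas estimate bits = 3, the fast pool's count is bumped by max(1u, 3 * 64) - 1 = 191. Together with the ++fast_pool->count that add_interrupt_randomness() performs for the same interrupt, that totals 192 = 3 * 64, so when mix_interrupt_randomness() later credits count / 64 bits, this event contributes exactly the 3 bits estimated here.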
+
+void add_input_randomness(unsigned int type, unsigned int code, unsigned int value)
+{
+       static unsigned char last_value;
+       static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES };
+
+       /* Ignore autorepeat and the like. */
+       if (value == last_value)
+               return;
+
+       last_value = value;
+       add_timer_randomness(&input_timer_state,
+                            (type << 4) ^ code ^ (code >> 4) ^ value);
+}
+EXPORT_SYMBOL_GPL(add_input_randomness);
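
A short usage illustration (the in-tree caller is the input core, which invokes this for every event it handles); note how the last_value check above filters consecutive identical values:

    add_input_randomness(EV_KEY, KEY_ENTER, 1); /* press: mixed in */
    add_input_randomness(EV_KEY, KEY_ENTER, 1); /* same value: ignored */
    add_input_randomness(EV_KEY, KEY_ENTER, 0); /* release: mixed in */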
+
+#ifdef CONFIG_BLOCK
+void add_disk_randomness(struct gendisk *disk)
+{
+       if (!disk || !disk->random)
+               return;
+       /* First major is 1, so we get >= 0x200 here. */
+       add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
+}
+EXPORT_SYMBOL_GPL(add_disk_randomness);
+
+void __cold rand_initialize_disk(struct gendisk *disk)
+{
+       struct timer_rand_state *state;
+
+       /*
+        * If kzalloc returns null, we just won't use that entropy
+        * source.
+        */
+       state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+       if (state) {
+               state->last_time = INITIAL_JIFFIES;
+               disk->random = state;
+       }
+}
+#endif
+
+struct entropy_timer_state {
+       unsigned long entropy;
+       struct timer_list timer;
+       unsigned int samples, samples_per_bit;
+};
+
 /*
  * Each time the timer fires, we expect that we got an unpredictable
  * jump in the cycle counter. Even if the timer is running on another
@@ -1398,40 +1163,50 @@ EXPORT_SYMBOL_GPL(add_interrupt_randomness);
  *
  * So the re-arming always happens in the entropy loop itself.
  */
-static void entropy_timer(struct timer_list *t)
+static void __cold entropy_timer(struct timer_list *timer)
 {
-       credit_entropy_bits(1);
+       struct entropy_timer_state *state = container_of(timer, struct entropy_timer_state, timer);
+
+       if (++state->samples == state->samples_per_bit) {
+               credit_init_bits(1);
+               state->samples = 0;
+       }
 }
 
 /*
  * If we have an actual cycle counter, see if we can
  * generate enough entropy with timing noise
  */
-static void try_to_generate_entropy(void)
+static void __cold try_to_generate_entropy(void)
 {
-       struct {
-               unsigned long cycles;
-               struct timer_list timer;
-       } stack;
-
-       stack.cycles = random_get_entropy();
-
-       /* Slow counter - or none. Don't even bother */
-       if (stack.cycles == random_get_entropy())
+       enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = 32 };
+       struct entropy_timer_state stack;
+       unsigned int i, num_different = 0;
+       unsigned long last = random_get_entropy();
+
+       for (i = 0; i < NUM_TRIAL_SAMPLES - 1; ++i) {
+               stack.entropy = random_get_entropy();
+               if (stack.entropy != last)
+                       ++num_different;
+               last = stack.entropy;
+       }
+       stack.samples_per_bit = DIV_ROUND_UP(NUM_TRIAL_SAMPLES, num_different + 1);
+       if (stack.samples_per_bit > MAX_SAMPLES_PER_BIT)
                return;
 
+       stack.samples = 0;
        timer_setup_on_stack(&stack.timer, entropy_timer, 0);
        while (!crng_ready() && !signal_pending(current)) {
                if (!timer_pending(&stack.timer))
                        mod_timer(&stack.timer, jiffies + 1);
-               mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
+               mix_pool_bytes(&stack.entropy, sizeof(stack.entropy));
                schedule();
-               stack.cycles = random_get_entropy();
+               stack.entropy = random_get_entropy();
        }
 
        del_timer_sync(&stack.timer);
        destroy_timer_on_stack(&stack.timer);
-       mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
+       mix_pool_bytes(&stack.entropy, sizeof(stack.entropy));
 }
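
A worked example of the calibration above: with NUM_TRIAL_SAMPLES = 8192, if 1023 of the back-to-back reads differed from their predecessor, then samples_per_bit = DIV_ROUND_UP(8192, 1024) = 8 and entropy_timer() credits one bit for every 8 timer firings. If fewer than about one read in 32 changes, samples_per_bit exceeds MAX_SAMPLES_PER_BIT = 32 and the counter is considered too coarse to bother with.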
 
 
@@ -1463,9 +1238,12 @@ static void try_to_generate_entropy(void)
  *
  **********************************************************************/
 
-SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
-               flags)
+SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags)
 {
+       struct iov_iter iter;
+       struct iovec iov;
+       int ret;
+
        if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
                return -EINVAL;
 
@@ -1476,72 +1254,60 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
        if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
                return -EINVAL;
 
-       if (count > INT_MAX)
-               count = INT_MAX;
-
-       if (!(flags & GRND_INSECURE) && !crng_ready()) {
-               int ret;
-
+       if (!crng_ready() && !(flags & GRND_INSECURE)) {
                if (flags & GRND_NONBLOCK)
                        return -EAGAIN;
                ret = wait_for_random_bytes();
                if (unlikely(ret))
                        return ret;
        }
-       return get_random_bytes_user(buf, count);
+
+       ret = import_single_range(READ, ubuf, len, &iov, &iter);
+       if (unlikely(ret))
+               return ret;
+       return get_random_bytes_user(&iter);
 }
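
A self-contained userspace sketch of the semantics implemented above, using glibc's getrandom(3) wrapper (assumed available; the raw syscall behaves the same):

    #include <stdio.h>
    #include <sys/random.h>

    int main(void)
    {
            unsigned char key[32];

            /* Flags of 0 block until the crng is ready, per
             * wait_for_random_bytes() above. */
            if (getrandom(key, sizeof(key), 0) != sizeof(key)) {
                    perror("getrandom");
                    return 1;
            }
            /* GRND_NONBLOCK would instead fail with EAGAIN while unseeded. */
            return 0;
    }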
 
 static __poll_t random_poll(struct file *file, poll_table *wait)
 {
-       __poll_t mask;
-
        poll_wait(file, &crng_init_wait, wait);
-       poll_wait(file, &random_write_wait, wait);
-       mask = 0;
-       if (crng_ready())
-               mask |= EPOLLIN | EPOLLRDNORM;
-       if (input_pool.entropy_count < POOL_MIN_BITS)
-               mask |= EPOLLOUT | EPOLLWRNORM;
-       return mask;
+       return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM;
 }
 
-static int write_pool(const char __user *ubuf, size_t count)
+static ssize_t write_pool_user(struct iov_iter *iter)
 {
-       size_t len;
-       int ret = 0;
        u8 block[BLAKE2S_BLOCK_SIZE];
+       ssize_t ret = 0;
+       size_t copied;
+
+       if (unlikely(!iov_iter_count(iter)))
+               return 0;
+
+       for (;;) {
+               copied = copy_from_iter(block, sizeof(block), iter);
+               ret += copied;
+               mix_pool_bytes(block, copied);
+               if (!iov_iter_count(iter) || copied != sizeof(block))
+                       break;
 
-       while (count) {
-               len = min(count, sizeof(block));
-               if (copy_from_user(block, ubuf, len)) {
-                       ret = -EFAULT;
-                       goto out;
+               BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0);
+               if (ret % PAGE_SIZE == 0) {
+                       if (signal_pending(current))
+                               break;
+                       cond_resched();
                }
-               count -= len;
-               ubuf += len;
-               mix_pool_bytes(block, len);
-               cond_resched();
        }
 
-out:
        memzero_explicit(block, sizeof(block));
-       return ret;
+       return ret ? ret : -EFAULT;
 }
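
A worked note on the loop above: BLAKE2S_BLOCK_SIZE is 64 bytes and PAGE_SIZE is a power of two, so the BUILD_BUG_ON holds; on systems with 4 KiB pages the signal/resched check fires every 4096 / 64 = 64 blocks of copied input.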
 
-static ssize_t random_write(struct file *file, const char __user *buffer,
-                           size_t count, loff_t *ppos)
+static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter)
 {
-       int ret;
-
-       ret = write_pool(buffer, count);
-       if (ret)
-               return ret;
-
-       return (ssize_t)count;
+       return write_pool_user(iter);
 }
 
-static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
-                           loff_t *ppos)
+static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter)
 {
        static int maxwarn = 10;
 
@@ -1552,37 +1318,38 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
        if (!crng_ready())
                try_to_generate_entropy();
 
-       if (!crng_ready() && maxwarn > 0) {
-               maxwarn--;
-               if (__ratelimit(&urandom_warning))
-                       pr_notice("%s: uninitialized urandom read (%zd bytes read)\n",
-                                 current->comm, nbytes);
+       if (!crng_ready()) {
+               if (!ratelimit_disable && maxwarn <= 0)
+                       ++urandom_warning.missed;
+               else if (ratelimit_disable || __ratelimit(&urandom_warning)) {
+                       --maxwarn;
+                       pr_notice("%s: uninitialized urandom read (%zu bytes read)\n",
+                                 current->comm, iov_iter_count(iter));
+               }
        }
 
-       return get_random_bytes_user(buf, nbytes);
+       return get_random_bytes_user(iter);
 }
 
-static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
-                          loff_t *ppos)
+static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter)
 {
        int ret;
 
        ret = wait_for_random_bytes();
        if (ret != 0)
                return ret;
-       return get_random_bytes_user(buf, nbytes);
+       return get_random_bytes_user(iter);
 }
 
 static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 {
-       int size, ent_count;
        int __user *p = (int __user *)arg;
-       int retval;
+       int ent_count;
 
        switch (cmd) {
        case RNDGETENTCNT:
                /* Inherently racy, no point locking. */
-               if (put_user(input_pool.entropy_count, p))
+               if (put_user(input_pool.init_bits, p))
                        return -EFAULT;
                return 0;
        case RNDADDTOENTCNT:
@@ -1592,41 +1359,46 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
                        return -EFAULT;
                if (ent_count < 0)
                        return -EINVAL;
-               credit_entropy_bits(ent_count);
+               credit_init_bits(ent_count);
                return 0;
-       case RNDADDENTROPY:
+       case RNDADDENTROPY: {
+               struct iov_iter iter;
+               struct iovec iov;
+               ssize_t ret;
+               int len;
+
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
                if (get_user(ent_count, p++))
                        return -EFAULT;
                if (ent_count < 0)
                        return -EINVAL;
-               if (get_user(size, p++))
+               if (get_user(len, p++))
                        return -EFAULT;
-               retval = write_pool((const char __user *)p, size);
-               if (retval < 0)
-                       return retval;
-               credit_entropy_bits(ent_count);
+               ret = import_single_range(WRITE, p, len, &iov, &iter);
+               if (unlikely(ret))
+                       return ret;
+               ret = write_pool_user(&iter);
+               if (unlikely(ret < 0))
+                       return ret;
+               /* Since we're crediting, enforce that it was all written into the pool. */
+               if (unlikely(ret != len))
+                       return -EFAULT;
+               credit_init_bits(ent_count);
                return 0;
+       }
        case RNDZAPENTCNT:
        case RNDCLEARPOOL:
-               /*
-                * Clear the entropy pool counters. We no longer clear
-                * the entropy pool, as that's silly.
-                */
+               /* No longer has any effect. */
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
-               if (xchg(&input_pool.entropy_count, 0) >= POOL_MIN_BITS) {
-                       wake_up_interruptible(&random_write_wait);
-                       kill_fasync(&fasync, SIGIO, POLL_OUT);
-               }
                return 0;
        case RNDRESEEDCRNG:
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
                if (!crng_ready())
                        return -ENODATA;
-               crng_reseed(false);
+               crng_reseed();
                return 0;
        default:
                return -EINVAL;
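
A hedged userspace sketch of the RNDADDENTROPY path above, matching the layout the handler reads (entropy_count, then buf_size, then the payload); struct rand_pool_info comes from <linux/random.h>, and CAP_SYS_ADMIN is required:

    #include <fcntl.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/random.h>

    /* Mix in 16 seed bytes and credit 64 bits of entropy. */
    static int feed_entropy(const unsigned char seed[16])
    {
            struct rand_pool_info *info;
            int fd, ret;

            info = malloc(sizeof(*info) + 16);
            if (!info)
                    return -1;
            info->entropy_count = 64;       /* bits to credit */
            info->buf_size = 16;            /* payload bytes that follow */
            memcpy(info->buf, seed, 16);

            fd = open("/dev/urandom", O_WRONLY);
            ret = fd < 0 ? -1 : ioctl(fd, RNDADDENTROPY, info);
            if (fd >= 0)
                    close(fd);
            free(info);
            return ret;
    }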
@@ -1639,22 +1411,26 @@ static int random_fasync(int fd, struct file *filp, int on)
 }
 
 const struct file_operations random_fops = {
-       .read = random_read,
-       .write = random_write,
+       .read_iter = random_read_iter,
+       .write_iter = random_write_iter,
        .poll = random_poll,
        .unlocked_ioctl = random_ioctl,
        .compat_ioctl = compat_ptr_ioctl,
        .fasync = random_fasync,
        .llseek = noop_llseek,
+       .splice_read = generic_file_splice_read,
+       .splice_write = iter_file_splice_write,
 };
 
 const struct file_operations urandom_fops = {
-       .read = urandom_read,
-       .write = random_write,
+       .read_iter = urandom_read_iter,
+       .write_iter = random_write_iter,
        .unlocked_ioctl = random_ioctl,
        .compat_ioctl = compat_ptr_ioctl,
        .fasync = random_fasync,
        .llseek = noop_llseek,
+       .splice_read = generic_file_splice_read,
+       .splice_write = iter_file_splice_write,
 };
 
 
@@ -1678,7 +1454,7 @@ const struct file_operations urandom_fops = {
  *
  * - write_wakeup_threshold - the amount of entropy in the input pool
  *   below which write polls to /dev/random will unblock, requesting
- *   more entropy, tied to the POOL_MIN_BITS constant. It is writable
+ *   more entropy, tied to the POOL_READY_BITS constant. It is writable
  *   to avoid breaking old userspaces, but writing to it does not
  *   change any behavior of the RNG.
  *
@@ -1693,7 +1469,7 @@ const struct file_operations urandom_fops = {
 #include <linux/sysctl.h>
 
 static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ;
-static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS;
+static int sysctl_random_write_wakeup_bits = POOL_READY_BITS;
 static int sysctl_poolsize = POOL_BITS;
 static u8 sysctl_bootid[UUID_SIZE];
 
@@ -1702,7 +1478,7 @@ static u8 sysctl_bootid[UUID_SIZE];
  * UUID. The difference is in whether table->data is NULL; if it is,
  * then a new UUID is generated and returned to the user.
  */
-static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
+static int proc_do_uuid(struct ctl_table *table, int write, void *buf,
                        size_t *lenp, loff_t *ppos)
 {
        u8 tmp_uuid[UUID_SIZE], *uuid;
@@ -1729,14 +1505,14 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
        }
 
        snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid);
-       return proc_dostring(&fake_table, 0, buffer, lenp, ppos);
+       return proc_dostring(&fake_table, 0, buf, lenp, ppos);
 }
 
 /* The same as proc_dointvec, but writes don't change anything. */
-static int proc_do_rointvec(struct ctl_table *table, int write, void *buffer,
+static int proc_do_rointvec(struct ctl_table *table, int write, void *buf,
                            size_t *lenp, loff_t *ppos)
 {
-       return write ? 0 : proc_dointvec(table, 0, buffer, lenp, ppos);
+       return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos);
 }
 
 static struct ctl_table random_table[] = {
@@ -1749,7 +1525,7 @@ static struct ctl_table random_table[] = {
        },
        {
                .procname       = "entropy_avail",
-               .data           = &input_pool.entropy_count,
+               .data           = &input_pool.init_bits,
                .maxlen         = sizeof(int),
                .mode           = 0444,
                .proc_handler   = proc_dointvec,
@@ -1783,8 +1559,8 @@ static struct ctl_table random_table[] = {
 };
 
 /*
- * rand_initialize() is called before sysctl_init(),
- * so we cannot call register_sysctl_init() in rand_initialize()
+ * random_init() is called before sysctl_init(),
+ * so we cannot call register_sysctl_init() in random_init()
  */
 static int __init random_sysctls_init(void)
 {
index 4704fa553098b5dacdc04b6097892c91f779162b..c1eb5d223839579e087a27774bf3adb9f2b45f21 100644
@@ -400,7 +400,16 @@ ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id,  u32 *value,
        if (!rc) {
                out = (struct tpm2_get_cap_out *)
                        &buf.data[TPM_HEADER_SIZE];
-               *value = be32_to_cpu(out->value);
+               /*
+                * To keep some systems from failing to boot, an Infineon
+                * TPM 2.0 in field upgrade mode returns SUCCESS on
+                * TPM2_Startup. In that mode the TPM2_GetCapability command
+                * also returns a zero-length list.
+                */
+               if (be32_to_cpu(out->property_cnt) > 0)
+                       *value = be32_to_cpu(out->value);
+               else
+                       rc = -ENODATA;
        }
        tpm_buf_destroy(&buf);
        return rc;
@@ -745,7 +754,11 @@ int tpm2_auto_startup(struct tpm_chip *chip)
        rc = tpm2_get_cc_attrs_tbl(chip);
 
 out:
-       if (rc == TPM2_RC_UPGRADE) {
+       /*
+        * Infineon TPM in field upgrade mode will return no data for the number
+        * of supported commands.
+        */
+       if (rc == TPM2_RC_UPGRADE || rc == -ENODATA) {
                dev_info(&chip->dev, "TPM in field upgrade mode, requires firmware upgrade\n");
                chip->flags |= TPM_CHIP_FLAG_FIRMWARE_UPGRADE;
                rc = 0;
index 6e3235565a4d8ee86cd79aea7d034d03a2d6938a..5c233423c56fa99a1e4572a75f298abcb72541d6 100644
@@ -177,7 +177,7 @@ static u8 ftpm_tee_tpm_op_status(struct tpm_chip *chip)
 
 static bool ftpm_tee_tpm_req_canceled(struct tpm_chip *chip, u8 status)
 {
-       return 0;
+       return false;
 }
 
 static const struct tpm_class_ops ftpm_tee_tpm_ops = {
index 3af4c07a9342ff115a2e3de3561c2b09793a6a6f..d3989b257f4222e2a581e04d58b24cafc569a63f 100644
@@ -681,6 +681,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
        if (!wait_event_timeout(ibmvtpm->crq_queue.wq,
                                ibmvtpm->rtce_buf != NULL,
                                HZ)) {
+               rc = -ENODEV;
                dev_err(dev, "CRQ response timed out\n");
                goto init_irq_cleanup;
        }
index d3f2e5364c275f4fd91f437ad853ccdc343e94c4..bcff6429e0b4f2b0e2f748a545d95a7408846473 100644
@@ -153,50 +153,46 @@ static int check_acpi_tpm2(struct device *dev)
 #endif
 
 static int tpm_tcg_read_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
-                             u8 *result)
+                             u8 *result, enum tpm_tis_io_mode io_mode)
 {
        struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
-
-       while (len--)
-               *result++ = ioread8(phy->iobase + addr);
+       __le16 result_le16;
+       __le32 result_le32;
+
+       switch (io_mode) {
+       case TPM_TIS_PHYS_8:
+               while (len--)
+                       *result++ = ioread8(phy->iobase + addr);
+               break;
+       case TPM_TIS_PHYS_16:
+               result_le16 = cpu_to_le16(ioread16(phy->iobase + addr));
+               memcpy(result, &result_le16, sizeof(u16));
+               break;
+       case TPM_TIS_PHYS_32:
+               result_le32 = cpu_to_le32(ioread32(phy->iobase + addr));
+               memcpy(result, &result_le32, sizeof(u32));
+               break;
+       }
 
        return 0;
 }
 
 static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
-                              const u8 *value)
+                              const u8 *value, enum tpm_tis_io_mode io_mode)
 {
        struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
 
-       while (len--)
-               iowrite8(*value++, phy->iobase + addr);
-
-       return 0;
-}
-
-static int tpm_tcg_read16(struct tpm_tis_data *data, u32 addr, u16 *result)
-{
-       struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
-
-       *result = ioread16(phy->iobase + addr);
-
-       return 0;
-}
-
-static int tpm_tcg_read32(struct tpm_tis_data *data, u32 addr, u32 *result)
-{
-       struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
-
-       *result = ioread32(phy->iobase + addr);
-
-       return 0;
-}
-
-static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value)
-{
-       struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
-
-       iowrite32(value, phy->iobase + addr);
+       switch (io_mode) {
+       case TPM_TIS_PHYS_8:
+               while (len--)
+                       iowrite8(*value++, phy->iobase + addr);
+               break;
+       case TPM_TIS_PHYS_16:
+               return -EINVAL;
+       case TPM_TIS_PHYS_32:
+               iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase + addr);
+               break;
+       }
 
        return 0;
 }
@@ -204,9 +200,6 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value)
 static const struct tpm_tis_phy_ops tpm_tcg = {
        .read_bytes = tpm_tcg_read_bytes,
        .write_bytes = tpm_tcg_write_bytes,
-       .read16 = tpm_tcg_read16,
-       .read32 = tpm_tcg_read32,
-       .write32 = tpm_tcg_write32,
 };
 
 static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info)
index 3be24f221e32af812e276acac08f88c29e60eb15..6c203f36b8a1b2700e8942562237aa61ec2c299b 100644
@@ -104,54 +104,88 @@ struct tpm_tis_data {
        unsigned int timeout_max; /* usecs */
 };
 
+/*
+ * IO modes to indicate how many bytes should be read/written at once in the
+ * tpm_tis_phy_ops read_bytes/write_bytes calls. Use TPM_TIS_PHYS_8 to
+ * receive/transmit byte-wise, TPM_TIS_PHYS_16 for two bytes at once, etc.
+ */
+enum tpm_tis_io_mode {
+       TPM_TIS_PHYS_8,
+       TPM_TIS_PHYS_16,
+       TPM_TIS_PHYS_32,
+};
+
 struct tpm_tis_phy_ops {
+       /* data is passed in little endian */
        int (*read_bytes)(struct tpm_tis_data *data, u32 addr, u16 len,
-                         u8 *result);
+                         u8 *result, enum tpm_tis_io_mode mode);
        int (*write_bytes)(struct tpm_tis_data *data, u32 addr, u16 len,
-                          const u8 *value);
-       int (*read16)(struct tpm_tis_data *data, u32 addr, u16 *result);
-       int (*read32)(struct tpm_tis_data *data, u32 addr, u32 *result);
-       int (*write32)(struct tpm_tis_data *data, u32 addr, u32 src);
+                          const u8 *value, enum tpm_tis_io_mode mode);
 };
 
 static inline int tpm_tis_read_bytes(struct tpm_tis_data *data, u32 addr,
                                     u16 len, u8 *result)
 {
-       return data->phy_ops->read_bytes(data, addr, len, result);
+       return data->phy_ops->read_bytes(data, addr, len, result,
+                                        TPM_TIS_PHYS_8);
 }
 
 static inline int tpm_tis_read8(struct tpm_tis_data *data, u32 addr, u8 *result)
 {
-       return data->phy_ops->read_bytes(data, addr, 1, result);
+       return data->phy_ops->read_bytes(data, addr, 1, result, TPM_TIS_PHYS_8);
 }
 
 static inline int tpm_tis_read16(struct tpm_tis_data *data, u32 addr,
                                 u16 *result)
 {
-       return data->phy_ops->read16(data, addr, result);
+       __le16 result_le;
+       int rc;
+
+       rc = data->phy_ops->read_bytes(data, addr, sizeof(u16),
+                                      (u8 *)&result_le, TPM_TIS_PHYS_16);
+       if (!rc)
+               *result = le16_to_cpu(result_le);
+
+       return rc;
 }
 
 static inline int tpm_tis_read32(struct tpm_tis_data *data, u32 addr,
                                 u32 *result)
 {
-       return data->phy_ops->read32(data, addr, result);
+       __le32 result_le;
+       int rc;
+
+       rc = data->phy_ops->read_bytes(data, addr, sizeof(u32),
+                                      (u8 *)&result_le, TPM_TIS_PHYS_32);
+       if (!rc)
+               *result = le32_to_cpu(result_le);
+
+       return rc;
 }
 
 static inline int tpm_tis_write_bytes(struct tpm_tis_data *data, u32 addr,
                                      u16 len, const u8 *value)
 {
-       return data->phy_ops->write_bytes(data, addr, len, value);
+       return data->phy_ops->write_bytes(data, addr, len, value,
+                                         TPM_TIS_PHYS_8);
 }
 
 static inline int tpm_tis_write8(struct tpm_tis_data *data, u32 addr, u8 value)
 {
-       return data->phy_ops->write_bytes(data, addr, 1, &value);
+       return data->phy_ops->write_bytes(data, addr, 1, &value,
+                                         TPM_TIS_PHYS_8);
 }
 
 static inline int tpm_tis_write32(struct tpm_tis_data *data, u32 addr,
                                  u32 value)
 {
-       return data->phy_ops->write32(data, addr, value);
+       __le32 value_le;
+       int rc;
+
+       value_le = cpu_to_le32(value);
+       rc = data->phy_ops->write_bytes(data, addr, sizeof(u32),
+                                       (u8 *)&value_le, TPM_TIS_PHYS_32);
+       return rc;
 }
 
 static inline bool is_bsw(void)
index f6c0affbb4567432dd80896bd3a0750ed4851e2d..974479a1ec5a081f547e37ccf68daf6769afa239 100644
@@ -31,6 +31,7 @@
 #define TPM_CR50_TIMEOUT_SHORT_MS      2               /* Short timeout during transactions */
 #define TPM_CR50_TIMEOUT_NOIRQ_MS      20              /* Timeout for TPM ready without IRQ */
 #define TPM_CR50_I2C_DID_VID           0x00281ae0L     /* Device and vendor ID reg value */
+#define TPM_TI50_I2C_DID_VID           0x504a6666L     /* Device and vendor ID reg value */
 #define TPM_CR50_I2C_MAX_RETRIES       3               /* Max retries due to I2C errors */
 #define TPM_CR50_I2C_RETRY_DELAY_LO    55              /* Min usecs between retries on I2C */
 #define TPM_CR50_I2C_RETRY_DELAY_HI    65              /* Max usecs between retries on I2C */
@@ -742,15 +743,15 @@ static int tpm_cr50_i2c_probe(struct i2c_client *client)
        }
 
        vendor = le32_to_cpup((__le32 *)buf);
-       if (vendor != TPM_CR50_I2C_DID_VID) {
+       if (vendor != TPM_CR50_I2C_DID_VID && vendor != TPM_TI50_I2C_DID_VID) {
                dev_err(dev, "Vendor ID did not match! ID was %08x\n", vendor);
                tpm_cr50_release_locality(chip, true);
                return -ENODEV;
        }
 
-       dev_info(dev, "cr50 TPM 2.0 (i2c 0x%02x irq %d id 0x%x)\n",
+       dev_info(dev, "%s TPM 2.0 (i2c 0x%02x irq %d id 0x%x)\n",
+                vendor == TPM_TI50_I2C_DID_VID ? "ti50" : "cr50",
                 client->addr, client->irq, vendor >> 16);
-
        return tpm_chip_register(chip);
 }
 
@@ -768,8 +769,8 @@ static int tpm_cr50_i2c_remove(struct i2c_client *client)
        struct device *dev = &client->dev;
 
        if (!chip) {
-               dev_err(dev, "Could not get client data at remove\n");
-               return -ENODEV;
+               dev_crit(dev, "Could not get client data at remove, memory corruption ahead\n");
+               return 0;
        }
 
        tpm_chip_unregister(chip);
index bba73979c368707cfd2a3678ce5f60e66500492d..d0f66f6f193189c22e21357d86a1926ec39c15e8 100644
@@ -31,10 +31,6 @@ extern int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy,
 extern int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
                                u8 *in, const u8 *out);
 
-extern int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result);
-extern int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result);
-extern int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value);
-
 #ifdef CONFIG_TCG_TIS_SPI_CR50
 extern int cr50_spi_probe(struct spi_device *spi);
 #else
index 7bf123d3c537ff79a78f63b7bd6cc42db98463c9..f4937280e940615d5ad9c9dee61c31d1435e6304 100644
@@ -222,13 +222,13 @@ static int tpm_tis_spi_cr50_transfer(struct tpm_tis_data *data, u32 addr, u16 le
 }
 
 static int tpm_tis_spi_cr50_read_bytes(struct tpm_tis_data *data, u32 addr,
-                                      u16 len, u8 *result)
+                                      u16 len, u8 *result, enum tpm_tis_io_mode io_mode)
 {
        return tpm_tis_spi_cr50_transfer(data, addr, len, result, NULL);
 }
 
 static int tpm_tis_spi_cr50_write_bytes(struct tpm_tis_data *data, u32 addr,
-                                       u16 len, const u8 *value)
+                                       u16 len, const u8 *value, enum tpm_tis_io_mode io_mode)
 {
        return tpm_tis_spi_cr50_transfer(data, addr, len, NULL, value);
 }
@@ -236,9 +236,6 @@ static int tpm_tis_spi_cr50_write_bytes(struct tpm_tis_data *data, u32 addr,
 static const struct tpm_tis_phy_ops tpm_spi_cr50_phy_ops = {
        .read_bytes = tpm_tis_spi_cr50_read_bytes,
        .write_bytes = tpm_tis_spi_cr50_write_bytes,
-       .read16 = tpm_tis_spi_read16,
-       .read32 = tpm_tis_spi_read32,
-       .write32 = tpm_tis_spi_write32,
 };
 
 static void cr50_print_fw_version(struct tpm_tis_data *data)
index 184396b3af501c36355ae8707740e7b9be8a0b60..a0963a3e92bdd86da6627412eeff1e0b8a00aa6a 100644
@@ -141,55 +141,17 @@ exit:
 }
 
 static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr,
-                                 u16 len, u8 *result)
+                                 u16 len, u8 *result, enum tpm_tis_io_mode io_mode)
 {
        return tpm_tis_spi_transfer(data, addr, len, result, NULL);
 }
 
 static int tpm_tis_spi_write_bytes(struct tpm_tis_data *data, u32 addr,
-                                  u16 len, const u8 *value)
+                                  u16 len, const u8 *value, enum tpm_tis_io_mode io_mode)
 {
        return tpm_tis_spi_transfer(data, addr, len, NULL, value);
 }
 
-int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result)
-{
-       __le16 result_le;
-       int rc;
-
-       rc = data->phy_ops->read_bytes(data, addr, sizeof(u16),
-                                      (u8 *)&result_le);
-       if (!rc)
-               *result = le16_to_cpu(result_le);
-
-       return rc;
-}
-
-int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result)
-{
-       __le32 result_le;
-       int rc;
-
-       rc = data->phy_ops->read_bytes(data, addr, sizeof(u32),
-                                      (u8 *)&result_le);
-       if (!rc)
-               *result = le32_to_cpu(result_le);
-
-       return rc;
-}
-
-int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value)
-{
-       __le32 value_le;
-       int rc;
-
-       value_le = cpu_to_le32(value);
-       rc = data->phy_ops->write_bytes(data, addr, sizeof(u32),
-                                       (u8 *)&value_le);
-
-       return rc;
-}
-
 int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy,
                     int irq, const struct tpm_tis_phy_ops *phy_ops)
 {
@@ -205,9 +167,6 @@ int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy,
 static const struct tpm_tis_phy_ops tpm_spi_phy_ops = {
        .read_bytes = tpm_tis_spi_read_bytes,
        .write_bytes = tpm_tis_spi_write_bytes,
-       .read16 = tpm_tis_spi_read16,
-       .read32 = tpm_tis_spi_read32,
-       .write32 = tpm_tis_spi_write32,
 };
 
 static int tpm_tis_spi_probe(struct spi_device *dev)
index e47bdd27270492f5176f2fcc7a549c17fc5d20be..679196c614017aac6994c7402e1612b85a44aa8f 100644
@@ -35,72 +35,53 @@ static inline struct tpm_tis_synquacer_phy *to_tpm_tis_tcg_phy(struct tpm_tis_da
 }
 
 static int tpm_tis_synquacer_read_bytes(struct tpm_tis_data *data, u32 addr,
-                                       u16 len, u8 *result)
+                                       u16 len, u8 *result,
+                                       enum tpm_tis_io_mode io_mode)
 {
        struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
-
-       while (len--)
-               *result++ = ioread8(phy->iobase + addr);
+       switch (io_mode) {
+       case TPM_TIS_PHYS_8:
+               while (len--)
+                       *result++ = ioread8(phy->iobase + addr);
+               break;
+       case TPM_TIS_PHYS_16:
+               result[1] = ioread8(phy->iobase + addr + 1);
+               result[0] = ioread8(phy->iobase + addr);
+               break;
+       case TPM_TIS_PHYS_32:
+               result[3] = ioread8(phy->iobase + addr + 3);
+               result[2] = ioread8(phy->iobase + addr + 2);
+               result[1] = ioread8(phy->iobase + addr + 1);
+               result[0] = ioread8(phy->iobase + addr);
+               break;
+       }
 
        return 0;
 }
 
 static int tpm_tis_synquacer_write_bytes(struct tpm_tis_data *data, u32 addr,
-                                        u16 len, const u8 *value)
+                                        u16 len, const u8 *value,
+                                        enum tpm_tis_io_mode io_mode)
 {
        struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
-
-       while (len--)
-               iowrite8(*value++, phy->iobase + addr);
-
-       return 0;
-}
-
-static int tpm_tis_synquacer_read16_bw(struct tpm_tis_data *data,
-                                      u32 addr, u16 *result)
-{
-       struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
-
-       /*
-        * Due to the limitation of SPI controller on SynQuacer,
-        * 16/32 bits access must be done in byte-wise and descending order.
-        */
-       *result = (ioread8(phy->iobase + addr + 1) << 8) |
-                 (ioread8(phy->iobase + addr));
-
-       return 0;
-}
-
-static int tpm_tis_synquacer_read32_bw(struct tpm_tis_data *data,
-                                      u32 addr, u32 *result)
-{
-       struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
-
-       /*
-        * Due to the limitation of SPI controller on SynQuacer,
-        * 16/32 bits access must be done in byte-wise and descending order.
-        */
-       *result = (ioread8(phy->iobase + addr + 3) << 24) |
-                 (ioread8(phy->iobase + addr + 2) << 16) |
-                 (ioread8(phy->iobase + addr + 1) << 8) |
-                 (ioread8(phy->iobase + addr));
-
-       return 0;
-}
-
-static int tpm_tis_synquacer_write32_bw(struct tpm_tis_data *data,
-                                       u32 addr, u32 value)
-{
-       struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
-
-       /*
-        * Due to the limitation of SPI controller on SynQuacer,
-        * 16/32 bits access must be done in byte-wise and descending order.
-        */
-       iowrite8(value >> 24, phy->iobase + addr + 3);
-       iowrite8(value >> 16, phy->iobase + addr + 2);
-       iowrite8(value >> 8, phy->iobase + addr + 1);
-       iowrite8(value, phy->iobase + addr);
+       switch (io_mode) {
+       case TPM_TIS_PHYS_8:
+               while (len--)
+                       iowrite8(*value++, phy->iobase + addr);
+               break;
+       case TPM_TIS_PHYS_16:
+               return -EINVAL;
+       case TPM_TIS_PHYS_32:
+               /*
+                * Due to a limitation of the SPI controller on SynQuacer,
+                * 16/32-bit accesses must be done byte-wise, in descending
+                * order.
+                */
+               iowrite8(value[3], phy->iobase + addr + 3);
+               iowrite8(value[2], phy->iobase + addr + 2);
+               iowrite8(value[1], phy->iobase + addr + 1);
+               iowrite8(value[0], phy->iobase + addr);
+               break;
+       }
 
        return 0;
 }
@@ -108,9 +89,6 @@ static int tpm_tis_synquacer_write32_bw(struct tpm_tis_data *data,
 static const struct tpm_tis_phy_ops tpm_tcg_bw = {
        .read_bytes     = tpm_tis_synquacer_read_bytes,
        .write_bytes    = tpm_tis_synquacer_write_bytes,
-       .read16         = tpm_tis_synquacer_read16_bw,
-       .read32         = tpm_tis_synquacer_read32_bw,
-       .write32        = tpm_tis_synquacer_write32_bw,
 };
 
 static int tpm_tis_synquacer_init(struct device *dev,
index 69df04ae2401b8f6ebaaa8e7bb17fdb52fdf5e5d..3792918262617983cd21d424a736955c395400b8 100644
@@ -253,20 +253,12 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv)
        struct xenbus_transaction xbt;
        const char *message = NULL;
        int rv;
-       grant_ref_t gref;
 
-       priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-       if (!priv->shr) {
-               xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
-               return -ENOMEM;
-       }
-
-       rv = xenbus_grant_ring(dev, priv->shr, 1, &gref);
+       rv = xenbus_setup_ring(dev, GFP_KERNEL, (void **)&priv->shr, 1,
+                              &priv->ring_ref);
        if (rv < 0)
                return rv;
 
-       priv->ring_ref = gref;
-
        rv = xenbus_alloc_evtchn(dev, &priv->evtchn);
        if (rv)
                return rv;
@@ -331,11 +323,7 @@ static void ring_free(struct tpm_private *priv)
        if (!priv)
                return;
 
-       if (priv->ring_ref)
-               gnttab_end_foreign_access(priv->ring_ref,
-                               (unsigned long)priv->shr);
-       else
-               free_page((unsigned long)priv->shr);
+       xenbus_teardown_ring((void **)&priv->shr, 1, &priv->ring_ref);
 
        if (priv->irq)
                unbind_from_irqhandler(priv->irq, priv);
index 23cc8297ec4c071bd6dd952e5966526a29df5677..d429ba52a71908b584e9ccca3dd8cf8f3a576798 100644
@@ -117,6 +117,10 @@ static void clk_generated_best_diff(struct clk_rate_request *req,
                tmp_rate = parent_rate;
        else
                tmp_rate = parent_rate / div;
+
+       if (tmp_rate < req->min_rate || tmp_rate > req->max_rate)
+               return;
+
        tmp_diff = abs(req->rate - tmp_rate);
 
        if (*best_diff < 0 || *best_diff >= tmp_diff) {
index 3ad20e75fd23f6b7e60de0b59b87f52285f66736..48a1eb9f2d551cc17b45c45925232dedbf577f49 100644
@@ -941,6 +941,7 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw,
        u64 temp = (u64)parent_rate << CM_DIV_FRAC_BITS;
        u32 div, mindiv, maxdiv;
 
+       do_div(temp, rate);
        div = temp;
        div &= ~unused_frac_mask;
 
index 2f3ddc908ebd32b0a3ab282bb86ba1c541790eff..d65398497d5f64b98565066fdfdc16a2be58d15b 100644
@@ -298,10 +298,6 @@ static const struct sunxi_ccu_desc sun6i_rtc_ccu_desc = {
        .hw_clks        = &sun6i_rtc_ccu_hw_clks,
 };
 
-static const struct clk_parent_data sun50i_h6_osc32k_fanout_parents[] = {
-       { .hw = &osc32k_clk.common.hw },
-};
-
 static const struct clk_parent_data sun50i_h616_osc32k_fanout_parents[] = {
        { .hw = &osc32k_clk.common.hw },
        { .fw_name = "pll-32k" },
@@ -314,13 +310,6 @@ static const struct clk_parent_data sun50i_r329_osc32k_fanout_parents[] = {
        { .hw = &osc24M_32k_clk.common.hw }
 };
 
-static const struct sun6i_rtc_match_data sun50i_h6_rtc_ccu_data = {
-       .have_ext_osc32k        = true,
-       .have_iosc_calibration  = true,
-       .osc32k_fanout_parents  = sun50i_h6_osc32k_fanout_parents,
-       .osc32k_fanout_nparents = ARRAY_SIZE(sun50i_h6_osc32k_fanout_parents),
-};
-
 static const struct sun6i_rtc_match_data sun50i_h616_rtc_ccu_data = {
        .have_iosc_calibration  = true,
        .rtc_32k_single_parent  = true,
@@ -335,10 +324,6 @@ static const struct sun6i_rtc_match_data sun50i_r329_rtc_ccu_data = {
 };
 
 static const struct of_device_id sun6i_rtc_ccu_match[] = {
-       {
-               .compatible     = "allwinner,sun50i-h6-rtc",
-               .data           = &sun50i_h6_rtc_ccu_data,
-       },
        {
                .compatible     = "allwinner,sun50i-h616-rtc",
                .data           = &sun50i_h616_rtc_ccu_data,
index 1589ae7d5abb632cee3f866893414f462334da28..06866bfa1826907c3456119b350485ee99e0839d 100644
@@ -711,4 +711,11 @@ config MICROCHIP_PIT64B
          modes and high resolution. It is used as a clocksource
          and a clockevent.
 
+config GOLDFISH_TIMER
+       bool "Clocksource using goldfish-rtc"
+       depends on M68K || COMPILE_TEST
+       depends on RTC_DRV_GOLDFISH
+       help
+         Support for the timer/counter of goldfish-rtc
+
 endmenu
index 9c85ee2bb373505d03370af1f275944f6a1ceaf3..b839beb6ea539f8cd133543e0ebed81720f81111 100644
@@ -88,3 +88,4 @@ obj-$(CONFIG_GX6605S_TIMER)           += timer-gx6605s.o
 obj-$(CONFIG_HYPERV_TIMER)             += hyperv_timer.o
 obj-$(CONFIG_MICROCHIP_PIT64B)         += timer-microchip-pit64b.o
 obj-$(CONFIG_MSC313E_TIMER)            += timer-msc313e.o
+obj-$(CONFIG_GOLDFISH_TIMER)           += timer-goldfish.o
diff --git a/drivers/clocksource/timer-goldfish.c b/drivers/clocksource/timer-goldfish.c
new file mode 100644
index 0000000..0512d5e
--- /dev/null
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/goldfish.h>
+#include <clocksource/timer-goldfish.h>
+
+struct goldfish_timer {
+       struct clocksource cs;
+       struct clock_event_device ced;
+       struct resource res;
+       void __iomem *base;
+};
+
+static struct goldfish_timer *ced_to_gf(struct clock_event_device *ced)
+{
+       return container_of(ced, struct goldfish_timer, ced);
+}
+
+static struct goldfish_timer *cs_to_gf(struct clocksource *cs)
+{
+       return container_of(cs, struct goldfish_timer, cs);
+}
+
+static u64 goldfish_timer_read(struct clocksource *cs)
+{
+       struct goldfish_timer *timerdrv = cs_to_gf(cs);
+       void __iomem *base = timerdrv->base;
+       u32 time_low, time_high;
+       u64 ticks;
+
+       /*
+        * time_low: get low bits of current time and update time_high
+        * time_high: get high bits of time at last time_low read
+        */
+       time_low = gf_ioread32(base + TIMER_TIME_LOW);
+       time_high = gf_ioread32(base + TIMER_TIME_HIGH);
+
+       ticks = ((u64)time_high << 32) | time_low;
+
+       return ticks;
+}
+
+static int goldfish_timer_set_oneshot(struct clock_event_device *evt)
+{
+       struct goldfish_timer *timerdrv = ced_to_gf(evt);
+       void __iomem *base = timerdrv->base;
+
+       gf_iowrite32(0, base + TIMER_ALARM_HIGH);
+       gf_iowrite32(0, base + TIMER_ALARM_LOW);
+       gf_iowrite32(1, base + TIMER_IRQ_ENABLED);
+
+       return 0;
+}
+
+static int goldfish_timer_shutdown(struct clock_event_device *evt)
+{
+       struct goldfish_timer *timerdrv = ced_to_gf(evt);
+       void __iomem *base = timerdrv->base;
+
+       gf_iowrite32(0, base + TIMER_IRQ_ENABLED);
+
+       return 0;
+}
+
+static int goldfish_timer_next_event(unsigned long delta,
+                                    struct clock_event_device *evt)
+{
+       struct goldfish_timer *timerdrv = ced_to_gf(evt);
+       void __iomem *base = timerdrv->base;
+       u64 now;
+
+       now = goldfish_timer_read(&timerdrv->cs);
+
+       now += delta;
+
+       gf_iowrite32(upper_32_bits(now), base + TIMER_ALARM_HIGH);
+       gf_iowrite32(lower_32_bits(now), base + TIMER_ALARM_LOW);
+
+       return 0;
+}
+
+static irqreturn_t goldfish_timer_irq(int irq, void *dev_id)
+{
+       struct goldfish_timer *timerdrv = dev_id;
+       struct clock_event_device *evt = &timerdrv->ced;
+       void __iomem *base = timerdrv->base;
+
+       gf_iowrite32(1, base + TIMER_CLEAR_INTERRUPT);
+
+       evt->event_handler(evt);
+
+       return IRQ_HANDLED;
+}
+
+int __init goldfish_timer_init(int irq, void __iomem *base)
+{
+       struct goldfish_timer *timerdrv;
+       int ret;
+
+       timerdrv = kzalloc(sizeof(*timerdrv), GFP_KERNEL);
+       if (!timerdrv)
+               return -ENOMEM;
+
+       timerdrv->base = base;
+
+       timerdrv->ced = (struct clock_event_device){
+               .name                   = "goldfish_timer",
+               .features               = CLOCK_EVT_FEAT_ONESHOT,
+               .set_state_shutdown     = goldfish_timer_shutdown,
+               .set_state_oneshot      = goldfish_timer_set_oneshot,
+               .set_next_event         = goldfish_timer_next_event,
+       };
+
+       timerdrv->res = (struct resource){
+               .name  = "goldfish_timer",
+               .start = (unsigned long)base,
+               .end   = (unsigned long)base + 0xfff,
+       };
+
+       ret = request_resource(&iomem_resource, &timerdrv->res);
+       if (ret) {
+               pr_err("Cannot allocate '%s' resource\n", timerdrv->res.name);
+               return ret;
+       }
+
+       timerdrv->cs = (struct clocksource){
+               .name           = "goldfish_timer",
+               .rating         = 400,
+               .read           = goldfish_timer_read,
+               .mask           = CLOCKSOURCE_MASK(64),
+               .flags          = 0,
+               .max_idle_ns    = LONG_MAX,
+       };
+
+       clocksource_register_hz(&timerdrv->cs, NSEC_PER_SEC);
+
+       ret = request_irq(irq, goldfish_timer_irq, IRQF_TIMER,
+                         "goldfish_timer", timerdrv);
+       if (ret) {
+               pr_err("Couldn't register goldfish-timer interrupt\n");
+               return ret;
+       }
+
+       clockevents_config_and_register(&timerdrv->ced, NSEC_PER_SEC,
+                                       1, 0xffffffff);
+
+       return 0;
+}
index 82d370ae6a4a5c045a27eeee05289fd6246e6b60..d092c9bb4ba39c40e848a2994cd0855c0fc8afb7 100644
@@ -389,6 +389,27 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
        return ret;
 }
 
+static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy,
+                                             unsigned int target_freq)
+{
+       struct cppc_cpudata *cpu_data = policy->driver_data;
+       unsigned int cpu = policy->cpu;
+       u32 desired_perf;
+       int ret;
+
+       desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq);
+       cpu_data->perf_ctrls.desired_perf = desired_perf;
+       ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
+
+       if (ret) {
+               pr_debug("Failed to set target on CPU:%d. ret:%d\n",
+                        cpu, ret);
+               return 0;
+       }
+
+       return target_freq;
+}
+
 static int cppc_verify_policy(struct cpufreq_policy_data *policy)
 {
        cpufreq_verify_within_cpu_limits(policy);
@@ -420,12 +441,197 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
        return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
 }
 
+static DEFINE_PER_CPU(unsigned int, efficiency_class);
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
+
+/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */
+#define CPPC_EM_CAP_STEP       (20)
+/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
+#define CPPC_EM_COST_STEP      (1)
+/* Add a cost gap corresponding to the energy of 4 CPUs. */
+#define CPPC_EM_COST_GAP       (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
+                               / CPPC_EM_CAP_STEP)
+
+static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
+{
+       struct cppc_perf_caps *perf_caps;
+       unsigned int min_cap, max_cap;
+       struct cppc_cpudata *cpu_data;
+       int cpu = policy->cpu;
+
+       cpu_data = policy->driver_data;
+       perf_caps = &cpu_data->perf_caps;
+       max_cap = arch_scale_cpu_capacity(cpu);
+       min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf);
+       if ((min_cap == 0) || (max_cap < min_cap))
+               return 0;
+       return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
+}
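+
+/*
+ * Example: with max_cap = 1024 and lowest_perf / highest_perf = 1/4,
+ * min_cap = 256 and the function returns
+ * 1 + 1024 / 20 - 256 / 20 = 1 + 51 - 12 = 40 performance states.
+ */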
+
+/*
+ * The cost is defined as:
+ *   cost = power * max_frequency / frequency
+ */
+static inline unsigned long compute_cost(int cpu, int step)
+{
+       return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) +
+                       step * CPPC_EM_COST_STEP;
+}
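+
+/*
+ * E.g. with CPPC_EM_COST_GAP = 204 (see the worked example above), a
+ * CPU in efficiency class 1 at step 10 gets
+ * cost = 204 * 1 + 10 * 1 = 214.
+ */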
+
+static int cppc_get_cpu_power(struct device *cpu_dev,
+               unsigned long *power, unsigned long *KHz)
+{
+       unsigned long perf_step, perf_prev, perf, perf_check;
+       unsigned int min_step, max_step, step, step_check;
+       unsigned long prev_freq = *KHz;
+       unsigned int min_cap, max_cap;
+       struct cpufreq_policy *policy;
+
+       struct cppc_perf_caps *perf_caps;
+       struct cppc_cpudata *cpu_data;
+
+       policy = cpufreq_cpu_get_raw(cpu_dev->id);
+       cpu_data = policy->driver_data;
+       perf_caps = &cpu_data->perf_caps;
+       max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+       min_cap = div_u64(max_cap * perf_caps->lowest_perf,
+                       perf_caps->highest_perf);
+
+       perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+       min_step = min_cap / CPPC_EM_CAP_STEP;
+       max_step = max_cap / CPPC_EM_CAP_STEP;
+
+       perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+       step = perf_prev / perf_step;
+
+       if (step > max_step)
+               return -EINVAL;
+
+       if (min_step == max_step) {
+               step = max_step;
+               perf = perf_caps->highest_perf;
+       } else if (step < min_step) {
+               step = min_step;
+               perf = perf_caps->lowest_perf;
+       } else {
+               step++;
+               if (step == max_step)
+                       perf = perf_caps->highest_perf;
+               else
+                       perf = step * perf_step;
+       }
+
+       *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+       perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+       step_check = perf_check / perf_step;
+
+       /*
+        * To avoid a bad integer approximation, check that the new
+        * frequency value actually increased and that it converts back to
+        * the desired step value.
+        */
+       while ((*KHz == prev_freq) || (step_check != step)) {
+               perf++;
+               *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+               perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+               step_check = perf_check / perf_step;
+       }
+
+       /*
+        * With an artificial EM, only the cost value is used. Still, the
+        * power is populated such that 0 < power < EM_MAX_POWER, which
+        * gives the artificial performance states more meaning.
+        */
+       *power = compute_cost(cpu_dev->id, step);
+
+       return 0;
+}
+
+static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
+               unsigned long *cost)
+{
+       unsigned long perf_step, perf_prev;
+       struct cppc_perf_caps *perf_caps;
+       struct cpufreq_policy *policy;
+       struct cppc_cpudata *cpu_data;
+       unsigned int max_cap;
+       int step;
+
+       policy = cpufreq_cpu_get_raw(cpu_dev->id);
+       cpu_data = policy->driver_data;
+       perf_caps = &cpu_data->perf_caps;
+       max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+
+       perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
+       perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+       step = perf_prev / perf_step;
+
+       *cost = compute_cost(cpu_dev->id, step);
+
+       return 0;
+}
+
+static int populate_efficiency_class(void)
+{
+       struct acpi_madt_generic_interrupt *gicc;
+       DECLARE_BITMAP(used_classes, 256) = {};
+       int class, cpu, index;
+
+       for_each_possible_cpu(cpu) {
+               gicc = acpi_cpu_get_madt_gicc(cpu);
+               class = gicc->efficiency_class;
+               bitmap_set(used_classes, class, 1);
+       }
+
+       if (bitmap_weight(used_classes, 256) <= 1) {
+               pr_debug("Efficiency classes are all equal (=%d). No EM registered\n",
+                        class);
+               return -EINVAL;
+       }
+
+       /*
+        * Squeeze the efficiency class values into [0:#efficiency_class-1].
+        * Per the spec, values are in [0:255].
+        */
+       index = 0;
+       for_each_set_bit(class, used_classes, 256) {
+               for_each_possible_cpu(cpu) {
+                       gicc = acpi_cpu_get_madt_gicc(cpu);
+                       if (gicc->efficiency_class == class)
+                               per_cpu(efficiency_class, cpu) = index;
+               }
+               index++;
+       }
+       cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
+
+       return 0;
+}
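+
+/*
+ * Example of the squeeze above: if the MADT advertises efficiency
+ * classes {0, 3, 7} across the CPUs, their per-CPU efficiency_class
+ * values become {0, 1, 2} respectively.
+ */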
+
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+       struct cppc_cpudata *cpu_data;
+       struct em_data_callback em_cb =
+               EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
+
+       cpu_data = policy->driver_data;
+       em_dev_register_perf_domain(get_cpu_device(policy->cpu),
+                       get_perf_level_count(policy), &em_cb,
+                       cpu_data->shared_cpu_map, 0);
+}
+
 #else
 
 static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
 {
        return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
 }
+static int populate_efficiency_class(void)
+{
+       return 0;
+}
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+}
 #endif
 
 
@@ -536,6 +742,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
                goto out;
        }
 
+       policy->fast_switch_possible = cppc_allow_fast_switch();
+       policy->dvfs_possible_from_any_cpu = true;
+
        /*
         * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
         * is supported.
@@ -681,6 +890,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = {
        .verify = cppc_verify_policy,
        .target = cppc_cpufreq_set_target,
        .get = cppc_cpufreq_get_rate,
+       .fast_switch = cppc_cpufreq_fast_switch,
        .init = cppc_cpufreq_cpu_init,
        .exit = cppc_cpufreq_cpu_exit,
        .set_boost = cppc_cpufreq_set_boost,
@@ -742,6 +952,7 @@ static int __init cppc_cpufreq_init(void)
 
        cppc_check_hisi_workaround();
        cppc_freq_invariance_init();
+       populate_efficiency_class();
 
        ret = cpufreq_register_driver(&cppc_cpufreq_driver);
        if (ret)
index 80f535cc8a757fe8222ed9ff4eafc55de0d471e6..2cad42774164799dc1895116f5ca7783e46b281a 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/suspend.h>
 #include <linux/syscore_ops.h>
 #include <linux/tick.h>
+#include <linux/units.h>
 #include <trace/events/power.h>
 
 static LIST_HEAD(cpufreq_policy_list);
@@ -947,13 +948,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
-       ssize_t ret;
+       ssize_t ret = -EBUSY;
 
        if (!fattr->show)
                return -EIO;
 
        down_read(&policy->rwsem);
-       ret = fattr->show(policy, buf);
+       if (likely(!policy_is_inactive(policy)))
+               ret = fattr->show(policy, buf);
        up_read(&policy->rwsem);
 
        return ret;
@@ -964,7 +966,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 {
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
-       ssize_t ret = -EINVAL;
+       ssize_t ret = -EBUSY;
 
        if (!fattr->store)
                return -EIO;
@@ -978,7 +980,8 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 
        if (cpu_online(policy->cpu)) {
                down_write(&policy->rwsem);
-               ret = fattr->store(policy, buf, count);
+               if (likely(!policy_is_inactive(policy)))
+                       ret = fattr->store(policy, buf, count);
                up_write(&policy->rwsem);
        }
 
@@ -1019,11 +1022,12 @@ static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu,
                dev_err(dev, "cpufreq symlink creation failed\n");
 }
 
-static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
+static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu,
                                   struct device *dev)
 {
        dev_dbg(dev, "%s: Removing symlink\n", __func__);
        sysfs_remove_link(&dev->kobj, "cpufreq");
+       cpumask_clear_cpu(cpu, policy->real_cpus);
 }
 
 static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
@@ -1337,12 +1341,12 @@ static int cpufreq_online(unsigned int cpu)
                down_write(&policy->rwsem);
                policy->cpu = cpu;
                policy->governor = NULL;
-               up_write(&policy->rwsem);
        } else {
                new_policy = true;
                policy = cpufreq_policy_alloc(cpu);
                if (!policy)
                        return -ENOMEM;
+               down_write(&policy->rwsem);
        }
 
        if (!new_policy && cpufreq_driver->online) {
@@ -1382,7 +1386,6 @@ static int cpufreq_online(unsigned int cpu)
                cpumask_copy(policy->related_cpus, policy->cpus);
        }
 
-       down_write(&policy->rwsem);
        /*
         * affected cpus must always be the ones that are online. We aren't
         * managing offline cpus here.
@@ -1531,9 +1534,9 @@ static int cpufreq_online(unsigned int cpu)
 
 out_destroy_policy:
        for_each_cpu(j, policy->real_cpus)
-               remove_cpu_dev_symlink(policy, get_cpu_device(j));
+               remove_cpu_dev_symlink(policy, j, get_cpu_device(j));
 
-       up_write(&policy->rwsem);
+       cpumask_clear(policy->cpus);
 
 out_offline_policy:
        if (cpufreq_driver->offline)
@@ -1544,6 +1547,8 @@ out_exit_policy:
                cpufreq_driver->exit(policy);
 
 out_free_policy:
+       up_write(&policy->rwsem);
+
        cpufreq_policy_free(policy);
        return ret;
 }
@@ -1575,47 +1580,36 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
        return 0;
 }
 
-static int cpufreq_offline(unsigned int cpu)
+static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy)
 {
-       struct cpufreq_policy *policy;
        int ret;
 
-       pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
-
-       policy = cpufreq_cpu_get_raw(cpu);
-       if (!policy) {
-               pr_debug("%s: No cpu_data found\n", __func__);
-               return 0;
-       }
-
-       down_write(&policy->rwsem);
        if (has_target())
                cpufreq_stop_governor(policy);
 
        cpumask_clear_cpu(cpu, policy->cpus);
 
-       if (policy_is_inactive(policy)) {
-               if (has_target())
-                       strncpy(policy->last_governor, policy->governor->name,
-                               CPUFREQ_NAME_LEN);
-               else
-                       policy->last_policy = policy->policy;
-       } else if (cpu == policy->cpu) {
-               /* Nominate new CPU */
-               policy->cpu = cpumask_any(policy->cpus);
-       }
-
-       /* Start governor again for active policy */
        if (!policy_is_inactive(policy)) {
+               /* Nominate a new CPU if necessary. */
+               if (cpu == policy->cpu)
+                       policy->cpu = cpumask_any(policy->cpus);
+
+               /* Start the governor again for the active policy. */
                if (has_target()) {
                        ret = cpufreq_start_governor(policy);
                        if (ret)
                                pr_err("%s: Failed to start governor\n", __func__);
                }
 
-               goto unlock;
+               return;
        }
 
+       if (has_target())
+               strncpy(policy->last_governor, policy->governor->name,
+                       CPUFREQ_NAME_LEN);
+       else
+               policy->last_policy = policy->policy;
+
        if (cpufreq_thermal_control_enabled(cpufreq_driver)) {
                cpufreq_cooling_unregister(policy->cdev);
                policy->cdev = NULL;
@@ -1634,8 +1628,24 @@ static int cpufreq_offline(unsigned int cpu)
                cpufreq_driver->exit(policy);
                policy->freq_table = NULL;
        }
+}
+
+static int cpufreq_offline(unsigned int cpu)
+{
+       struct cpufreq_policy *policy;
+
+       pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
+
+       policy = cpufreq_cpu_get_raw(cpu);
+       if (!policy) {
+               pr_debug("%s: No cpu_data found\n", __func__);
+               return 0;
+       }
+
+       down_write(&policy->rwsem);
+
+       __cpufreq_offline(cpu, policy);
 
-unlock:
        up_write(&policy->rwsem);
        return 0;
 }
@@ -1653,19 +1663,25 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
        if (!policy)
                return;
 
-       if (cpu_online(cpu))
-               cpufreq_offline(cpu);
+       down_write(&policy->rwsem);
 
-       cpumask_clear_cpu(cpu, policy->real_cpus);
-       remove_cpu_dev_symlink(policy, dev);
+       if (cpu_online(cpu))
+               __cpufreq_offline(cpu, policy);
 
-       if (cpumask_empty(policy->real_cpus)) {
-               /* We did light-weight exit earlier, do full tear down now */
-               if (cpufreq_driver->offline)
-                       cpufreq_driver->exit(policy);
+       remove_cpu_dev_symlink(policy, cpu, dev);
 
-               cpufreq_policy_free(policy);
+       if (!cpumask_empty(policy->real_cpus)) {
+               up_write(&policy->rwsem);
+               return;
        }
+
+       /* We did a light-weight exit earlier, do the full teardown now */
+       if (cpufreq_driver->offline)
+               cpufreq_driver->exit(policy);
+
+       up_write(&policy->rwsem);
+
+       cpufreq_policy_free(policy);
 }
 
 /**
@@ -1707,6 +1723,16 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b
                return new_freq;
 
        if (policy->cur != new_freq) {
+               /*
+                * For some platforms, the frequency returned by hardware may be
+                * slightly different from what is provided in the frequency
+                * table, for example hardware may return 499 MHz instead of 500
+                * MHz. In such cases it is better to avoid getting into
+                * unnecessary frequency updates.
+                */
+               if (abs(policy->cur - new_freq) < KHZ_PER_MHZ)
+                       return policy->cur;
+
                cpufreq_out_of_sync(policy, new_freq);
                if (update)
                        schedule_work(&policy->update);
index 0d42cf8b88d8ac3f7638d0fc980104a1cba0c1d1..85da677c43d6bde7077a80e1562b6b4b9a7faf8f 100644 (file)
@@ -388,6 +388,15 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
        gov->free(policy_dbs);
 }
 
+static void cpufreq_dbs_data_release(struct kobject *kobj)
+{
+       struct dbs_data *dbs_data = to_dbs_data(to_gov_attr_set(kobj));
+       struct dbs_governor *gov = dbs_data->gov;
+
+       gov->exit(dbs_data);
+       kfree(dbs_data);
+}
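+
+/*
+ * The release callback above runs only once the last reference to the
+ * kobject embedded in dbs_data is dropped, so the governor tunables
+ * cannot be freed while a sysfs file that uses them is still open.
+ */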
+
 int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
 {
        struct dbs_governor *gov = dbs_governor_of(policy);
@@ -425,6 +434,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
                goto free_policy_dbs_info;
        }
 
+       dbs_data->gov = gov;
        gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
 
        ret = gov->init(dbs_data);
@@ -447,6 +457,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
        policy->governor_data = policy_dbs;
 
        gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
+       gov->kobj_type.release = cpufreq_dbs_data_release;
        ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type,
                                   get_governor_parent_kobj(policy),
                                   "%s", gov->gov.name);
@@ -488,13 +499,8 @@ void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy)
 
        policy->governor_data = NULL;
 
-       if (!count) {
-               if (!have_governor_per_policy())
-                       gov->gdbs_data = NULL;
-
-               gov->exit(dbs_data);
-               kfree(dbs_data);
-       }
+       if (!count && !have_governor_per_policy())
+               gov->gdbs_data = NULL;
 
        free_policy_dbs_info(policy_dbs, gov);
 
index a5a0bc3cc23ecff7da8996ee3ecf10a5b7fa898f..168c23fd7fcac75626a070784dce936629adcec2 100644 (file)
@@ -37,6 +37,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
 /* Governor demand based switching data (per-policy or global). */
 struct dbs_data {
        struct gov_attr_set attr_set;
+       struct dbs_governor *gov;
        void *tuners;
        unsigned int ignore_nice_load;
        unsigned int sampling_rate;
index 846bb3a7878803344bd9b916a046071ff0957620..57cdb36798854a941d1258b611c6e57bceddafcb 100644 (file)
@@ -1322,6 +1322,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
        mutex_unlock(&intel_pstate_limits_lock);
 
        intel_pstate_update_policies();
+       arch_set_max_freq_ratio(global.no_turbo);
 
        mutex_unlock(&intel_pstate_driver_lock);
 
@@ -2424,6 +2425,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
        X86_MATCH(BROADWELL_X,          core_funcs),
        X86_MATCH(SKYLAKE_X,            core_funcs),
        X86_MATCH(ICELAKE_X,            core_funcs),
+       X86_MATCH(SAPPHIRERAPIDS_X,     core_funcs),
        {}
 };
 
index 0a94c56ddad26ce0cc16828e32f5ac5a3c6c23ee..813cccbfe9348b58479d4906517a00639355fae0 100644 (file)
@@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
 };
 
 static int __maybe_unused
-mtk_cpufreq_get_cpu_power(unsigned long *mW,
-                         unsigned long *KHz, struct device *cpu_dev)
+mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
+                         unsigned long *KHz)
 {
        struct mtk_cpufreq_data *data;
        struct cpufreq_policy *policy;
index 815645170c4deca8239247ddb93ab5befb1b7a6e..039a66bbe1bef2e6c0a1d5931bd3a0b891514b46 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <asm/hw_irq.h>
 #include <asm/io.h>
-#include <asm/prom.h>
 #include <asm/time.h>
 #include <asm/smp.h>
 
index 4f20c6a9108df55b8a768ce411bb33fff70b2bd2..20f64a8b0a354d8a7aad6caa12aff6969a20ba38 100644 (file)
@@ -24,7 +24,7 @@
 #include <linux/device.h>
 #include <linux/hardirq.h>
 #include <linux/of_device.h>
-#include <asm/prom.h>
+
 #include <asm/machdep.h>
 #include <asm/irq.h>
 #include <asm/pmac_feature.h>
index d7542a106e6b8e8a02a6a93d2f21eba3939ce1bb..ba9c31d98bd60c3f1f1309c93d70bf368823df3f 100644 (file)
@@ -22,7 +22,7 @@
 #include <linux/completion.h>
 #include <linux/mutex.h>
 #include <linux/of_device.h>
-#include <asm/prom.h>
+
 #include <asm/machdep.h>
 #include <asm/irq.h>
 #include <asm/sections.h>
index c58abb4cca3a254b028bec292a888099e8e7c7b0..e3313ce63b38840a9f9df64efc0da08b6a906994 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/of_platform.h>
 
 #include <asm/machdep.h>
-#include <asm/prom.h>
 #include <asm/cell-regs.h>
 
 #include "ppc_cbe_cpufreq.h"
index 037fe23bc6ed0feeb76384126706a24cab82b863..4fba3637b115c184611282d011cf23df6737df37 100644 (file)
@@ -13,9 +13,9 @@
 #include <linux/init.h>
 #include <linux/of_platform.h>
 #include <linux/pm_qos.h>
+#include <linux/slab.h>
 
 #include <asm/processor.h>
-#include <asm/prom.h>
 #include <asm/pmi.h>
 #include <asm/cell-regs.h>
 
index 919fa6e3f4620f2f8452b5e995006358b0761591..6d2a4cf46db708601b6428fd5a04134af6ff2566 100644 (file)
@@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 }
 
 static int __maybe_unused
-scmi_get_cpu_power(unsigned long *power, unsigned long *KHz,
-                  struct device *cpu_dev)
+scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
+                  unsigned long *KHz)
 {
        unsigned long Hz;
        int ret, domain;
index 755bbdfc5b82ff67cdd3878b6083113181758a9b..3db4fca1172b4355877426a8a5f1b4aa95f942b7 100644 (file)
@@ -52,7 +52,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
        struct generic_pm_domain *pd;
        struct psci_pd_provider *pd_provider;
        struct dev_power_governor *pd_gov;
-       int ret = -ENOMEM, state_count = 0;
+       int ret = -ENOMEM;
 
        pd = dt_idle_pd_alloc(np, psci_dt_parse_state_node);
        if (!pd)
@@ -71,7 +71,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
                pd->flags |= GENPD_FLAG_ALWAYS_ON;
 
        /* Use governor for CPU PM domains if it has some states to manage. */
-       pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
+       pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
 
        ret = pm_genpd_init(pd, pd_gov, false);
        if (ret)
index b51b5df084500183667dae6adb9a12dc32b76dcf..540105ca0781f1c3bfa42ec0b1f1fa84e5bec55c 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/cpuidle.h>
 
@@ -131,6 +132,49 @@ static int psci_idle_cpuhp_down(unsigned int cpu)
        return 0;
 }
 
+static void psci_idle_syscore_switch(bool suspend)
+{
+       bool cleared = false;
+       struct device *dev;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               dev = per_cpu_ptr(&psci_cpuidle_data, cpu)->dev;
+
+               if (dev && suspend) {
+                       dev_pm_genpd_suspend(dev);
+               } else if (dev) {
+                       dev_pm_genpd_resume(dev);
+
+                       /* Account for userspace having offlined a CPU. */
+                       if (pm_runtime_status_suspended(dev))
+                               pm_runtime_set_active(dev);
+
+                       /* Clear domain state to re-start fresh. */
+                       if (!cleared) {
+                               psci_set_domain_state(0);
+                               cleared = true;
+                       }
+               }
+       }
+}
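+
+/*
+ * Example flow: during system-wide suspend the syscore stage calls
+ * psci_idle_syscore_suspend(), which suspends each CPU's genpd device;
+ * on resume the same walk resumes them and clears the aggregated
+ * domain state exactly once.
+ */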
+
+static int psci_idle_syscore_suspend(void)
+{
+       psci_idle_syscore_switch(true);
+       return 0;
+}
+
+static void psci_idle_syscore_resume(void)
+{
+       psci_idle_syscore_switch(false);
+}
+
+static struct syscore_ops psci_idle_syscore_ops = {
+       .suspend = psci_idle_syscore_suspend,
+       .resume = psci_idle_syscore_resume,
+};
+
 static void psci_idle_init_cpuhp(void)
 {
        int err;
@@ -138,6 +182,8 @@ static void psci_idle_init_cpuhp(void)
        if (!psci_cpuidle_use_cpuhp)
                return;
 
+       register_syscore_ops(&psci_idle_syscore_ops);
+
        err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING,
                                        "cpuidle/psci:online",
                                        psci_idle_cpuhp_up,
index 5c852e6719924bd74847c75ad6ab15e4496b17f4..1151e5e2ba824c51c4c3ba1bbd822438f97bcca5 100644 (file)
@@ -414,7 +414,7 @@ static int sbi_pd_init(struct device_node *np)
        struct generic_pm_domain *pd;
        struct sbi_pd_provider *pd_provider;
        struct dev_power_governor *pd_gov;
-       int ret = -ENOMEM, state_count = 0;
+       int ret = -ENOMEM;
 
        pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node);
        if (!pd)
@@ -433,7 +433,7 @@ static int sbi_pd_init(struct device_node *np)
                pd->flags |= GENPD_FLAG_ALWAYS_ON;
 
        /* Use governor for CPU PM domains if it has some states to manage. */
-       pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
+       pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
 
        ret = pm_genpd_init(pd, pd_gov, false);
        if (ret)
index 84ea7cba5ee5bf01365c7bde374d7fc9565bd58e..ea9f8b1ae981cf86046a6cbac6af9e51c845d075 100644 (file)
@@ -151,6 +151,9 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
          Selecting this will register the SEC4 hardware rng to
          the hw_random API for supplying the kernel entropy pool.
 
+config CRYPTO_DEV_FSL_CAAM_BLOB_GEN
+       bool
+
 endif # CRYPTO_DEV_FSL_CAAM_JR
 
 endif # CRYPTO_DEV_FSL_CAAM
index 3570286eb9ceea75715e868615f796bdbe704d60..25f7ae5a4642e738a6594954e0709a635716055d 100644 (file)
@@ -21,6 +21,7 @@ caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += caamalg_qi.o
 caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o
 caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o
 caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caampkc.o pkc_desc.o
+caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_BLOB_GEN) += blob_gen.o
 
 caam-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += qi.o
 ifneq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI),)
diff --git a/drivers/crypto/caam/blob_gen.c b/drivers/crypto/caam/blob_gen.c
new file mode 100644 (file)
index 0000000..6345c72
--- /dev/null
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 Pengutronix, Steffen Trumtrar <kernel@pengutronix.de>
+ * Copyright (C) 2021 Pengutronix, Ahmad Fatoum <kernel@pengutronix.de>
+ */
+
+#define pr_fmt(fmt) "caam blob_gen: " fmt
+
+#include <linux/device.h>
+#include <soc/fsl/caam-blob.h>
+
+#include "compat.h"
+#include "desc_constr.h"
+#include "desc.h"
+#include "error.h"
+#include "intern.h"
+#include "jr.h"
+#include "regs.h"
+
+#define CAAM_BLOB_DESC_BYTES_MAX                                       \
+       /* Command to initialize & stating length of descriptor */      \
+       (CAAM_CMD_SZ +                                                  \
+       /* Command to append the key-modifier + key-modifier data */    \
+        CAAM_CMD_SZ + CAAM_BLOB_KEYMOD_LENGTH +                        \
+       /* Command to include input key + pointer to the input key */   \
+        CAAM_CMD_SZ + CAAM_PTR_SZ_MAX +                                \
+       /* Command to include output key + pointer to the output key */ \
+        CAAM_CMD_SZ + CAAM_PTR_SZ_MAX +                                \
+       /* Command describing the operation to perform */               \
+        CAAM_CMD_SZ)
+
+struct caam_blob_priv {
+       struct device jrdev;
+};
+
+struct caam_blob_job_result {
+       int err;
+       struct completion completion;
+};
+
+static void caam_blob_job_done(struct device *dev, u32 *desc, u32 err, void *context)
+{
+       struct caam_blob_job_result *res = context;
+       int ecode = 0;
+
+       dev_dbg(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+
+       if (err)
+               ecode = caam_jr_strstatus(dev, err);
+
+       res->err = ecode;
+
+       /*
+        * Upon completion, desc points to a buffer containing a CAAM job
+        * descriptor which encapsulates data into an externally-storable
+        * blob.
+        */
+       complete(&res->completion);
+}
+
+int caam_process_blob(struct caam_blob_priv *priv,
+                     struct caam_blob_info *info, bool encap)
+{
+       struct caam_blob_job_result testres;
+       struct device *jrdev = &priv->jrdev;
+       dma_addr_t dma_in, dma_out;
+       int op = OP_PCLID_BLOB;
+       size_t output_len;
+       u32 *desc;
+       int ret;
+
+       if (info->key_mod_len > CAAM_BLOB_KEYMOD_LENGTH)
+               return -EINVAL;
+
+       if (encap) {
+               op |= OP_TYPE_ENCAP_PROTOCOL;
+               output_len = info->input_len + CAAM_BLOB_OVERHEAD;
+       } else {
+               op |= OP_TYPE_DECAP_PROTOCOL;
+               output_len = info->input_len - CAAM_BLOB_OVERHEAD;
+       }
+
+       desc = kzalloc(CAAM_BLOB_DESC_BYTES_MAX, GFP_KERNEL | GFP_DMA);
+       if (!desc)
+               return -ENOMEM;
+
+       dma_in = dma_map_single(jrdev, info->input, info->input_len,
+                               DMA_TO_DEVICE);
+       if (dma_mapping_error(jrdev, dma_in)) {
+               dev_err(jrdev, "unable to map input DMA buffer\n");
+               ret = -ENOMEM;
+               goto out_free;
+       }
+
+       dma_out = dma_map_single(jrdev, info->output, output_len,
+                                DMA_FROM_DEVICE);
+       if (dma_mapping_error(jrdev, dma_out)) {
+               dev_err(jrdev, "unable to map output DMA buffer\n");
+               ret = -ENOMEM;
+               goto out_unmap_in;
+       }
+
+       /*
+        * A data blob is encrypted using a blob key (BK), a random number.
+        * The BK is used as an AES-CCM key. The initial block (B0) and the
+        * initial counter (Ctr0) are generated automatically and stored in
+        * Class 1 Context DWords 0+1+2+3. The random BK is stored in the
+        * Class 1 Key Register. Operation Mode is set to AES-CCM.
+        */
+
+       init_job_desc(desc, 0);
+       append_key_as_imm(desc, info->key_mod, info->key_mod_len,
+                         info->key_mod_len, CLASS_2 | KEY_DEST_CLASS_REG);
+       append_seq_in_ptr_intlen(desc, dma_in, info->input_len, 0);
+       append_seq_out_ptr_intlen(desc, dma_out, output_len, 0);
+       append_operation(desc, op);
+
+       print_hex_dump_debug("data@"__stringify(__LINE__)": ",
+                            DUMP_PREFIX_ADDRESS, 16, 1, info->input,
+                            info->input_len, false);
+       print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+                            DUMP_PREFIX_ADDRESS, 16, 1, desc,
+                            desc_bytes(desc), false);
+
+       testres.err = 0;
+       init_completion(&testres.completion);
+
+       ret = caam_jr_enqueue(jrdev, desc, caam_blob_job_done, &testres);
+       if (ret == -EINPROGRESS) {
+               wait_for_completion(&testres.completion);
+               ret = testres.err;
+               print_hex_dump_debug("output@"__stringify(__LINE__)": ",
+                                    DUMP_PREFIX_ADDRESS, 16, 1, info->output,
+                                    output_len, false);
+       }
+
+       if (ret == 0)
+               info->output_len = output_len;
+
+       dma_unmap_single(jrdev, dma_out, output_len, DMA_FROM_DEVICE);
+out_unmap_in:
+       dma_unmap_single(jrdev, dma_in, info->input_len, DMA_TO_DEVICE);
+out_free:
+       kfree(desc);
+
+       return ret;
+}
+EXPORT_SYMBOL(caam_process_blob);
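+
+/*
+ * Usage sketch (hypothetical caller): encapsulating a 32-byte secret
+ * produces a blob that is CAAM_BLOB_OVERHEAD bytes larger:
+ *
+ *     struct caam_blob_info info = {
+ *             .input       = secret,
+ *             .input_len   = 32,
+ *             .output      = blob, // >= 32 + CAAM_BLOB_OVERHEAD bytes
+ *             .key_mod     = "my key modifier",
+ *             .key_mod_len = 15,
+ *     };
+ *     err = caam_process_blob(priv, &info, true);
+ *
+ * On success, info.output_len holds the resulting blob size.
+ */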
+
+struct caam_blob_priv *caam_blob_gen_init(void)
+{
+       struct caam_drv_private *ctrlpriv;
+       struct device *jrdev;
+
+       /*
+        * caam_blob_gen_init() is expected to fail with -ENODEV in some
+        * cases, e.g. when the CAAM driver didn't probe or when the SoC
+        * lacks BLOB support. An error would be too harsh here, so we
+        * stick to the info level.
+        */
+
+       jrdev = caam_jr_alloc();
+       if (IS_ERR(jrdev)) {
+               pr_info("job ring requested, but none currently available\n");
+               return ERR_PTR(-ENODEV);
+       }
+
+       ctrlpriv = dev_get_drvdata(jrdev->parent);
+       if (!ctrlpriv->blob_present) {
+               dev_info(jrdev, "no hardware blob generation support\n");
+               caam_jr_free(jrdev);
+               return ERR_PTR(-ENODEV);
+       }
+
+       return container_of(jrdev, struct caam_blob_priv, jrdev);
+}
+EXPORT_SYMBOL(caam_blob_gen_init);
+
+void caam_blob_gen_exit(struct caam_blob_priv *priv)
+{
+       caam_jr_free(&priv->jrdev);
+}
+EXPORT_SYMBOL(caam_blob_gen_exit);
index ca0361b2dbb07cc4bf4c2325c5231f76c98456b6..38c4d88a9d03d7fe0f0285fffc538cf30833dbdd 100644 (file)
@@ -820,12 +820,25 @@ static int caam_probe(struct platform_device *pdev)
                return -ENOMEM;
        }
 
-       if (ctrlpriv->era < 10)
+       comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ls);
+       ctrlpriv->blob_present = !!(comp_params & CTPR_LS_BLOB);
+
+       /*
+        * Some SoCs like the LS1028A (non-E) indicate CTPR_LS_BLOB support,
+        * but fail when actually using it due to missing AES support, so
+        * check both here.
+        */
+       if (ctrlpriv->era < 10) {
                rng_vid = (rd_reg32(&ctrl->perfmon.cha_id_ls) &
                           CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT;
-       else
+               ctrlpriv->blob_present = ctrlpriv->blob_present &&
+                       (rd_reg32(&ctrl->perfmon.cha_num_ls) & CHA_ID_LS_AES_MASK);
+       } else {
                rng_vid = (rd_reg32(&ctrl->vreg.rng) & CHA_VER_VID_MASK) >>
                           CHA_VER_VID_SHIFT;
+               ctrlpriv->blob_present = ctrlpriv->blob_present &&
+                       (rd_reg32(&ctrl->vreg.aesa) & CHA_VER_MISC_AES_NUM_MASK);
+       }
 
        /*
         * If SEC has RNG version >= 4 and RNG state handle has not been
index 7d45b21bd55ade8164d226fad03242941993735c..e92210e2ab76339f370504477dea57d0770d5960 100644 (file)
@@ -92,6 +92,7 @@ struct caam_drv_private {
         */
        u8 total_jobrs;         /* Total Job Rings in device */
        u8 qi_present;          /* Nonzero if QI present in device */
+       u8 blob_present;        /* Nonzero if BLOB support present in device */
        u8 mc_en;               /* Nonzero if MC f/w is active */
        int secvio_irq;         /* Security violation interrupt number */
        int virt_en;            /* Virtualization enabled in CAAM */
index 3738625c025092840a96e9528e7bbf250ce1cb3a..66d6dad841bb2aa5d4f2871c4dfe990cbd9e60f0 100644 (file)
@@ -320,7 +320,8 @@ struct version_regs {
 #define CHA_VER_VID_MASK       (0xffull << CHA_VER_VID_SHIFT)
 
 /* CHA Miscellaneous Information - AESA_MISC specific */
-#define CHA_VER_MISC_AES_GCM   BIT(1 + CHA_VER_MISC_SHIFT)
+#define CHA_VER_MISC_AES_NUM_MASK      GENMASK(7, 0)
+#define CHA_VER_MISC_AES_GCM           BIT(1 + CHA_VER_MISC_SHIFT)
 
 /* CHA Miscellaneous Information - PKHA_MISC specific */
 #define CHA_VER_MISC_PKHA_NO_CRYPT     BIT(7 + CHA_VER_MISC_SHIFT)
@@ -414,6 +415,7 @@ struct caam_perfmon {
 #define CTPR_MS_PG_SZ_MASK     0x10
 #define CTPR_MS_PG_SZ_SHIFT    4
        u32 comp_parms_ms;      /* CTPR - Compile Parameters Register   */
+#define CTPR_LS_BLOB           BIT(1)
        u32 comp_parms_ls;      /* CTPR - Compile Parameters Register   */
        u64 rsvd1[2];
 
index 11f30fd48c1414780006ec57b7fca020ce1e891d..031b5f701a0a35b40316be71ae41eeb02f06dcff 100644 (file)
@@ -65,6 +65,7 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
                } else {
                        /* copy only remaining bytes */
                        memcpy(data, &val, max - currsize);
+                       break;
                }
        } while (currsize < max);
 
index a525a609dfc6096b4837649935f5cb664564cded..01474daf45483af90a96e353eac87a31bef162f4 100644 (file)
@@ -112,16 +112,16 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq)
 }
 
 /**
- * get_freq_range() - Get the current freq range
+ * devfreq_get_freq_range() - Get the current freq range
  * @devfreq:   the devfreq instance
  * @min_freq:  the min frequency
  * @max_freq:  the max frequency
  *
  * This takes into consideration all constraints.
  */
-static void get_freq_range(struct devfreq *devfreq,
-                          unsigned long *min_freq,
-                          unsigned long *max_freq)
+void devfreq_get_freq_range(struct devfreq *devfreq,
+                           unsigned long *min_freq,
+                           unsigned long *max_freq)
 {
        unsigned long *freq_table = devfreq->profile->freq_table;
        s32 qos_min_freq, qos_max_freq;
@@ -158,6 +158,7 @@ static void get_freq_range(struct devfreq *devfreq,
        if (*min_freq > *max_freq)
                *min_freq = *max_freq;
 }
+EXPORT_SYMBOL(devfreq_get_freq_range);
 
 /**
  * devfreq_get_freq_level() - Lookup freq_table for the frequency
@@ -418,7 +419,7 @@ int devfreq_update_target(struct devfreq *devfreq, unsigned long freq)
        err = devfreq->governor->get_target_freq(devfreq, &freq);
        if (err)
                return err;
-       get_freq_range(devfreq, &min_freq, &max_freq);
+       devfreq_get_freq_range(devfreq, &min_freq, &max_freq);
 
        if (freq < min_freq) {
                freq = min_freq;
@@ -785,6 +786,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 {
        struct devfreq *devfreq;
        struct devfreq_governor *governor;
+       unsigned long min_freq, max_freq;
        int err = 0;
 
        if (!dev || !profile || !governor_name) {
@@ -849,6 +851,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
                goto err_dev;
        }
 
+       devfreq_get_freq_range(devfreq, &min_freq, &max_freq);
+
        devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
        devfreq->opp_table = dev_pm_opp_get_opp_table(dev);
        if (IS_ERR(devfreq->opp_table))
@@ -1587,7 +1591,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
        unsigned long min_freq, max_freq;
 
        mutex_lock(&df->lock);
-       get_freq_range(df, &min_freq, &max_freq);
+       devfreq_get_freq_range(df, &min_freq, &max_freq);
        mutex_unlock(&df->lock);
 
        return sprintf(buf, "%lu\n", min_freq);
@@ -1641,7 +1645,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
        unsigned long min_freq, max_freq;
 
        mutex_lock(&df->lock);
-       get_freq_range(df, &min_freq, &max_freq);
+       devfreq_get_freq_range(df, &min_freq, &max_freq);
        mutex_unlock(&df->lock);
 
        return sprintf(buf, "%lu\n", max_freq);
@@ -1955,7 +1959,7 @@ static int devfreq_summary_show(struct seq_file *s, void *data)
 
                mutex_lock(&devfreq->lock);
                cur_freq = devfreq->previous_freq;
-               get_freq_range(devfreq, &min_freq, &max_freq);
+               devfreq_get_freq_range(devfreq, &min_freq, &max_freq);
                timer = devfreq->profile->timer;
 
                if (IS_SUPPORTED_ATTR(devfreq->governor->attrs, POLLING_INTERVAL))
index 002a7d67e39d469b421f3f5f2d6413ea2400340b..0adfebc0467a3db39278814fa66d2b1f25d61f7a 100644 (file)
 #define DEVFREQ_GOV_ATTR_POLLING_INTERVAL              BIT(0)
 #define DEVFREQ_GOV_ATTR_TIMER                         BIT(1)
 
+/**
+ * struct devfreq_cpu_data - Hold the per-cpu data
+ * @node:      list node
+ * @dev:       reference to cpu device.
+ * @first_cpu: the first cpu in the policy's related_cpus cpumask.
+ * @opp_table: reference to cpu opp table.
+ * @cur_freq:  the current frequency of the cpu.
+ * @min_freq:  the min frequency of the cpu.
+ * @max_freq:  the max frequency of the cpu.
+ *
+ * This structure stores the required cpu_data of a cpu.
+ * This is auto-populated by the governor.
+ */
+struct devfreq_cpu_data {
+       struct list_head node;
+
+       struct device *dev;
+       unsigned int first_cpu;
+
+       struct opp_table *opp_table;
+       unsigned int cur_freq;
+       unsigned int min_freq;
+       unsigned int max_freq;
+};
+
 /**
  * struct devfreq_governor - Devfreq policy governor
  * @node:              list node - contains registered devfreq governors
@@ -89,6 +114,8 @@ int devm_devfreq_add_governor(struct device *dev,
 
 int devfreq_update_status(struct devfreq *devfreq, unsigned long freq);
 int devfreq_update_target(struct devfreq *devfreq, unsigned long freq);
+void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq,
+                           unsigned long *max_freq);
 
 static inline int devfreq_update_stats(struct devfreq *df)
 {
index fc09324a03e03fd8d6ed841f80cd0e019415f59f..72c67979ebe11217b9abd66c39222d5c37eab9ef 100644 (file)
@@ -1,4 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/drivers/devfreq/governor_passive.c
  *
  */
 
 #include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
 #include <linux/device.h>
 #include <linux/devfreq.h>
 #include "governor.h"
 
-static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
-                                       unsigned long *freq)
+#define HZ_PER_KHZ     1000
+
+static struct devfreq_cpu_data *
+get_parent_cpu_data(struct devfreq_passive_data *p_data,
+                   struct cpufreq_policy *policy)
 {
-       struct devfreq_passive_data *p_data
-                       = (struct devfreq_passive_data *)devfreq->data;
-       struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
-       unsigned long child_freq = ULONG_MAX;
-       struct dev_pm_opp *opp, *p_opp;
-       int i, count;
+       struct devfreq_cpu_data *parent_cpu_data;
 
-       /*
-        * If the devfreq device with passive governor has the specific method
-        * to determine the next frequency, should use the get_target_freq()
-        * of struct devfreq_passive_data.
-        */
-       if (p_data->get_target_freq)
-               return p_data->get_target_freq(devfreq, freq);
+       if (!p_data || !policy)
+               return NULL;
 
-       /*
-        * If the parent and passive devfreq device uses the OPP table,
-        * get the next frequency by using the OPP table.
-        */
+       list_for_each_entry(parent_cpu_data, &p_data->cpu_data_list, node)
+               if (parent_cpu_data->first_cpu == cpumask_first(policy->related_cpus))
+                       return parent_cpu_data;
 
-       /*
-        * - parent devfreq device uses the governors except for passive.
-        * - passive devfreq device uses the passive governor.
-        *
-        * Each devfreq has the OPP table. After deciding the new frequency
-        * from the governor of parent devfreq device, the passive governor
-        * need to get the index of new frequency on OPP table of parent
-        * device. And then the index is used for getting the suitable
-        * new frequency for passive devfreq device.
-        */
-       if (!devfreq->profile || !devfreq->profile->freq_table
-               || devfreq->profile->max_state <= 0)
-               return -EINVAL;
+       return NULL;
+}
 
-       /*
-        * The passive governor have to get the correct frequency from OPP
-        * list of parent device. Because in this case, *freq is temporary
-        * value which is decided by ondemand governor.
-        */
-       if (devfreq->opp_table && parent_devfreq->opp_table) {
-               p_opp = devfreq_recommended_opp(parent_devfreq->dev.parent,
-                                               freq, 0);
-               if (IS_ERR(p_opp))
-                       return PTR_ERR(p_opp);
+static unsigned long get_target_freq_by_required_opp(struct device *p_dev,
+                                               struct opp_table *p_opp_table,
+                                               struct opp_table *opp_table,
+                                               unsigned long *freq)
+{
+       struct dev_pm_opp *opp = NULL, *p_opp = NULL;
+       unsigned long target_freq;
 
-               opp = dev_pm_opp_xlate_required_opp(parent_devfreq->opp_table,
-                                                   devfreq->opp_table, p_opp);
-               dev_pm_opp_put(p_opp);
+       if (!p_dev || !p_opp_table || !opp_table || !freq)
+               return 0;
 
-               if (IS_ERR(opp))
-                       goto no_required_opp;
+       p_opp = devfreq_recommended_opp(p_dev, freq, 0);
+       if (IS_ERR(p_opp))
+               return 0;
 
-               *freq = dev_pm_opp_get_freq(opp);
-               dev_pm_opp_put(opp);
+       opp = dev_pm_opp_xlate_required_opp(p_opp_table, opp_table, p_opp);
+       dev_pm_opp_put(p_opp);
 
+       if (IS_ERR(opp))
                return 0;
+
+       target_freq = dev_pm_opp_get_freq(opp);
+       dev_pm_opp_put(opp);
+
+       return target_freq;
+}
+
+static int get_target_freq_with_cpufreq(struct devfreq *devfreq,
+                                       unsigned long *target_freq)
+{
+       struct devfreq_passive_data *p_data =
+                               (struct devfreq_passive_data *)devfreq->data;
+       struct devfreq_cpu_data *parent_cpu_data;
+       struct cpufreq_policy *policy;
+       unsigned long cpu, cpu_cur, cpu_min, cpu_max, cpu_percent;
+       unsigned long dev_min, dev_max;
+       unsigned long freq = 0;
+       int ret = 0;
+
+       for_each_online_cpu(cpu) {
+               policy = cpufreq_cpu_get(cpu);
+               if (!policy) {
+                       ret = -EINVAL;
+                       continue;
+               }
+
+               parent_cpu_data = get_parent_cpu_data(p_data, policy);
+               if (!parent_cpu_data) {
+                       cpufreq_cpu_put(policy);
+                       continue;
+               }
+
+               /* Get target freq via required opps */
+               cpu_cur = parent_cpu_data->cur_freq * HZ_PER_KHZ;
+               freq = get_target_freq_by_required_opp(parent_cpu_data->dev,
+                                       parent_cpu_data->opp_table,
+                                       devfreq->opp_table, &cpu_cur);
+               if (freq) {
+                       *target_freq = max(freq, *target_freq);
+                       cpufreq_cpu_put(policy);
+                       continue;
+               }
+
+               /* Use interpolation if required-opps are not available */
+               devfreq_get_freq_range(devfreq, &dev_min, &dev_max);
+
+               cpu_min = parent_cpu_data->min_freq;
+               cpu_max = parent_cpu_data->max_freq;
+               cpu_cur = parent_cpu_data->cur_freq;
+
+               cpu_percent = ((cpu_cur - cpu_min) * 100) / (cpu_max - cpu_min);
+               freq = dev_min + mult_frac(dev_max - dev_min, cpu_percent, 100);
+
+               *target_freq = max(freq, *target_freq);
+               cpufreq_cpu_put(policy);
        }
 
-no_required_opp:
-       /*
-        * Get the OPP table's index of decided frequency by governor
-        * of parent device.
-        */
+       return ret;
+}
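+
+/*
+ * Interpolation example: a CPU policy spanning 400-2000 MHz running at
+ * 1200 MHz yields cpu_percent = (1200 - 400) * 100 / (2000 - 400) = 50,
+ * so a passive device with a 100-800 MHz range is asked for
+ * 100 + (800 - 100) * 50 / 100 = 450 MHz.
+ */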
+
+static int get_target_freq_with_devfreq(struct devfreq *devfreq,
+                                       unsigned long *freq)
+{
+       struct devfreq_passive_data *p_data
+                       = (struct devfreq_passive_data *)devfreq->data;
+       struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
+       unsigned long child_freq = ULONG_MAX;
+       int i, count;
+
+       /* Get target freq via required opps */
+       child_freq = get_target_freq_by_required_opp(parent_devfreq->dev.parent,
+                                               parent_devfreq->opp_table,
+                                               devfreq->opp_table, freq);
+       if (child_freq)
+               goto out;
+
+       /* Use interpolation if required-opps are not available */
        for (i = 0; i < parent_devfreq->profile->max_state; i++)
                if (parent_devfreq->profile->freq_table[i] == *freq)
                        break;
@@ -85,7 +138,6 @@ no_required_opp:
        if (i == parent_devfreq->profile->max_state)
                return -EINVAL;
 
-       /* Get the suitable frequency by using index of parent device. */
        if (i < devfreq->profile->max_state) {
                child_freq = devfreq->profile->freq_table[i];
        } else {
@@ -93,12 +145,202 @@ no_required_opp:
                child_freq = devfreq->profile->freq_table[count - 1];
        }
 
-       /* Return the suitable frequency for passive device. */
+out:
        *freq = child_freq;
 
        return 0;
 }
 
+static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
+                                          unsigned long *freq)
+{
+       struct devfreq_passive_data *p_data =
+                               (struct devfreq_passive_data *)devfreq->data;
+       int ret;
+
+       if (!p_data)
+               return -EINVAL;
+
+       /*
+        * If the devfreq device using the passive governor has a specific
+        * method to determine the next frequency, use the get_target_freq()
+        * callback of struct devfreq_passive_data.
+        */
+       if (p_data->get_target_freq)
+               return p_data->get_target_freq(devfreq, freq);
+
+       switch (p_data->parent_type) {
+       case DEVFREQ_PARENT_DEV:
+               ret = get_target_freq_with_devfreq(devfreq, freq);
+               break;
+       case CPUFREQ_PARENT_DEV:
+               ret = get_target_freq_with_cpufreq(devfreq, freq);
+               break;
+       default:
+               ret = -EINVAL;
+               dev_err(&devfreq->dev, "Invalid parent type\n");
+               break;
+       }
+
+       return ret;
+}
+
+static int cpufreq_passive_notifier_call(struct notifier_block *nb,
+                                        unsigned long event, void *ptr)
+{
+       struct devfreq_passive_data *p_data =
+                       container_of(nb, struct devfreq_passive_data, nb);
+       struct devfreq *devfreq = (struct devfreq *)p_data->this;
+       struct devfreq_cpu_data *parent_cpu_data;
+       struct cpufreq_freqs *freqs = ptr;
+       unsigned int cur_freq;
+       int ret;
+
+       if (event != CPUFREQ_POSTCHANGE || !freqs)
+               return 0;
+
+       parent_cpu_data = get_parent_cpu_data(p_data, freqs->policy);
+       if (!parent_cpu_data || parent_cpu_data->cur_freq == freqs->new)
+               return 0;
+
+       cur_freq = parent_cpu_data->cur_freq;
+       parent_cpu_data->cur_freq = freqs->new;
+
+       mutex_lock(&devfreq->lock);
+       ret = devfreq_update_target(devfreq, freqs->new);
+       mutex_unlock(&devfreq->lock);
+       if (ret) {
+               parent_cpu_data->cur_freq = cur_freq;
+               dev_err(&devfreq->dev, "failed to update the frequency.\n");
+               return ret;
+       }
+
+       return 0;
+}
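+
+/*
+ * E.g. when cpufreq moves a policy from 1.0 GHz to 1.5 GHz, the
+ * CPUFREQ_POSTCHANGE notification above refreshes the cached cur_freq
+ * and re-evaluates the passive device's target frequency; on failure
+ * the cached value is rolled back.
+ */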
+
+static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
+{
+       struct devfreq_passive_data *p_data
+                       = (struct devfreq_passive_data *)devfreq->data;
+       struct devfreq_cpu_data *parent_cpu_data;
+       int cpu, ret = 0;
+
+       if (p_data->nb.notifier_call) {
+               ret = cpufreq_unregister_notifier(&p_data->nb,
+                                       CPUFREQ_TRANSITION_NOTIFIER);
+               if (ret < 0)
+                       return ret;
+       }
+
+       for_each_possible_cpu(cpu) {
+               struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+               if (!policy) {
+                       ret = -EINVAL;
+                       continue;
+               }
+
+               parent_cpu_data = get_parent_cpu_data(p_data, policy);
+               if (!parent_cpu_data) {
+                       cpufreq_cpu_put(policy);
+                       continue;
+               }
+
+               list_del(&parent_cpu_data->node);
+               if (parent_cpu_data->opp_table)
+                       dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
+               kfree(parent_cpu_data);
+               cpufreq_cpu_put(policy);
+       }
+
+       return ret;
+}
+
+static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
+{
+       struct devfreq_passive_data *p_data
+                       = (struct devfreq_passive_data *)devfreq->data;
+       struct device *dev = devfreq->dev.parent;
+       struct opp_table *opp_table = NULL;
+       struct devfreq_cpu_data *parent_cpu_data;
+       struct cpufreq_policy *policy;
+       struct device *cpu_dev;
+       unsigned int cpu;
+       int ret;
+
+       p_data->cpu_data_list
+               = (struct list_head)LIST_HEAD_INIT(p_data->cpu_data_list);
+
+       p_data->nb.notifier_call = cpufreq_passive_notifier_call;
+       ret = cpufreq_register_notifier(&p_data->nb, CPUFREQ_TRANSITION_NOTIFIER);
+       if (ret) {
+               dev_err(dev, "failed to register cpufreq notifier\n");
+               p_data->nb.notifier_call = NULL;
+               goto err;
+       }
+
+       for_each_possible_cpu(cpu) {
+               policy = cpufreq_cpu_get(cpu);
+               if (!policy) {
+                       ret = -EPROBE_DEFER;
+                       goto err;
+               }
+
+               parent_cpu_data = get_parent_cpu_data(p_data, policy);
+               if (parent_cpu_data) {
+                       cpufreq_cpu_put(policy);
+                       continue;
+               }
+
+               parent_cpu_data = kzalloc(sizeof(*parent_cpu_data),
+                                               GFP_KERNEL);
+               if (!parent_cpu_data) {
+                       ret = -ENOMEM;
+                       goto err_put_policy;
+               }
+
+               cpu_dev = get_cpu_device(cpu);
+               if (!cpu_dev) {
+                       dev_err(dev, "failed to get cpu device\n");
+                       ret = -ENODEV;
+                       goto err_free_cpu_data;
+               }
+
+               opp_table = dev_pm_opp_get_opp_table(cpu_dev);
+               if (IS_ERR(opp_table)) {
+                       dev_err(dev, "failed to get opp_table of cpu%d\n", cpu);
+                       ret = PTR_ERR(opp_table);
+                       goto err_free_cpu_data;
+               }
+
+               parent_cpu_data->dev = cpu_dev;
+               parent_cpu_data->opp_table = opp_table;
+               parent_cpu_data->first_cpu = cpumask_first(policy->related_cpus);
+               parent_cpu_data->cur_freq = policy->cur;
+               parent_cpu_data->min_freq = policy->cpuinfo.min_freq;
+               parent_cpu_data->max_freq = policy->cpuinfo.max_freq;
+
+               list_add_tail(&parent_cpu_data->node, &p_data->cpu_data_list);
+               cpufreq_cpu_put(policy);
+       }
+
+       mutex_lock(&devfreq->lock);
+       ret = devfreq_update_target(devfreq, 0L);
+       mutex_unlock(&devfreq->lock);
+       if (ret)
+               dev_err(dev, "failed to update the frequency\n");
+
+       return ret;
+
+err_free_cpu_data:
+       kfree(parent_cpu_data);
+err_put_policy:
+       cpufreq_cpu_put(policy);
+err:
+       WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
+
+       return ret;
+}
+
 static int devfreq_passive_notifier_call(struct notifier_block *nb,
                                unsigned long event, void *ptr)
 {
@@ -131,30 +373,55 @@ static int devfreq_passive_notifier_call(struct notifier_block *nb,
        return NOTIFY_DONE;
 }
 
-static int devfreq_passive_event_handler(struct devfreq *devfreq,
-                               unsigned int event, void *data)
+static int devfreq_passive_unregister_notifier(struct devfreq *devfreq)
+{
+       struct devfreq_passive_data *p_data
+                       = (struct devfreq_passive_data *)devfreq->data;
+       struct devfreq *parent = (struct devfreq *)p_data->parent;
+       struct notifier_block *nb = &p_data->nb;
+
+       return devfreq_unregister_notifier(parent, nb, DEVFREQ_TRANSITION_NOTIFIER);
+}
+
+static int devfreq_passive_register_notifier(struct devfreq *devfreq)
 {
        struct devfreq_passive_data *p_data
                        = (struct devfreq_passive_data *)devfreq->data;
        struct devfreq *parent = (struct devfreq *)p_data->parent;
        struct notifier_block *nb = &p_data->nb;
-       int ret = 0;
 
        if (!parent)
                return -EPROBE_DEFER;
 
+       nb->notifier_call = devfreq_passive_notifier_call;
+       return devfreq_register_notifier(parent, nb, DEVFREQ_TRANSITION_NOTIFIER);
+}
+
+static int devfreq_passive_event_handler(struct devfreq *devfreq,
+                               unsigned int event, void *data)
+{
+       struct devfreq_passive_data *p_data
+                       = (struct devfreq_passive_data *)devfreq->data;
+       int ret = 0;
+
+       if (!p_data)
+               return -EINVAL;
+
+       if (!p_data->this)
+               p_data->this = devfreq;
+
        switch (event) {
        case DEVFREQ_GOV_START:
-               if (!p_data->this)
-                       p_data->this = devfreq;
-
-               nb->notifier_call = devfreq_passive_notifier_call;
-               ret = devfreq_register_notifier(parent, nb,
-                                       DEVFREQ_TRANSITION_NOTIFIER);
+               if (p_data->parent_type == DEVFREQ_PARENT_DEV)
+                       ret = devfreq_passive_register_notifier(devfreq);
+               else if (p_data->parent_type == CPUFREQ_PARENT_DEV)
+                       ret = cpufreq_passive_register_notifier(devfreq);
                break;
        case DEVFREQ_GOV_STOP:
-               WARN_ON(devfreq_unregister_notifier(parent, nb,
-                                       DEVFREQ_TRANSITION_NOTIFIER));
+               if (p_data->parent_type == DEVFREQ_PARENT_DEV)
+                       WARN_ON(devfreq_passive_unregister_notifier(devfreq));
+               else if (p_data->parent_type == CPUFREQ_PARENT_DEV)
+                       WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
                break;
        default:
                break;
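
With the event handler now dispatching on parent_type, a consumer driver
follows cpufreq rather than a parent devfreq device purely through the
devfreq_passive_data it passes in. A minimal driver-side sketch, assuming
an already-populated profile (my_profile and the surrounding names are
illustrative, not part of this patch):

    static struct devfreq_passive_data passive_data = {
            /* no .parent needed; the governor walks the cpufreq policies */
            .parent_type = CPUFREQ_PARENT_DEV,
    };

    /* in probe() */
    devfreq = devm_devfreq_add_device(dev, &my_profile,
                                      DEVFREQ_GOV_PASSIVE, &passive_data);
    if (IS_ERR(devfreq))
            return PTR_ERR(devfreq);
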
index 293857ebfd75ddb56933b75a0aeb8bce94434d3e..daff407026157bf286214ad63e15acc9b40ac953 100644 (file)
@@ -5,6 +5,7 @@
  */
 
 #include <linux/arm-smccc.h>
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/devfreq.h>
 #include <linux/rwsem.h>
 #include <linux/suspend.h>
 
+#include <soc/rockchip/pm_domains.h>
 #include <soc/rockchip/rk3399_grf.h>
 #include <soc/rockchip/rockchip_sip.h>
 
-struct dram_timing {
-       unsigned int ddr3_speed_bin;
-       unsigned int pd_idle;
-       unsigned int sr_idle;
-       unsigned int sr_mc_gate_idle;
-       unsigned int srpd_lite_idle;
-       unsigned int standby_idle;
-       unsigned int auto_pd_dis_freq;
-       unsigned int dram_dll_dis_freq;
-       unsigned int phy_dll_dis_freq;
-       unsigned int ddr3_odt_dis_freq;
-       unsigned int ddr3_drv;
-       unsigned int ddr3_odt;
-       unsigned int phy_ddr3_ca_drv;
-       unsigned int phy_ddr3_dq_drv;
-       unsigned int phy_ddr3_odt;
-       unsigned int lpddr3_odt_dis_freq;
-       unsigned int lpddr3_drv;
-       unsigned int lpddr3_odt;
-       unsigned int phy_lpddr3_ca_drv;
-       unsigned int phy_lpddr3_dq_drv;
-       unsigned int phy_lpddr3_odt;
-       unsigned int lpddr4_odt_dis_freq;
-       unsigned int lpddr4_drv;
-       unsigned int lpddr4_dq_odt;
-       unsigned int lpddr4_ca_odt;
-       unsigned int phy_lpddr4_ca_drv;
-       unsigned int phy_lpddr4_ck_cs_drv;
-       unsigned int phy_lpddr4_dq_drv;
-       unsigned int phy_lpddr4_odt;
-};
+#define NS_TO_CYCLE(NS, MHz)                           (((NS) * (MHz)) / NSEC_PER_USEC)
+
+#define RK3399_SET_ODT_PD_0_SR_IDLE                    GENMASK(7, 0)
+#define RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE            GENMASK(15, 8)
+#define RK3399_SET_ODT_PD_0_STANDBY_IDLE               GENMASK(31, 16)
+
+#define RK3399_SET_ODT_PD_1_PD_IDLE                    GENMASK(11, 0)
+#define RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE             GENMASK(27, 16)
+
+#define RK3399_SET_ODT_PD_2_ODT_ENABLE                 BIT(0)
 
 struct rk3399_dmcfreq {
        struct device *dev;
        struct devfreq *devfreq;
+       struct devfreq_dev_profile profile;
        struct devfreq_simple_ondemand_data ondemand_data;
        struct clk *dmc_clk;
        struct devfreq_event_dev *edev;
        struct mutex lock;
-       struct dram_timing timing;
        struct regulator *vdd_center;
        struct regmap *regmap_pmu;
        unsigned long rate, target_rate;
        unsigned long volt, target_volt;
        unsigned int odt_dis_freq;
-       int odt_pd_arg0, odt_pd_arg1;
+
+       unsigned int pd_idle_ns;
+       unsigned int sr_idle_ns;
+       unsigned int sr_mc_gate_idle_ns;
+       unsigned int srpd_lite_idle_ns;
+       unsigned int standby_idle_ns;
+       unsigned int ddr3_odt_dis_freq;
+       unsigned int lpddr3_odt_dis_freq;
+       unsigned int lpddr4_odt_dis_freq;
+
+       unsigned int pd_idle_dis_freq;
+       unsigned int sr_idle_dis_freq;
+       unsigned int sr_mc_gate_idle_dis_freq;
+       unsigned int srpd_lite_idle_dis_freq;
+       unsigned int standby_idle_dis_freq;
 };
 
 static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
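
NS_TO_CYCLE() above is plain unit arithmetic: a duration in nanoseconds
times a clock in MHz, divided by NSEC_PER_USEC (1000), gives clock
cycles. A quick sanity check with illustrative numbers:

    /* 120 ns at a 400 MHz DDR controller clock:
     * (120 * 400) / 1000 = 48 controller cycles
     */
    unsigned int cycles = NS_TO_CYCLE(120, 400);    /* == 48 */
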
@@ -78,10 +73,14 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
        struct dev_pm_opp *opp;
        unsigned long old_clk_rate = dmcfreq->rate;
        unsigned long target_volt, target_rate;
+       unsigned int ddrcon_mhz;
        struct arm_smccc_res res;
-       bool odt_enable = false;
        int err;
 
+       u32 odt_pd_arg0 = 0;
+       u32 odt_pd_arg1 = 0;
+       u32 odt_pd_arg2 = 0;
+
        opp = devfreq_recommended_opp(dev, freq, flags);
        if (IS_ERR(opp))
                return PTR_ERR(opp);
@@ -95,19 +94,71 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
 
        mutex_lock(&dmcfreq->lock);
 
+       /*
+        * Ensure power-domain transitions don't interfere with ARM Trusted
+        * Firmware power-domain idling.
+        */
+       err = rockchip_pmu_block();
+       if (err) {
+               dev_err(dev, "Failed to block PMU: %d\n", err);
+               goto out_unlock;
+       }
+
+       /*
+        * Some idle parameters may be based on the DDR controller clock, which
+        * is half of the DDR frequency.
+        * pd_idle and standby_idle are based on the controller clock cycle.
+        * sr_idle_cycle, sr_mc_gate_idle_cycle, and srpd_lite_idle_cycle
+        * are based on 1024 controller clock cycles.
+        */
+       ddrcon_mhz = target_rate / USEC_PER_SEC / 2;
+
+       u32p_replace_bits(&odt_pd_arg1,
+                         NS_TO_CYCLE(dmcfreq->pd_idle_ns, ddrcon_mhz),
+                         RK3399_SET_ODT_PD_1_PD_IDLE);
+       u32p_replace_bits(&odt_pd_arg0,
+                         NS_TO_CYCLE(dmcfreq->standby_idle_ns, ddrcon_mhz),
+                         RK3399_SET_ODT_PD_0_STANDBY_IDLE);
+       u32p_replace_bits(&odt_pd_arg0,
+                         DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->sr_idle_ns,
+                                                  ddrcon_mhz), 1024),
+                         RK3399_SET_ODT_PD_0_SR_IDLE);
+       u32p_replace_bits(&odt_pd_arg0,
+                         DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->sr_mc_gate_idle_ns,
+                                                  ddrcon_mhz), 1024),
+                         RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE);
+       u32p_replace_bits(&odt_pd_arg1,
+                         DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->srpd_lite_idle_ns,
+                                                  ddrcon_mhz), 1024),
+                         RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE);
+
        if (dmcfreq->regmap_pmu) {
+               if (target_rate >= dmcfreq->sr_idle_dis_freq)
+                       odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_SR_IDLE;
+
+               if (target_rate >= dmcfreq->sr_mc_gate_idle_dis_freq)
+                       odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE;
+
+               if (target_rate >= dmcfreq->standby_idle_dis_freq)
+                       odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_STANDBY_IDLE;
+
+               if (target_rate >= dmcfreq->pd_idle_dis_freq)
+                       odt_pd_arg1 &= ~RK3399_SET_ODT_PD_1_PD_IDLE;
+
+               if (target_rate >= dmcfreq->srpd_lite_idle_dis_freq)
+                       odt_pd_arg1 &= ~RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE;
+
                if (target_rate >= dmcfreq->odt_dis_freq)
-                       odt_enable = true;
+                       odt_pd_arg2 |= RK3399_SET_ODT_PD_2_ODT_ENABLE;
 
                /*
                 * This makes a SMC call to the TF-A to set the DDR PD
                 * (power-down) timings and to enable or disable the
                 * ODT (on-die termination) resistors.
                 */
-               arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0,
-                             dmcfreq->odt_pd_arg1,
-                             ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD,
-                             odt_enable, 0, 0, 0, &res);
+               arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, odt_pd_arg0, odt_pd_arg1,
+                             ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, odt_pd_arg2,
+                             0, 0, 0, &res);
        }
 
        /*
@@ -158,6 +209,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
        dmcfreq->volt = target_volt;
 
 out:
+       rockchip_pmu_unblock();
+out_unlock:
        mutex_unlock(&dmcfreq->lock);
        return err;
 }
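
To make the field packing concrete: at an illustrative target_rate of
800 MHz, ddrcon_mhz = 800000000 / USEC_PER_SEC / 2 = 400. A pd_idle_ns
of 160 would then be folded into the SMC argument like this (a sketch,
not part of the patch):

    u32 arg1 = 0;

    /* NS_TO_CYCLE(160, 400) = 64 cycles, inserted into bits [11:0] */
    u32p_replace_bits(&arg1, NS_TO_CYCLE(160, 400),
                      RK3399_SET_ODT_PD_1_PD_IDLE);
    /* arg1 == 0x40; the sr_idle/sr_mc_gate_idle/srpd_lite_idle fields
     * additionally get DIV_ROUND_UP(..., 1024) before insertion. */
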
@@ -189,13 +242,6 @@ static int rk3399_dmcfreq_get_cur_freq(struct device *dev, unsigned long *freq)
        return 0;
 }
 
-static struct devfreq_dev_profile rk3399_devfreq_dmc_profile = {
-       .polling_ms     = 200,
-       .target         = rk3399_dmcfreq_target,
-       .get_dev_status = rk3399_dmcfreq_get_dev_status,
-       .get_cur_freq   = rk3399_dmcfreq_get_cur_freq,
-};
-
 static __maybe_unused int rk3399_dmcfreq_suspend(struct device *dev)
 {
        struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
@@ -238,69 +284,48 @@ static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend,
                         rk3399_dmcfreq_resume);
 
-static int of_get_ddr_timings(struct dram_timing *timing,
-                             struct device_node *np)
+static int rk3399_dmcfreq_of_props(struct rk3399_dmcfreq *data,
+                                  struct device_node *np)
 {
        int ret = 0;
 
-       ret = of_property_read_u32(np, "rockchip,ddr3_speed_bin",
-                                  &timing->ddr3_speed_bin);
-       ret |= of_property_read_u32(np, "rockchip,pd_idle",
-                                   &timing->pd_idle);
-       ret |= of_property_read_u32(np, "rockchip,sr_idle",
-                                   &timing->sr_idle);
-       ret |= of_property_read_u32(np, "rockchip,sr_mc_gate_idle",
-                                   &timing->sr_mc_gate_idle);
-       ret |= of_property_read_u32(np, "rockchip,srpd_lite_idle",
-                                   &timing->srpd_lite_idle);
-       ret |= of_property_read_u32(np, "rockchip,standby_idle",
-                                   &timing->standby_idle);
-       ret |= of_property_read_u32(np, "rockchip,auto_pd_dis_freq",
-                                   &timing->auto_pd_dis_freq);
-       ret |= of_property_read_u32(np, "rockchip,dram_dll_dis_freq",
-                                   &timing->dram_dll_dis_freq);
-       ret |= of_property_read_u32(np, "rockchip,phy_dll_dis_freq",
-                                   &timing->phy_dll_dis_freq);
+       /*
+        * These are all optional, and serve as minimum bounds. Give them large
+        * (i.e., never "disabled") values if the DT doesn't specify one.
+        */
+       data->pd_idle_dis_freq =
+               data->sr_idle_dis_freq =
+               data->sr_mc_gate_idle_dis_freq =
+               data->srpd_lite_idle_dis_freq =
+               data->standby_idle_dis_freq = UINT_MAX;
+
+       ret |= of_property_read_u32(np, "rockchip,pd-idle-ns",
+                                   &data->pd_idle_ns);
+       ret |= of_property_read_u32(np, "rockchip,sr-idle-ns",
+                                   &data->sr_idle_ns);
+       ret |= of_property_read_u32(np, "rockchip,sr-mc-gate-idle-ns",
+                                   &data->sr_mc_gate_idle_ns);
+       ret |= of_property_read_u32(np, "rockchip,srpd-lite-idle-ns",
+                                   &data->srpd_lite_idle_ns);
+       ret |= of_property_read_u32(np, "rockchip,standby-idle-ns",
+                                   &data->standby_idle_ns);
        ret |= of_property_read_u32(np, "rockchip,ddr3_odt_dis_freq",
-                                   &timing->ddr3_odt_dis_freq);
-       ret |= of_property_read_u32(np, "rockchip,ddr3_drv",
-                                   &timing->ddr3_drv);
-       ret |= of_property_read_u32(np, "rockchip,ddr3_odt",
-                                   &timing->ddr3_odt);
-       ret |= of_property_read_u32(np, "rockchip,phy_ddr3_ca_drv",
-                                   &timing->phy_ddr3_ca_drv);
-       ret |= of_property_read_u32(np, "rockchip,phy_ddr3_dq_drv",
-                                   &timing->phy_ddr3_dq_drv);
-       ret |= of_property_read_u32(np, "rockchip,phy_ddr3_odt",
-                                   &timing->phy_ddr3_odt);
+                                   &data->ddr3_odt_dis_freq);
        ret |= of_property_read_u32(np, "rockchip,lpddr3_odt_dis_freq",
-                                   &timing->lpddr3_odt_dis_freq);
-       ret |= of_property_read_u32(np, "rockchip,lpddr3_drv",
-                                   &timing->lpddr3_drv);
-       ret |= of_property_read_u32(np, "rockchip,lpddr3_odt",
-                                   &timing->lpddr3_odt);
-       ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_ca_drv",
-                                   &timing->phy_lpddr3_ca_drv);
-       ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_dq_drv",
-                                   &timing->phy_lpddr3_dq_drv);
-       ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_odt",
-                                   &timing->phy_lpddr3_odt);
+                                   &data->lpddr3_odt_dis_freq);
        ret |= of_property_read_u32(np, "rockchip,lpddr4_odt_dis_freq",
-                                   &timing->lpddr4_odt_dis_freq);
-       ret |= of_property_read_u32(np, "rockchip,lpddr4_drv",
-                                   &timing->lpddr4_drv);
-       ret |= of_property_read_u32(np, "rockchip,lpddr4_dq_odt",
-                                   &timing->lpddr4_dq_odt);
-       ret |= of_property_read_u32(np, "rockchip,lpddr4_ca_odt",
-                                   &timing->lpddr4_ca_odt);
-       ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ca_drv",
-                                   &timing->phy_lpddr4_ca_drv);
-       ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ck_cs_drv",
-                                   &timing->phy_lpddr4_ck_cs_drv);
-       ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_dq_drv",
-                                   &timing->phy_lpddr4_dq_drv);
-       ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_odt",
-                                   &timing->phy_lpddr4_odt);
+                                   &data->lpddr4_odt_dis_freq);
+
+       ret |= of_property_read_u32(np, "rockchip,pd-idle-dis-freq-hz",
+                                   &data->pd_idle_dis_freq);
+       ret |= of_property_read_u32(np, "rockchip,sr-idle-dis-freq-hz",
+                                   &data->sr_idle_dis_freq);
+       ret |= of_property_read_u32(np, "rockchip,sr-mc-gate-idle-dis-freq-hz",
+                                   &data->sr_mc_gate_idle_dis_freq);
+       ret |= of_property_read_u32(np, "rockchip,srpd-lite-idle-dis-freq-hz",
+                                   &data->srpd_lite_idle_dis_freq);
+       ret |= of_property_read_u32(np, "rockchip,standby-idle-dis-freq-hz",
+                                   &data->standby_idle_dis_freq);
 
        return ret;
 }
@@ -311,8 +336,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct device_node *np = pdev->dev.of_node, *node;
        struct rk3399_dmcfreq *data;
-       int ret, index, size;
-       uint32_t *timing;
+       int ret;
        struct dev_pm_opp *opp;
        u32 ddr_type;
        u32 val;
@@ -343,26 +367,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
                return ret;
        }
 
-       /*
-        * Get dram timing and pass it to arm trust firmware,
-        * the dram driver in arm trust firmware will get these
-        * timing and to do dram initial.
-        */
-       if (!of_get_ddr_timings(&data->timing, np)) {
-               timing = &data->timing.ddr3_speed_bin;
-               size = sizeof(struct dram_timing) / 4;
-               for (index = 0; index < size; index++) {
-                       arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, *timing++, index,
-                                     ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM,
-                                     0, 0, 0, 0, &res);
-                       if (res.a0) {
-                               dev_err(dev, "Failed to set dram param: %ld\n",
-                                       res.a0);
-                               ret = -EINVAL;
-                               goto err_edev;
-                       }
-               }
-       }
+       rk3399_dmcfreq_of_props(data, np);
 
        node = of_parse_phandle(np, "rockchip,pmu", 0);
        if (!node)
@@ -381,13 +386,13 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
 
        switch (ddr_type) {
        case RK3399_PMUGRF_DDRTYPE_DDR3:
-               data->odt_dis_freq = data->timing.ddr3_odt_dis_freq;
+               data->odt_dis_freq = data->ddr3_odt_dis_freq;
                break;
        case RK3399_PMUGRF_DDRTYPE_LPDDR3:
-               data->odt_dis_freq = data->timing.lpddr3_odt_dis_freq;
+               data->odt_dis_freq = data->lpddr3_odt_dis_freq;
                break;
        case RK3399_PMUGRF_DDRTYPE_LPDDR4:
-               data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq;
+               data->odt_dis_freq = data->lpddr4_odt_dis_freq;
                break;
        default:
                ret = -EINVAL;
@@ -399,63 +404,46 @@ no_pmu:
                      ROCKCHIP_SIP_CONFIG_DRAM_INIT,
                      0, 0, 0, 0, &res);
 
-       /*
-        * In TF-A there is a platform SIP call to set the PD (power-down)
-        * timings and to enable or disable the ODT (on-die termination).
-        * This call needs three arguments as follows:
-        *
-        * arg0:
-        *     bit[0-7]   : sr_idle
-        *     bit[8-15]  : sr_mc_gate_idle
-        *     bit[16-31] : standby idle
-        * arg1:
-        *     bit[0-11]  : pd_idle
-        *     bit[16-27] : srpd_lite_idle
-        * arg2:
-        *     bit[0]     : odt enable
-        */
-       data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) |
-                           ((data->timing.sr_mc_gate_idle & 0xff) << 8) |
-                           ((data->timing.standby_idle & 0xffff) << 16);
-       data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) |
-                           ((data->timing.srpd_lite_idle & 0xfff) << 16);
-
        /*
         * We add a devfreq driver to our parent since it has a device tree node
         * with operating points.
         */
-       if (dev_pm_opp_of_add_table(dev)) {
+       if (devm_pm_opp_of_add_table(dev)) {
                dev_err(dev, "Invalid operating-points in device tree.\n");
                ret = -EINVAL;
                goto err_edev;
        }
 
-       of_property_read_u32(np, "upthreshold",
-                            &data->ondemand_data.upthreshold);
-       of_property_read_u32(np, "downdifferential",
-                            &data->ondemand_data.downdifferential);
+       data->ondemand_data.upthreshold = 25;
+       data->ondemand_data.downdifferential = 15;
 
        data->rate = clk_get_rate(data->dmc_clk);
 
        opp = devfreq_recommended_opp(dev, &data->rate, 0);
        if (IS_ERR(opp)) {
                ret = PTR_ERR(opp);
-               goto err_free_opp;
+               goto err_edev;
        }
 
        data->rate = dev_pm_opp_get_freq(opp);
        data->volt = dev_pm_opp_get_voltage(opp);
        dev_pm_opp_put(opp);
 
-       rk3399_devfreq_dmc_profile.initial_freq = data->rate;
+       data->profile = (struct devfreq_dev_profile) {
+               .polling_ms     = 200,
+               .target         = rk3399_dmcfreq_target,
+               .get_dev_status = rk3399_dmcfreq_get_dev_status,
+               .get_cur_freq   = rk3399_dmcfreq_get_cur_freq,
+               .initial_freq   = data->rate,
+       };
 
        data->devfreq = devm_devfreq_add_device(dev,
-                                          &rk3399_devfreq_dmc_profile,
+                                          &data->profile,
                                           DEVFREQ_GOV_SIMPLE_ONDEMAND,
                                           &data->ondemand_data);
        if (IS_ERR(data->devfreq)) {
                ret = PTR_ERR(data->devfreq);
-               goto err_free_opp;
+               goto err_edev;
        }
 
        devm_devfreq_register_opp_notifier(dev, data->devfreq);
@@ -465,8 +453,6 @@ no_pmu:
 
        return 0;
 
-err_free_opp:
-       dev_pm_opp_of_remove_table(&pdev->dev);
 err_edev:
        devfreq_event_disable_edev(data->edev);
 
@@ -477,11 +463,7 @@ static int rk3399_dmcfreq_remove(struct platform_device *pdev)
 {
        struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev);
 
-       /*
-        * Before remove the opp table we need to unregister the opp notifier.
-        */
-       devm_devfreq_unregister_opp_notifier(dmcfreq->dev, dmcfreq->devfreq);
-       dev_pm_opp_of_remove_table(dmcfreq->dev);
+       devfreq_event_disable_edev(dmcfreq->edev);
 
        return 0;
 }
index df23239b04fc219afb77041172ab84788dcb7365..53297a0d9c5735aafc75a1e69f0bf0674e0e9de1 100644 (file)
@@ -407,6 +407,7 @@ static inline int is_dma_buf_file(struct file *file)
 
 static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
 {
+       static atomic64_t dmabuf_inode = ATOMIC64_INIT(0);
        struct file *file;
        struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
 
@@ -416,6 +417,13 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
        inode->i_size = dmabuf->size;
        inode_set_bytes(inode, dmabuf->size);
 
+       /*
+        * The ->i_ino acquired from get_next_ino() is not unique, and is
+        * therefore not suitable for use as the dentry name by dmabuf stats.
+        * Override ->i_ino with a unique, dmabuffs-specific value.
+        */
+       inode->i_ino = atomic64_add_return(1, &dmabuf_inode);
        file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf",
                                 flags, &dma_buf_fops);
        if (IS_ERR(file))
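
A private atomic64 counter is the usual idiom when a pseudo filesystem
needs identifiers that never repeat; get_next_ino() hands out 32-bit,
per-CPU-batched numbers that can wrap and collide. The pattern in
isolation (names are illustrative):

    static atomic64_t my_ino_counter = ATOMIC64_INIT(0);

    static u64 my_unique_ino(void)
    {
            /* monotonically increasing; a 64-bit space never wraps in practice */
            return atomic64_add_return(1, &my_ino_counter);
    }
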
@@ -543,10 +551,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
        file->f_mode |= FMODE_LSEEK;
        dmabuf->file = file;
 
-       ret = dma_buf_stats_setup(dmabuf);
-       if (ret)
-               goto err_sysfs;
-
        mutex_init(&dmabuf->lock);
        INIT_LIST_HEAD(&dmabuf->attachments);
 
@@ -554,6 +558,10 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
        list_add(&dmabuf->list_node, &db_list.head);
        mutex_unlock(&db_list.lock);
 
+       ret = dma_buf_stats_setup(dmabuf);
+       if (ret)
+               goto err_sysfs;
+
        return dmabuf;
 
 err_sysfs:
index 58ab63642e72c22c39b904f85620eb2da2a54edf..d3e2477948c87b2b67d1fc6ec3d54f7a6480d6f0 100644 (file)
@@ -55,6 +55,7 @@ config EDAC_DECODE_MCE
 config EDAC_GHES
        bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
        depends on ACPI_APEI_GHES && (EDAC=y)
+       select UEFI_CPER
        help
          Not all machines support hardware-driven error report. Some of those
          provide a BIOS-driven error report mechanism via ACPI, using the
@@ -484,7 +485,7 @@ config EDAC_ARMADA_XP
 
 config EDAC_SYNOPSYS
        tristate "Synopsys DDR Memory Controller"
-       depends on ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA
+       depends on ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA || ARCH_MXC
        help
          Support for error detection and correction on the Synopsys DDR
          memory controller.
index 812baa48b29065d322e822a04dc5fca02be8b082..2f854feeeb237af1059bcd4ab8b541ce0c25e4a9 100644 (file)
@@ -4336,7 +4336,7 @@ static int __init amd64_edac_init(void)
        if (!x86_match_cpu(amd64_cpuids))
                return -ENODEV;
 
-       if (amd_cache_northbridges() < 0)
+       if (!amd_nb_num())
                return -ENODEV;
 
        opstate_init();
index b1f46a974b9e0001b5333ae8c00fa49b4eafb271..038abbb83f4bce8f37b49f2f20e94bc690aa45da 100644 (file)
@@ -286,17 +286,10 @@ static int axp_mc_probe(struct platform_device *pdev)
        struct edac_mc_layer layers[1];
        const struct of_device_id *id;
        struct mem_ctl_info *mci;
-       struct resource *r;
        void __iomem *base;
        uint32_t config;
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!r) {
-               dev_err(&pdev->dev, "Unable to get mem resource\n");
-               return -ENODEV;
-       }
-
-       base = devm_ioremap_resource(&pdev->dev, r);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base)) {
                dev_err(&pdev->dev, "Unable to map regs\n");
                return PTR_ERR(base);
@@ -516,15 +509,8 @@ static int aurora_l2_probe(struct platform_device *pdev)
        const struct of_device_id *id;
        uint32_t l2x0_aux_ctrl;
        void __iomem *base;
-       struct resource *r;
-
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!r) {
-               dev_err(&pdev->dev, "Unable to get mem resource\n");
-               return -ENODEV;
-       }
 
-       base = devm_ioremap_resource(&pdev->dev, r);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base)) {
                dev_err(&pdev->dev, "Unable to map regs\n");
                return PTR_ERR(base);
index b8a7d9594afd42babdb0e93e6cfd94b41f562f6c..1fa5ca57e9ec196cace56662a564ae679e37aea9 100644 (file)
@@ -489,7 +489,7 @@ static int dmc520_edac_probe(struct platform_device *pdev)
        dev = &pdev->dev;
 
        for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
-               irq = platform_get_irq_byname(pdev, dmc520_irq_configs[idx].name);
+               irq = platform_get_irq_byname_optional(pdev, dmc520_irq_configs[idx].name);
                irqs[idx] = irq;
                masks[idx] = dmc520_irq_configs[idx].mask;
                if (irq >= 0) {
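
platform_get_irq_byname_optional() behaves like the non-optional variant
but stays silent when the interrupt is simply not described, which suits
dmc520's any-subset-of-IRQs wiring model. The usual handling looks
roughly like this ("ram_ecc_errc" is assumed to be one of this driver's
IRQ names; the rest is illustrative):

    int irq = platform_get_irq_byname_optional(pdev, "ram_ecc_errc");

    /* -ENXIO means the IRQ is not described at all; any other negative
     * value (e.g. -EPROBE_DEFER) is a real error. */
    if (irq < 0 && irq != -ENXIO)
            return irq;
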
index 8c4d947fb8486bab37ab82b2a4937c97d52e4043..19522c568aa5dea572f5d2c34eac44905ed1f142 100644 (file)
@@ -47,99 +47,67 @@ static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
 }
 #endif                         /* CONFIG_EDAC_DEBUG */
 
-struct edac_device_ctl_info *edac_device_alloc_ctl_info(
-       unsigned sz_private,
-       char *edac_device_name, unsigned nr_instances,
-       char *edac_block_name, unsigned nr_blocks,
-       unsigned offset_value,          /* zero, 1, or other based offset */
-       struct edac_dev_sysfs_block_attribute *attrib_spec, unsigned nr_attrib,
-       int device_index)
+/*
+ * @off_val: zero-, one-, or other-based offset used for block numbering
+ */
+struct edac_device_ctl_info *
+edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instances,
+                          char *blk_name, unsigned nr_blocks, unsigned off_val,
+                          struct edac_dev_sysfs_block_attribute *attrib_spec,
+                          unsigned nr_attrib, int device_index)
 {
-       struct edac_device_ctl_info *dev_ctl;
-       struct edac_device_instance *dev_inst, *inst;
-       struct edac_device_block *dev_blk, *blk_p, *blk;
        struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib;
-       unsigned total_size;
-       unsigned count;
+       struct edac_device_block *dev_blk, *blk_p, *blk;
+       struct edac_device_instance *dev_inst, *inst;
+       struct edac_device_ctl_info *dev_ctl;
        unsigned instance, block, attr;
-       void *pvt, *p;
+       void *pvt;
        int err;
 
        edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks);
 
-       /* Calculate the size of memory we need to allocate AND
-        * determine the offsets of the various item arrays
-        * (instance,block,attrib) from the start of an  allocated structure.
-        * We want the alignment of each item  (instance,block,attrib)
-        * to be at least as stringent as what the compiler would
-        * provide if we could simply hardcode everything into a single struct.
-        */
-       p = NULL;
-       dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1);
+       dev_ctl = kzalloc(sizeof(struct edac_device_ctl_info), GFP_KERNEL);
+       if (!dev_ctl)
+               return NULL;
 
-       /* Calc the 'end' offset past end of ONE ctl_info structure
-        * which will become the start of the 'instance' array
-        */
-       dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances);
+       dev_inst = kcalloc(nr_instances, sizeof(struct edac_device_instance), GFP_KERNEL);
+       if (!dev_inst)
+               goto free;
 
-       /* Calc the 'end' offset past the instance array within the ctl_info
-        * which will become the start of the block array
-        */
-       count = nr_instances * nr_blocks;
-       dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count);
+       dev_ctl->instances = dev_inst;
 
-       /* Calc the 'end' offset past the dev_blk array
-        * which will become the start of the attrib array, if any.
-        */
-       /* calc how many nr_attrib we need */
-       if (nr_attrib > 0)
-               count *= nr_attrib;
-       dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count);
+       dev_blk = kcalloc(nr_instances * nr_blocks, sizeof(struct edac_device_block), GFP_KERNEL);
+       if (!dev_blk)
+               goto free;
 
-       /* Calc the 'end' offset past the attributes array */
-       pvt = edac_align_ptr(&p, sz_private, 1);
+       dev_ctl->blocks = dev_blk;
 
-       /* 'pvt' now points to where the private data area is.
-        * At this point 'pvt' (like dev_inst,dev_blk and dev_attrib)
-        * is baselined at ZERO
-        */
-       total_size = ((unsigned long)pvt) + sz_private;
+       if (nr_attrib) {
+               dev_attrib = kcalloc(nr_attrib, sizeof(struct edac_dev_sysfs_block_attribute),
+                                    GFP_KERNEL);
+               if (!dev_attrib)
+                       goto free;
 
-       /* Allocate the amount of memory for the set of control structures */
-       dev_ctl = kzalloc(total_size, GFP_KERNEL);
-       if (dev_ctl == NULL)
-               return NULL;
+               dev_ctl->attribs = dev_attrib;
+       }
 
-       /* Adjust pointers so they point within the actual memory we
-        * just allocated rather than an imaginary chunk of memory
-        * located at address 0.
-        * 'dev_ctl' points to REAL memory, while the others are
-        * ZERO based and thus need to be adjusted to point within
-        * the allocated memory.
-        */
-       dev_inst = (struct edac_device_instance *)
-               (((char *)dev_ctl) + ((unsigned long)dev_inst));
-       dev_blk = (struct edac_device_block *)
-               (((char *)dev_ctl) + ((unsigned long)dev_blk));
-       dev_attrib = (struct edac_dev_sysfs_block_attribute *)
-               (((char *)dev_ctl) + ((unsigned long)dev_attrib));
-       pvt = sz_private ? (((char *)dev_ctl) + ((unsigned long)pvt)) : NULL;
-
-       /* Begin storing the information into the control info structure */
-       dev_ctl->dev_idx = device_index;
-       dev_ctl->nr_instances = nr_instances;
-       dev_ctl->instances = dev_inst;
-       dev_ctl->pvt_info = pvt;
+       if (pvt_sz) {
+               pvt = kzalloc(pvt_sz, GFP_KERNEL);
+               if (!pvt)
+                       goto free;
+
+               dev_ctl->pvt_info = pvt;
+       }
+
+       dev_ctl->dev_idx        = device_index;
+       dev_ctl->nr_instances   = nr_instances;
 
        /* Default logging of CEs and UEs */
        dev_ctl->log_ce = 1;
        dev_ctl->log_ue = 1;
 
        /* Name of this edac device */
-       snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name);
-
-       edac_dbg(4, "edac_dev=%p next after end=%p\n",
-                dev_ctl, pvt + sz_private);
+       snprintf(dev_ctl->name, sizeof(dev_ctl->name), "%s", dev_name);
 
        /* Initialize every Instance */
        for (instance = 0; instance < nr_instances; instance++) {
@@ -150,15 +118,14 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
                inst->blocks = blk_p;
 
                /* name of this instance */
-               snprintf(inst->name, sizeof(inst->name),
-                        "%s%u", edac_device_name, instance);
+               snprintf(inst->name, sizeof(inst->name), "%s%u", dev_name, instance);
 
                /* Initialize every block in each instance */
                for (block = 0; block < nr_blocks; block++) {
                        blk = &blk_p[block];
                        blk->instance = inst;
                        snprintf(blk->name, sizeof(blk->name),
-                                "%s%d", edac_block_name, block+offset_value);
+                                "%s%d", blk_name, block + off_val);
 
                        edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n",
                                 instance, inst, block, blk, blk->name);
@@ -210,10 +177,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
         * Initialize the 'root' kobj for the edac_device controller
         */
        err = edac_device_register_sysfs_main_kobj(dev_ctl);
-       if (err) {
-               kfree(dev_ctl);
-               return NULL;
-       }
+       if (err)
+               goto free;
 
        /* at this point, the root kobj is valid, and in order to
         * 'free' the object, then the function:
@@ -223,6 +188,11 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
         */
 
        return dev_ctl;
+
+free:
+       __edac_device_free_ctl_info(dev_ctl);
+
+       return NULL;
 }
 EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info);
 
index fc2d2c2180649d239ad3befdb75a27782249e769..3f44e6b9d387f961d615db4a435edcfdc2faef11 100644 (file)
@@ -216,6 +216,8 @@ struct edac_device_ctl_info {
         */
        u32 nr_instances;
        struct edac_device_instance *instances;
+       struct edac_device_block *blocks;
+       struct edac_dev_sysfs_block_attribute *attribs;
 
        /* Event counters for the this whole EDAC Device */
        struct edac_device_counter counters;
@@ -348,4 +350,16 @@ edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
  */
 extern int edac_device_alloc_index(void);
 extern const char *edac_layer_name[];
+
+/* Free the actual struct */
+static inline void __edac_device_free_ctl_info(struct edac_device_ctl_info *ci)
+{
+       if (ci) {
+               kfree(ci->pvt_info);
+               kfree(ci->attribs);
+               kfree(ci->blocks);
+               kfree(ci->instances);
+               kfree(ci);
+       }
+}
 #endif
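
Because kfree(NULL) is a no-op, the helper tolerates a half-constructed
ctl_info: whichever members were never allocated are still NULL, so
every failure in edac_device_alloc_ctl_info() can funnel through one
label. A trivial illustration:

    struct edac_device_ctl_info *ci = kzalloc(sizeof(*ci), GFP_KERNEL);

    /* all members are NULL here; the four inner kfree() calls are no-ops */
    __edac_device_free_ctl_info(ci);
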
index 9a61d92bdf42045e6d02d62ee10b4febc1864e45..ac678b4a21fcbfd4d933d6be602594a80be81151 100644 (file)
@@ -208,10 +208,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj)
        /* decrement the EDAC CORE module ref count */
        module_put(edac_dev->owner);
 
-       /* free the control struct containing the 'main' kobj
-        * passed in to this routine
-        */
-       kfree(edac_dev);
+       __edac_device_free_ctl_info(edac_dev);
 }
 
 /* ktype for the main (master) kobject */
index d2715774af6fb3a0b07ff2c6ce6bb510091c0a68..eb58644bb0190630af007f2ad10363b3bb5cde6c 100644 (file)
@@ -170,61 +170,6 @@ const char * const edac_mem_types[] = {
 };
 EXPORT_SYMBOL_GPL(edac_mem_types);
 
-/**
- * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
- * @p:         pointer to a pointer with the memory offset to be used. At
- *             return, this will be incremented to point to the next offset
- * @size:      Size of the data structure to be reserved
- * @n_elems:   Number of elements that should be reserved
- *
- * If 'size' is a constant, the compiler will optimize this whole function
- * down to either a no-op or the addition of a constant to the value of '*p'.
- *
- * The 'p' pointer is absolutely needed to keep the proper advancing
- * further in memory to the proper offsets when allocating the struct along
- * with its embedded structs, as edac_device_alloc_ctl_info() does it
- * above, for example.
- *
- * At return, the pointer 'p' will be incremented to be used on a next call
- * to this function.
- */
-void *edac_align_ptr(void **p, unsigned int size, int n_elems)
-{
-       unsigned int align, r;
-       void *ptr = *p;
-
-       *p += size * n_elems;
-
-       /*
-        * 'p' can possibly be an unaligned item X such that sizeof(X) is
-        * 'size'.  Adjust 'p' so that its alignment is at least as
-        * stringent as what the compiler would provide for X and return
-        * the aligned result.
-        * Here we assume that the alignment of a "long long" is the most
-        * stringent alignment that the compiler will ever provide by default.
-        * As far as I know, this is a reasonable assumption.
-        */
-       if (size > sizeof(long))
-               align = sizeof(long long);
-       else if (size > sizeof(int))
-               align = sizeof(long);
-       else if (size > sizeof(short))
-               align = sizeof(int);
-       else if (size > sizeof(char))
-               align = sizeof(short);
-       else
-               return ptr;
-
-       r = (unsigned long)ptr % align;
-
-       if (r == 0)
-               return ptr;
-
-       *p += align - r;
-
-       return (void *)(((unsigned long)ptr) + align - r);
-}
-
 static void _edac_mc_free(struct mem_ctl_info *mci)
 {
        put_device(&mci->dev);
@@ -257,6 +202,8 @@ static void mci_release(struct device *dev)
                }
                kfree(mci->csrows);
        }
+       kfree(mci->pvt_info);
+       kfree(mci->layers);
        kfree(mci);
 }
 
@@ -392,9 +339,8 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
 {
        struct mem_ctl_info *mci;
        struct edac_mc_layer *layer;
-       unsigned int idx, size, tot_dimms = 1;
+       unsigned int idx, tot_dimms = 1;
        unsigned int tot_csrows = 1, tot_channels = 1;
-       void *pvt, *ptr = NULL;
        bool per_rank = false;
 
        if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
@@ -416,41 +362,25 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
                        per_rank = true;
        }
 
-       /* Figure out the offsets of the various items from the start of an mc
-        * structure.  We want the alignment of each item to be at least as
-        * stringent as what the compiler would provide if we could simply
-        * hardcode everything into a single struct.
-        */
-       mci     = edac_align_ptr(&ptr, sizeof(*mci), 1);
-       layer   = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
-       pvt     = edac_align_ptr(&ptr, sz_pvt, 1);
-       size    = ((unsigned long)pvt) + sz_pvt;
-
-       edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
-                size,
-                tot_dimms,
-                per_rank ? "ranks" : "dimms",
-                tot_csrows * tot_channels);
-
-       mci = kzalloc(size, GFP_KERNEL);
-       if (mci == NULL)
+       mci = kzalloc(sizeof(struct mem_ctl_info), GFP_KERNEL);
+       if (!mci)
                return NULL;
 
+       mci->layers = kcalloc(n_layers, sizeof(struct edac_mc_layer), GFP_KERNEL);
+       if (!mci->layers)
+               goto error;
+
+       mci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL);
+       if (!mci->pvt_info)
+               goto error;
+
        mci->dev.release = mci_release;
        device_initialize(&mci->dev);
 
-       /* Adjust pointers so they point within the memory we just allocated
-        * rather than an imaginary chunk of memory located at address 0.
-        */
-       layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
-       pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
-
        /* setup index and various internal pointers */
        mci->mc_idx = mc_num;
        mci->tot_dimms = tot_dimms;
-       mci->pvt_info = pvt;
        mci->n_layers = n_layers;
-       mci->layers = layer;
        memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
        mci->nr_csrows = tot_csrows;
        mci->num_cschannel = tot_channels;
index aa1f91688eb8e0dacd482eda115881252ae5ad0e..96f6de0c8ff6f221bbdbdbc47d8e40b8fb58c9c2 100644 (file)
@@ -59,8 +59,6 @@ extern void edac_device_reset_delay_period(struct edac_device_ctl_info
                                           *edac_dev, unsigned long value);
 extern void edac_mc_reset_delay_period(unsigned long value);
 
-extern void *edac_align_ptr(void **p, unsigned size, int n_elems);
-
 /*
  * EDAC debugfs functions
  */
index 48c844a72a278bc13e72e0af0674b53f3fe06e04..2205d7e731dbaec4f9998c3f0fa06fe6c6160a1e 100644 (file)
@@ -29,32 +29,31 @@ static LIST_HEAD(edac_pci_list);
 static atomic_t pci_indexes = ATOMIC_INIT(0);
 
 struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
-                                               const char *edac_pci_name)
+                                                 const char *edac_pci_name)
 {
        struct edac_pci_ctl_info *pci;
-       void *p = NULL, *pvt;
-       unsigned int size;
 
        edac_dbg(1, "\n");
 
-       pci = edac_align_ptr(&p, sizeof(*pci), 1);
-       pvt = edac_align_ptr(&p, 1, sz_pvt);
-       size = ((unsigned long)pvt) + sz_pvt;
-
-       /* Alloc the needed control struct memory */
-       pci = kzalloc(size, GFP_KERNEL);
-       if (pci  == NULL)
+       pci = kzalloc(sizeof(struct edac_pci_ctl_info), GFP_KERNEL);
+       if (!pci)
                return NULL;
 
-       /* Now much private space */
-       pvt = sz_pvt ? ((char *)pci) + ((unsigned long)pvt) : NULL;
+       if (sz_pvt) {
+               pci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL);
+               if (!pci->pvt_info)
+                       goto free;
+       }
 
-       pci->pvt_info = pvt;
        pci->op_state = OP_ALLOC;
 
        snprintf(pci->name, strlen(edac_pci_name) + 1, "%s", edac_pci_name);
 
        return pci;
+
+free:
+       kfree(pci);
+       return NULL;
 }
 EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info);
 
index 6d1ddecbf0da3606c73e1139cd01c92af1ede5ba..59b0bedc9c242ab70cad0a21ad5700f72abaca76 100644 (file)
 #include "edac_module.h"
 #include <ras/ras_event.h>
 
+#define OTHER_DETAIL_LEN       400
+
 struct ghes_pvt {
        struct mem_ctl_info *mci;
 
        /* Buffers for the error handling routine */
-       char other_detail[400];
+       char other_detail[OTHER_DETAIL_LEN];
        char msg[80];
 };
 
@@ -36,7 +38,7 @@ static struct ghes_pvt *ghes_pvt;
  * This driver's representation of the system hardware, as collected
  * from DMI.
  */
-struct ghes_hw_desc {
+static struct ghes_hw_desc {
        int num_dimms;
        struct dimm_info *dimms;
 } ghes_hw;
@@ -235,8 +237,34 @@ static void ghes_scan_system(void)
        system_scanned = true;
 }
 
+static int print_mem_error_other_detail(const struct cper_sec_mem_err *mem, char *msg,
+                                       const char *location, unsigned int len)
+{
+       u32 n;
+
+       if (!msg)
+               return 0;
+
+       n = 0;
+       len -= 1;
+
+       n += scnprintf(msg + n, len - n, "APEI location: %s ", location);
+
+       if (!(mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS))
+               goto out;
+
+       n += scnprintf(msg + n, len - n, "status(0x%016llx): ", mem->error_status);
+       n += scnprintf(msg + n, len - n, "%s ", cper_mem_err_status_str(mem->error_status));
+
+out:
+       msg[n] = '\0';
+
+       return n;
+}
+
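
Unlike the sprintf() chain it replaces, scnprintf() returns the number
of characters actually written to the buffer rather than the would-be
length, so the "msg + n, len - n" accumulation can never run past the
end. The idiom in isolation (buffer size arbitrary):

    char buf[32];
    int n = 0;

    n += scnprintf(buf + n, sizeof(buf) - n, "status(0x%04x): ", 0x0400);
    n += scnprintf(buf + n, sizeof(buf) - n, "%s", "DRAM error");
    /* n <= sizeof(buf) - 1, and buf is always NUL-terminated */
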
 void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 {
+       struct cper_mem_err_compact cmem;
        struct edac_raw_error_desc *e;
        struct mem_ctl_info *mci;
        struct ghes_pvt *pvt;
@@ -292,60 +320,10 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
        /* Error type, mapped on e->msg */
        if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
+               u8 etype = mem_err->error_type;
+
                p = pvt->msg;
-               switch (mem_err->error_type) {
-               case 0:
-                       p += sprintf(p, "Unknown");
-                       break;
-               case 1:
-                       p += sprintf(p, "No error");
-                       break;
-               case 2:
-                       p += sprintf(p, "Single-bit ECC");
-                       break;
-               case 3:
-                       p += sprintf(p, "Multi-bit ECC");
-                       break;
-               case 4:
-                       p += sprintf(p, "Single-symbol ChipKill ECC");
-                       break;
-               case 5:
-                       p += sprintf(p, "Multi-symbol ChipKill ECC");
-                       break;
-               case 6:
-                       p += sprintf(p, "Master abort");
-                       break;
-               case 7:
-                       p += sprintf(p, "Target abort");
-                       break;
-               case 8:
-                       p += sprintf(p, "Parity Error");
-                       break;
-               case 9:
-                       p += sprintf(p, "Watchdog timeout");
-                       break;
-               case 10:
-                       p += sprintf(p, "Invalid address");
-                       break;
-               case 11:
-                       p += sprintf(p, "Mirror Broken");
-                       break;
-               case 12:
-                       p += sprintf(p, "Memory Sparing");
-                       break;
-               case 13:
-                       p += sprintf(p, "Scrub corrected error");
-                       break;
-               case 14:
-                       p += sprintf(p, "Scrub uncorrected error");
-                       break;
-               case 15:
-                       p += sprintf(p, "Physical Memory Map-out event");
-                       break;
-               default:
-                       p += sprintf(p, "reserved error (%d)",
-                                    mem_err->error_type);
-               }
+               p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype));
        } else {
                strcpy(pvt->msg, "unknown error");
        }
@@ -362,52 +340,19 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
        /* Memory error location, mapped on e->location */
        p = e->location;
-       if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
-               p += sprintf(p, "node:%d ", mem_err->node);
-       if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
-               p += sprintf(p, "card:%d ", mem_err->card);
-       if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
-               p += sprintf(p, "module:%d ", mem_err->module);
-       if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
-               p += sprintf(p, "rank:%d ", mem_err->rank);
-       if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
-               p += sprintf(p, "bank:%d ", mem_err->bank);
-       if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP)
-               p += sprintf(p, "bank_group:%d ",
-                            mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT);
-       if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
-               p += sprintf(p, "bank_address:%d ",
-                            mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK);
-       if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
-               u32 row = mem_err->row;
-
-               row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
-               p += sprintf(p, "row:%d ", row);
-       }
-       if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
-               p += sprintf(p, "col:%d ", mem_err->column);
-       if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
-               p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
+       cper_mem_err_pack(mem_err, &cmem);
+       p += cper_mem_err_location(&cmem, p);
+
        if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
-               const char *bank = NULL, *device = NULL;
                struct dimm_info *dimm;
 
-               dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
-               if (bank != NULL && device != NULL)
-                       p += sprintf(p, "DIMM location:%s %s ", bank, device);
-               else
-                       p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
-                                    mem_err->mem_dev_handle);
-
+               p += cper_dimm_err_location(&cmem, p);
                dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
                if (dimm) {
                        e->top_layer = dimm->idx;
                        strcpy(e->label, dimm->label);
                }
        }
-       if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID)
-               p += sprintf(p, "chipID: %d ",
-                            mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT);
        if (p > e->location)
                *(p - 1) = '\0';
 
@@ -416,78 +361,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
        /* All other fields are mapped on e->other_detail */
        p = pvt->other_detail;
-       p += snprintf(p, sizeof(pvt->other_detail),
-               "APEI location: %s ", e->location);
-       if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
-               u64 status = mem_err->error_status;
-
-               p += sprintf(p, "status(0x%016llx): ", (long long)status);
-               switch ((status >> 8) & 0xff) {
-               case 1:
-                       p += sprintf(p, "Error detected internal to the component ");
-                       break;
-               case 16:
-                       p += sprintf(p, "Error detected in the bus ");
-                       break;
-               case 4:
-                       p += sprintf(p, "Storage error in DRAM memory ");
-                       break;
-               case 5:
-                       p += sprintf(p, "Storage error in TLB ");
-                       break;
-               case 6:
-                       p += sprintf(p, "Storage error in cache ");
-                       break;
-               case 7:
-                       p += sprintf(p, "Error in one or more functional units ");
-                       break;
-               case 8:
-                       p += sprintf(p, "component failed self test ");
-                       break;
-               case 9:
-                       p += sprintf(p, "Overflow or undervalue of internal queue ");
-                       break;
-               case 17:
-                       p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
-                       break;
-               case 18:
-                       p += sprintf(p, "Improper access error ");
-                       break;
-               case 19:
-                       p += sprintf(p, "Access to a memory address which is not mapped to any component ");
-                       break;
-               case 20:
-                       p += sprintf(p, "Loss of Lockstep ");
-                       break;
-               case 21:
-                       p += sprintf(p, "Response not associated with a request ");
-                       break;
-               case 22:
-                       p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
-                       break;
-               case 23:
-                       p += sprintf(p, "Detection of a PATH_ERROR ");
-                       break;
-               case 25:
-                       p += sprintf(p, "Bus operation timeout ");
-                       break;
-               case 26:
-                       p += sprintf(p, "A read was issued to data that has been poisoned ");
-                       break;
-               default:
-                       p += sprintf(p, "reserved ");
-                       break;
-               }
-       }
-       if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
-               p += sprintf(p, "requestorID: 0x%016llx ",
-                            (long long)mem_err->requestor_id);
-       if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
-               p += sprintf(p, "responderID: 0x%016llx ",
-                            (long long)mem_err->responder_id);
-       if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
-               p += sprintf(p, "targetID: 0x%016llx ",
-                            (long long)mem_err->responder_id);
+       p += print_mem_error_other_detail(mem_err, p, e->location, OTHER_DETAIL_LEN);
        if (p > pvt->other_detail)
                *(p - 1) = '\0';
 
index 324a46b8479b0f96879ac2067b56a0c456621ebc..f5d82518c15e5b0138cef472b0de1349c5198171 100644 (file)
@@ -244,11 +244,6 @@ static inline u32 i5100_nrecmema_rank(u32 a)
        return a >>  8 & ((1 << 3) - 1);
 }
 
-static inline u32 i5100_nrecmema_dm_buf_id(u32 a)
-{
-       return a & ((1 << 8) - 1);
-}
-
 static inline u32 i5100_nrecmemb_cas(u32 a)
 {
        return a >> 16 & ((1 << 13) - 1);
index 67f7bc3fe5b3b623b108fddc8cb57ec6ba304ade..5bf92298554d6c2d51a44622d8bf33d320d77826 100644 (file)
@@ -609,13 +609,6 @@ static int mpc85xx_l2_err_remove(struct platform_device *op)
 }
 
 static const struct of_device_id mpc85xx_l2_err_of_match[] = {
-/* deprecate the fsl,85.. forms in the future, 2.6.30? */
-       { .compatible = "fsl,8540-l2-cache-controller", },
-       { .compatible = "fsl,8541-l2-cache-controller", },
-       { .compatible = "fsl,8544-l2-cache-controller", },
-       { .compatible = "fsl,8548-l2-cache-controller", },
-       { .compatible = "fsl,8555-l2-cache-controller", },
-       { .compatible = "fsl,8568-l2-cache-controller", },
        { .compatible = "fsl,mpc8536-l2-cache-controller", },
        { .compatible = "fsl,mpc8540-l2-cache-controller", },
        { .compatible = "fsl,mpc8541-l2-cache-controller", },
@@ -644,13 +637,6 @@ static struct platform_driver mpc85xx_l2_err_driver = {
 };
 
 static const struct of_device_id mpc85xx_mc_err_of_match[] = {
-/* deprecate the fsl,85.. forms in the future, 2.6.30? */
-       { .compatible = "fsl,8540-memory-controller", },
-       { .compatible = "fsl,8541-memory-controller", },
-       { .compatible = "fsl,8544-memory-controller", },
-       { .compatible = "fsl,8548-memory-controller", },
-       { .compatible = "fsl,8555-memory-controller", },
-       { .compatible = "fsl,8568-memory-controller", },
        { .compatible = "fsl,mpc8536-memory-controller", },
        { .compatible = "fsl,mpc8540-memory-controller", },
        { .compatible = "fsl,mpc8541-memory-controller", },
index 40b1abeca8562e2b55dde35aa488e86beb2e9ee4..1cee64b80a7e085ee0c3503aa71d30a7f3cf4f23 100644 (file)
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Synopsys DDR ECC Driver
  * This driver is based on ppc4xx_edac.c drivers
  *
  * Copyright (C) 2012 - 2014 Xilinx, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details
  */
 
 #include <linux/edac.h>
index 7197f9fa024572f72aec324af64cd382820ffac1..54081403db4fd20352202e77161704f725e3b434 100644 (file)
@@ -501,7 +501,7 @@ static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
 #define MEMERR_L2C_L2ESRA_PAGE_OFFSET          0x0804
 
 /*
- * Processor Module Domain (PMD) context - Context for a pair of processsors.
+ * Processor Module Domain (PMD) context - Context for a pair of processors.
  * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
  * its own L1 cache.
  */
index 2c3dac5ecb36d577a7f6b0a5c2ac9fb8bbd09a37..4720ba98cec312e775c8bcb298855fd60ccb38c9 100644 (file)
@@ -91,6 +91,18 @@ config EFI_SOFT_RESERVE
 
          If unsure, say Y.
 
+config EFI_DXE_MEM_ATTRIBUTES
+       bool "Adjust memory attributes in EFISTUB"
+       depends on EFI && EFI_STUB && X86
+       default y
+       help
+         The UEFI specification does not guarantee that all memory is
+         accessible for both write and execute, as the kernel expects
+         it to be.
+         Use DXE services to check and alter memory protection
+         attributes during boot via EFISTUB, so that memory ranges
+         used by the kernel are writable and executable.
+
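For illustration, a minimal sketch of what checking and altering memory attributes via DXE services amounts to in the stub, using the efi_dxe_call() wrapper and GCD descriptor types introduced later in this series (the helper name is hypothetical and error handling is trimmed):

/* Hypothetical helper: make one page writable and executable via DXE
 * services, mirroring the pattern used by adjust_memory_range_protection()
 * in the x86 stub below.
 */
static efi_status_t make_page_wx(unsigned long addr)
{
	efi_gcd_memory_space_desc_t desc;
	efi_status_t status;

	status = efi_dxe_call(get_memory_space_descriptor, addr, &desc);
	if (status != EFI_SUCCESS)
		return status;

	/* Only touch system memory that is currently read-only or XP. */
	if (desc.gcd_memory_type != EfiGcdMemoryTypeSystemMemory ||
	    !(desc.attributes & (EFI_MEMORY_RO | EFI_MEMORY_XP)))
		return EFI_SUCCESS;

	return efi_dxe_call(set_memory_space_attributes,
			    rounddown(addr, EFI_PAGE_SIZE),
			    EFI_PAGE_SIZE, EFI_MEMORY_WB);
}
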
 config EFI_PARAMS_FROM_FDT
        bool
        help
@@ -284,3 +296,34 @@ config EFI_CUSTOM_SSDT_OVERLAYS
 
          See Documentation/admin-guide/acpi/ssdt-overlays.rst for more
          information.
+
+config EFI_DISABLE_RUNTIME
+       bool "Disable EFI runtime services support by default"
+       default y if PREEMPT_RT
+       help
+         Allow disabling EFI runtime services support by default. This can
+         already be achieved with the efi=noruntime option, but it can be
+         useful to have this default without any kernel command line parameter.
+
+         The EFI runtime services are disabled by default when PREEMPT_RT is
+         enabled, because measurements have shown that some EFI function calls
+         might take too much time to complete, causing large latencies, which
+         is an issue for real-time kernels.
+
+         This default can be overridden by using the efi=runtime option.
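As a rough sketch of how this default interacts with the command-line override (the parse_efi_cmdline() shape below is an assumption based on the efi= options named above, not part of this diff):

/* Sketch: the Kconfig option only sets the initial value; efi=runtime
 * and efi=noruntime on the kernel command line still take precedence.
 */
static bool disable_runtime = IS_ENABLED(CONFIG_EFI_DISABLE_RUNTIME);

static int __init parse_efi_cmdline(char *str)
{
	if (parse_option_str(str, "noruntime"))
		disable_runtime = true;
	if (parse_option_str(str, "runtime"))
		disable_runtime = false;	/* override the PREEMPT_RT default */
	return 0;
}
early_param("efi", parse_efi_cmdline);
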
+
+config EFI_COCO_SECRET
+       bool "EFI Confidential Computing Secret Area Support"
+       depends on EFI
+       help
+         Confidential Computing platforms (such as AMD SEV) allow the
+         Guest Owner to securely inject secrets during guest VM launch.
+         The secrets are placed in a designated EFI reserved memory area.
+
+         In order to use the secrets in the kernel, the location of the secret
+         area (as published in the EFI config table) must be kept.
+
+         If you say Y here, the address of the EFI secret area will be kept
+         for use inside the kernel.  This will allow the
+         virt/coco/efi_secret module to access the secrets, which in turn
+         allows userspace programs to access the injected secrets.
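To give a feel for how the recorded address is consumed, a hedged sketch of what a consumer such as the virt/coco/efi_secret module might do (the mapping details and size handling are illustrative, not taken from this diff):

/* Illustrative only: map the EFI-reserved secret area whose physical
 * address was saved from the "CocoSecret" configuration table entry.
 */
static void *map_coco_secret(size_t size)
{
	if (efi.coco_secret == EFI_INVALID_TABLE_ADDR)
		return NULL;

	return memremap(efi.coco_secret, size, MEMREMAP_WB);
}
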
index 6ec8edec63296b435389531bbc567d8703125201..e4e5ea7ce910a53642a4f8cb9f9e9fe11fca96d8 100644 (file)
@@ -211,7 +211,33 @@ const char *cper_mem_err_type_str(unsigned int etype)
 }
 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
 
-static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
+const char *cper_mem_err_status_str(u64 status)
+{
+       switch ((status >> 8) & 0xff) {
+       case  1:        return "Error detected internal to the component";
+       case  4:        return "Storage error in DRAM memory";
+       case  5:        return "Storage error in TLB";
+       case  6:        return "Storage error in cache";
+       case  7:        return "Error in one or more functional units";
+       case  8:        return "Component failed self test";
+       case  9:        return "Overflow or undervalue of internal queue";
+       case 16:        return "Error detected in the bus";
+       case 17:        return "Virtual address not found on IO-TLB or IO-PDIR";
+       case 18:        return "Improper access error";
+       case 19:        return "Access to a memory address which is not mapped to any component";
+       case 20:        return "Loss of Lockstep";
+       case 21:        return "Response not associated with a request";
+       case 22:        return "Bus parity error - must also set the A, C, or D Bits";
+       case 23:        return "Detection of a protocol error";
+       case 24:        return "Detection of a PATH_ERROR";
+       case 25:        return "Bus operation timeout";
+       case 26:        return "A read was issued to data that has been poisoned";
+       default:        return "Reserved";
+       }
+}
+EXPORT_SYMBOL_GPL(cper_mem_err_status_str);
+
+int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
        u32 len, n;
 
@@ -221,51 +247,51 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
        n = 0;
        len = CPER_REC_LEN;
        if (mem->validation_bits & CPER_MEM_VALID_NODE)
-               n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
+               n += scnprintf(msg + n, len - n, "node:%d ", mem->node);
        if (mem->validation_bits & CPER_MEM_VALID_CARD)
-               n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
+               n += scnprintf(msg + n, len - n, "card:%d ", mem->card);
        if (mem->validation_bits & CPER_MEM_VALID_MODULE)
-               n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
+               n += scnprintf(msg + n, len - n, "module:%d ", mem->module);
        if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
-               n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
+               n += scnprintf(msg + n, len - n, "rank:%d ", mem->rank);
        if (mem->validation_bits & CPER_MEM_VALID_BANK)
-               n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
+               n += scnprintf(msg + n, len - n, "bank:%d ", mem->bank);
        if (mem->validation_bits & CPER_MEM_VALID_BANK_GROUP)
-               n += scnprintf(msg + n, len - n, "bank_group: %d ",
+               n += scnprintf(msg + n, len - n, "bank_group:%d ",
                               mem->bank >> CPER_MEM_BANK_GROUP_SHIFT);
        if (mem->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
-               n += scnprintf(msg + n, len - n, "bank_address: %d ",
+               n += scnprintf(msg + n, len - n, "bank_address:%d ",
                               mem->bank & CPER_MEM_BANK_ADDRESS_MASK);
        if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
-               n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
+               n += scnprintf(msg + n, len - n, "device:%d ", mem->device);
        if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
                u32 row = mem->row;
 
                row |= cper_get_mem_extension(mem->validation_bits, mem->extended);
-               n += scnprintf(msg + n, len - n, "row: %d ", row);
+               n += scnprintf(msg + n, len - n, "row:%d ", row);
        }
        if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
-               n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
+               n += scnprintf(msg + n, len - n, "column:%d ", mem->column);
        if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
-               n += scnprintf(msg + n, len - n, "bit_position: %d ",
+               n += scnprintf(msg + n, len - n, "bit_position:%d ",
                               mem->bit_pos);
        if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
-               n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
+               n += scnprintf(msg + n, len - n, "requestor_id:0x%016llx ",
                               mem->requestor_id);
        if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
-               n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
+               n += scnprintf(msg + n, len - n, "responder_id:0x%016llx ",
                               mem->responder_id);
        if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
-               n += scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
+               n += scnprintf(msg + n, len - n, "target_id:0x%016llx ",
                               mem->target_id);
        if (mem->validation_bits & CPER_MEM_VALID_CHIP_ID)
-               n += scnprintf(msg + n, len - n, "chip_id: %d ",
+               n += scnprintf(msg + n, len - n, "chip_id:%d ",
                               mem->extended >> CPER_MEM_CHIP_ID_SHIFT);
 
        return n;
 }
 
-static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
+int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
        u32 len, n;
        const char *bank = NULL, *device = NULL;
@@ -334,7 +360,9 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
                return;
        }
        if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
-               printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
+               printk("%s error_status: %s (0x%016llx)\n",
+                      pfx, cper_mem_err_status_str(mem->error_status),
+                      mem->error_status);
        if (mem->validation_bits & CPER_MEM_VALID_PA)
                printk("%s""physical_address: 0x%016llx\n",
                       pfx, mem->physical_addr);
index 5502e176d51bee14deba8d37bc240244d3e9878b..860534bcfdac2aa989f5721ec045740e94748af7 100644 (file)
@@ -46,6 +46,9 @@ struct efi __read_mostly efi = {
 #ifdef CONFIG_LOAD_UEFI_KEYS
        .mokvar_table           = EFI_INVALID_TABLE_ADDR,
 #endif
+#ifdef CONFIG_EFI_COCO_SECRET
+       .coco_secret            = EFI_INVALID_TABLE_ADDR,
+#endif
 };
 EXPORT_SYMBOL(efi);
 
@@ -66,7 +69,7 @@ struct mm_struct efi_mm = {
 
 struct workqueue_struct *efi_rts_wq;
 
-static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);
+static bool disable_runtime = IS_ENABLED(CONFIG_EFI_DISABLE_RUNTIME);
 static int __init setup_noefi(char *arg)
 {
        disable_runtime = true;
@@ -422,6 +425,11 @@ static int __init efisubsys_init(void)
        if (efi_enabled(EFI_DBG) && efi_enabled(EFI_PRESERVE_BS_REGIONS))
                efi_debugfs_init();
 
+#ifdef CONFIG_EFI_COCO_SECRET
+       if (efi.coco_secret != EFI_INVALID_TABLE_ADDR)
+               platform_device_register_simple("efi_secret", 0, NULL, 0);
+#endif
+
        return 0;
 
 err_remove_group:
@@ -528,6 +536,9 @@ static const efi_config_table_type_t common_tables[] __initconst = {
 #endif
 #ifdef CONFIG_LOAD_UEFI_KEYS
        {LINUX_EFI_MOK_VARIABLE_TABLE_GUID,     &efi.mokvar_table,      "MOKvar"        },
+#endif
+#ifdef CONFIG_EFI_COCO_SECRET
+       {LINUX_EFI_COCO_SECRET_AREA_GUID,       &efi.coco_secret,       "CocoSecret"    },
 #endif
        {},
 };
index 4b5b2403b3a07759269e7a1cacf8ceb1ecd49e8b..0131e3aaa6055317993267ca2d354c09cc95f310 100644 (file)
@@ -117,7 +117,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 unsigned long *image_size,
                                 unsigned long *reserve_addr,
                                 unsigned long *reserve_size,
-                                efi_loaded_image_t *image)
+                                efi_loaded_image_t *image,
+                                efi_handle_t image_handle)
 {
        const int slack = TEXT_OFFSET - 5 * PAGE_SIZE;
        int alloc_size = MAX_UNCOMP_KERNEL_SIZE + EFI_PHYS_ALIGN;
index 9cc556013d085991a4825643de486547069e35e2..577173ee1f83d4eebca0100e2941cd47e3ae34b2 100644 (file)
@@ -83,7 +83,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 unsigned long *image_size,
                                 unsigned long *reserve_addr,
                                 unsigned long *reserve_size,
-                                efi_loaded_image_t *image)
+                                efi_loaded_image_t *image,
+                                efi_handle_t image_handle)
 {
        efi_status_t status;
        unsigned long kernel_size, kernel_memsize = 0;
@@ -100,7 +101,15 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
        u64 min_kimg_align = efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
 
        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
-               if (!efi_nokaslr) {
+               efi_guid_t li_fixed_proto = LINUX_EFI_LOADED_IMAGE_FIXED_GUID;
+               void *p;
+
+               if (efi_nokaslr) {
+                       efi_info("KASLR disabled on kernel command line\n");
+               } else if (efi_bs_call(handle_protocol, image_handle,
+                                      &li_fixed_proto, &p) == EFI_SUCCESS) {
+                       efi_info("Image placement fixed by loader\n");
+               } else {
                        status = efi_get_random_bytes(sizeof(phys_seed),
                                                      (u8 *)&phys_seed);
                        if (status == EFI_NOT_FOUND) {
@@ -111,8 +120,6 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                        status);
                                efi_nokaslr = true;
                        }
-               } else {
-                       efi_info("KASLR disabled on kernel command line\n");
                }
        }
 
index da93864d7abcd104495e653b7d3079f31c1b7ed0..f515394cce6e23295e93f018a57b4f1aa4c59a37 100644 (file)
@@ -198,7 +198,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
        status = handle_kernel_image(&image_addr, &image_size,
                                     &reserve_addr,
                                     &reserve_size,
-                                    image);
+                                    image, handle);
        if (status != EFI_SUCCESS) {
                efi_err("Failed to relocate kernel\n");
                goto fail_free_screeninfo;
index edb77b0621ea3f5da7a2f2a0f2bd066c1925210d..b0ae0a454404b83944ec75465fe0fd890b45f95e 100644 (file)
@@ -36,6 +36,9 @@ extern bool efi_novamap;
 
 extern const efi_system_table_t *efi_system_table;
 
+typedef union efi_dxe_services_table efi_dxe_services_table_t;
+extern const efi_dxe_services_table_t *efi_dxe_table;
+
 efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
                                   efi_system_table_t *sys_table_arg);
 
@@ -44,6 +47,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
 #define efi_is_native()                (true)
 #define efi_bs_call(func, ...) efi_system_table->boottime->func(__VA_ARGS__)
 #define efi_rt_call(func, ...) efi_system_table->runtime->func(__VA_ARGS__)
+#define efi_dxe_call(func, ...)        efi_dxe_table->func(__VA_ARGS__)
 #define efi_table_attr(inst, attr)     (inst->attr)
 #define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
 
@@ -329,6 +333,76 @@ union efi_boot_services {
        } mixed_mode;
 };
 
+typedef enum {
+       EfiGcdMemoryTypeNonExistent,
+       EfiGcdMemoryTypeReserved,
+       EfiGcdMemoryTypeSystemMemory,
+       EfiGcdMemoryTypeMemoryMappedIo,
+       EfiGcdMemoryTypePersistent,
+       EfiGcdMemoryTypeMoreReliable,
+       EfiGcdMemoryTypeMaximum
+} efi_gcd_memory_type_t;
+
+typedef struct {
+       efi_physical_addr_t base_address;
+       u64 length;
+       u64 capabilities;
+       u64 attributes;
+       efi_gcd_memory_type_t gcd_memory_type;
+       void *image_handle;
+       void *device_handle;
+} efi_gcd_memory_space_desc_t;
+
+/*
+ * EFI DXE Services table
+ */
+union efi_dxe_services_table {
+       struct {
+               efi_table_hdr_t hdr;
+               void *add_memory_space;
+               void *allocate_memory_space;
+               void *free_memory_space;
+               void *remove_memory_space;
+               efi_status_t (__efiapi *get_memory_space_descriptor)(efi_physical_addr_t,
+                                                                    efi_gcd_memory_space_desc_t *);
+               efi_status_t (__efiapi *set_memory_space_attributes)(efi_physical_addr_t,
+                                                                    u64, u64);
+               void *get_memory_space_map;
+               void *add_io_space;
+               void *allocate_io_space;
+               void *free_io_space;
+               void *remove_io_space;
+               void *get_io_space_descriptor;
+               void *get_io_space_map;
+               void *dispatch;
+               void *schedule;
+               void *trust;
+               void *process_firmware_volume;
+               void *set_memory_space_capabilities;
+       };
+       struct {
+               efi_table_hdr_t hdr;
+               u32 add_memory_space;
+               u32 allocate_memory_space;
+               u32 free_memory_space;
+               u32 remove_memory_space;
+               u32 get_memory_space_descriptor;
+               u32 set_memory_space_attributes;
+               u32 get_memory_space_map;
+               u32 add_io_space;
+               u32 allocate_io_space;
+               u32 free_io_space;
+               u32 remove_io_space;
+               u32 get_io_space_descriptor;
+               u32 get_io_space_map;
+               u32 dispatch;
+               u32 schedule;
+               u32 trust;
+               u32 process_firmware_volume;
+               u32 set_memory_space_capabilities;
+       } mixed_mode;
+};
+
 typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t;
 
 union efi_uga_draw_protocol {
@@ -720,6 +794,13 @@ union efi_tcg2_protocol {
        } mixed_mode;
 };
 
+struct riscv_efi_boot_protocol {
+       u64 revision;
+
+       efi_status_t (__efiapi *get_boot_hartid)(struct riscv_efi_boot_protocol *,
+                                                unsigned long *boot_hartid);
+};
+
 typedef union efi_load_file_protocol efi_load_file_protocol_t;
 typedef union efi_load_file_protocol efi_load_file2_protocol_t;
 
@@ -865,7 +946,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 unsigned long *image_size,
                                 unsigned long *reserve_addr,
                                 unsigned long *reserve_size,
-                                efi_loaded_image_t *image);
+                                efi_loaded_image_t *image,
+                                efi_handle_t image_handle);
 
 asmlinkage void __noreturn efi_enter_kernel(unsigned long entrypoint,
                                            unsigned long fdt_addr,
index 724155b9e10dcf84a44a836efed036574daaa728..715f374791542bb477bdbb9e613862c6c5b861c7 100644 (file)
@@ -56,6 +56,7 @@ efi_status_t efi_random_alloc(unsigned long size,
                              unsigned long random_seed)
 {
        unsigned long map_size, desc_size, total_slots = 0, target_slot;
+       unsigned long total_mirrored_slots = 0;
        unsigned long buff_size;
        efi_status_t status;
        efi_memory_desc_t *memory_map;
@@ -86,8 +87,14 @@ efi_status_t efi_random_alloc(unsigned long size,
                slots = get_entry_num_slots(md, size, ilog2(align));
                MD_NUM_SLOTS(md) = slots;
                total_slots += slots;
+               if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
+                       total_mirrored_slots += slots;
        }
 
+       /* consider only mirrored slots for randomization if any exist */
+       if (total_mirrored_slots > 0)
+               total_slots = total_mirrored_slots;
+
        /* find a random number between 0 and total_slots */
        target_slot = (total_slots * (u64)(random_seed & U32_MAX)) >> 32;
 
@@ -107,6 +114,10 @@ efi_status_t efi_random_alloc(unsigned long size,
                efi_physical_addr_t target;
                unsigned long pages;
 
+               if (total_mirrored_slots > 0 &&
+                   !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+                       continue;
+
                if (target_slot >= MD_NUM_SLOTS(md)) {
                        target_slot -= MD_NUM_SLOTS(md);
                        continue;
index 9c460843442f5ad56a8865a19088d7a2cb3dbcbd..9e85e58d1f27ff45c3a8c37599bd5ffe648e1a80 100644 (file)
@@ -21,9 +21,9 @@
 #define MIN_KIMG_ALIGN         SZ_4M
 #endif
 
-typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long);
+typedef void __noreturn (*jump_kernel_func)(unsigned long, unsigned long);
 
-static u32 hartid;
+static unsigned long hartid;
 
 static int get_boot_hartid_from_fdt(void)
 {
@@ -47,14 +47,31 @@ static int get_boot_hartid_from_fdt(void)
        return 0;
 }
 
+static efi_status_t get_boot_hartid_from_efi(void)
+{
+       efi_guid_t boot_protocol_guid = RISCV_EFI_BOOT_PROTOCOL_GUID;
+       struct riscv_efi_boot_protocol *boot_protocol;
+       efi_status_t status;
+
+       status = efi_bs_call(locate_protocol, &boot_protocol_guid, NULL,
+                            (void **)&boot_protocol);
+       if (status != EFI_SUCCESS)
+               return status;
+       return efi_call_proto(boot_protocol, get_boot_hartid, &hartid);
+}
+
 efi_status_t check_platform_features(void)
 {
+       efi_status_t status;
        int ret;
 
-       ret = get_boot_hartid_from_fdt();
-       if (ret) {
-               efi_err("/chosen/boot-hartid missing or invalid!\n");
-               return EFI_UNSUPPORTED;
+       status = get_boot_hartid_from_efi();
+       if (status != EFI_SUCCESS) {
+               ret = get_boot_hartid_from_fdt();
+               if (ret) {
+                       efi_err("Failed to get boot hartid!\n");
+                       return EFI_UNSUPPORTED;
+               }
        }
        return EFI_SUCCESS;
 }
@@ -80,7 +97,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 unsigned long *image_size,
                                 unsigned long *reserve_addr,
                                 unsigned long *reserve_size,
-                                efi_loaded_image_t *image)
+                                efi_loaded_image_t *image,
+                                efi_handle_t image_handle)
 {
        unsigned long kernel_size = 0;
        unsigned long preferred_addr;
index 01ddd4502e28a116f553ffb5b3383e76def9fded..b14e88ccefcab5cedeca8ad5d1ce68be98e7d4b0 100644 (file)
@@ -22,6 +22,7 @@
 #define MAXMEM_X86_64_4LEVEL (1ull << 46)
 
 const efi_system_table_t *efi_system_table;
+const efi_dxe_services_table_t *efi_dxe_table;
 extern u32 image_offset;
 static efi_loaded_image_t *image = NULL;
 
@@ -211,9 +212,110 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
        }
 }
 
+static void
+adjust_memory_range_protection(unsigned long start, unsigned long size)
+{
+       efi_status_t status;
+       efi_gcd_memory_space_desc_t desc;
+       unsigned long end, next;
+       unsigned long rounded_start, rounded_end;
+       unsigned long unprotect_start, unprotect_size;
+       int has_system_memory = 0;
+
+       if (efi_dxe_table == NULL)
+               return;
+
+       rounded_start = rounddown(start, EFI_PAGE_SIZE);
+       rounded_end = roundup(start + size, EFI_PAGE_SIZE);
+
+       /*
+        * Don't modify memory regions whose attributes are
+        * already suitable, to lower the possibility of
+        * encountering firmware bugs.
+        */
+
+       for (end = start + size; start < end; start = next) {
+
+               status = efi_dxe_call(get_memory_space_descriptor, start, &desc);
+
+               if (status != EFI_SUCCESS)
+                       return;
+
+               next = desc.base_address + desc.length;
+
+               /*
+                * Only system memory is suitable for trampoline/kernel image placement,
+                * so only this type of memory needs its attributes to be modified.
+                */
+
+               if (desc.gcd_memory_type != EfiGcdMemoryTypeSystemMemory ||
+                   (desc.attributes & (EFI_MEMORY_RO | EFI_MEMORY_XP)) == 0)
+                       continue;
+
+               unprotect_start = max(rounded_start, (unsigned long)desc.base_address);
+               unprotect_size = min(rounded_end, next) - unprotect_start;
+
+               status = efi_dxe_call(set_memory_space_attributes,
+                                     unprotect_start, unprotect_size,
+                                     EFI_MEMORY_WB);
+
+               if (status != EFI_SUCCESS) {
+                       efi_warn("Unable to unprotect memory range [%08lx,%08lx]: %d\n",
+                                unprotect_start,
+                                unprotect_start + unprotect_size,
+                                (int)status);
+               }
+       }
+}
+
+/*
+ * The trampoline takes 2 pages and can be loaded into the first megabyte
+ * of memory, with its end placed between 128k and 640k, where the BIOS
+ * might start. (see arch/x86/boot/compressed/pgtable_64.c)
+ *
+ * We cannot determine the exact trampoline placement, since the memory
+ * map can be modified by UEFI, which can alter the computed address.
+ */
+
+#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
+#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
+
+void startup_32(struct boot_params *boot_params);
+
+static void
+setup_memory_protection(unsigned long image_base, unsigned long image_size)
+{
+       /*
+        * Allow execution of the possible trampoline used for
+        * switching between 4- and 5-level page tables, and of the
+        * relocated kernel image.
+        */
+
+       adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
+                                      TRAMPOLINE_PLACEMENT_SIZE);
+
+#ifdef CONFIG_64BIT
+       if (image_base != (unsigned long)startup_32)
+               adjust_memory_range_protection(image_base, image_size);
+#else
+       /*
+        * Clear protection flags on the whole range of possible
+        * addresses used for KASLR. We don't need to do that
+        * on x86_64, since KASLR/extraction is performed after
+        * dedicated identity page tables are built, and we only
+        * need to remove possible protection on the relocated image
+        * itself, disregarding further relocations.
+        */
+       adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
+                                      KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
+#endif
+}
+
 static const efi_char16_t apple[] = L"Apple";
 
-static void setup_quirks(struct boot_params *boot_params)
+static void setup_quirks(struct boot_params *boot_params,
+                        unsigned long image_base,
+                        unsigned long image_size)
 {
        efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
                efi_table_attr(efi_system_table, fw_vendor);
@@ -222,6 +324,9 @@ static void setup_quirks(struct boot_params *boot_params)
                if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
                        retrieve_apple_device_properties(boot_params);
        }
+
+       if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES))
+               setup_memory_protection(image_base, image_size);
 }
 
 /*
@@ -341,8 +446,6 @@ static void __noreturn efi_exit(efi_handle_t handle, efi_status_t status)
                asm("hlt");
 }
 
-void startup_32(struct boot_params *boot_params);
-
 void __noreturn efi_stub_entry(efi_handle_t handle,
                               efi_system_table_t *sys_table_arg,
                               struct boot_params *boot_params);
@@ -677,11 +780,17 @@ unsigned long efi_main(efi_handle_t handle,
        efi_status_t status;
 
        efi_system_table = sys_table_arg;
-
        /* Check if we were booted by the EFI firmware */
        if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
                efi_exit(handle, EFI_INVALID_PARAMETER);
 
+       efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
+       if (efi_dxe_table &&
+           efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
+               efi_warn("Ignoring DXE services table: invalid signature\n");
+               efi_dxe_table = NULL;
+       }
+
        /*
         * If the kernel isn't already loaded at a suitable address,
         * relocate it.
@@ -791,7 +900,7 @@ unsigned long efi_main(efi_handle_t handle,
 
        setup_efi_pci(boot_params);
 
-       setup_quirks(boot_params);
+       setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
 
        status = exit_boot(boot_params, handle);
        if (status != EFI_SUCCESS) {
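As a quick sanity check on the placement window defined by the TRAMPOLINE_PLACEMENT_* macros above, a standalone arithmetic sketch (userspace C, illustrative only):

#include <stdio.h>

int main(void)
{
	/* Same arithmetic as TRAMPOLINE_PLACEMENT_BASE/_SIZE above. */
	unsigned long base = (128 - 8) * 1024;	/* 120 KiB == 0x1e000 */
	unsigned long size = 640 * 1024 - (128 - 8) * 1024;

	/* Ends exactly at 640 KiB (0xa0000), where the BIOS area may begin. */
	printf("trampoline window: [0x%lx, 0x%lx)\n", base, base + size);
	return 0;
}
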
index b8b1473a5b1e9670c0d19027965ac89e440c6c85..f87ff3fa8a531d27ac537d7f1a3eb4dd894dcad2 100644 (file)
@@ -178,3 +178,22 @@ discussed but the idea is to provide a low-level access point
 for debugging and hacking and to expose all lines without the
 need of any exporting. Also provide ample ammunition to shoot
 oneself in the foot, because this is debugfs after all.
+
+
+Moving over to immutable irq_chip structures
+
+Most of the gpio chips implementing interrupt support rely on gpiolib
+intercepting some of the irq_chip callbacks, preventing the structures
+from being made read-only and forcing duplication of structures that
+should otherwise be unique.
+
+The solution is to call into the gpiolib code when needed (resource
+management, enable/disable or unmask/mask callbacks), and to let the
+core code know about that by exposing a flag (IRQCHIP_IMMUTABLE) in
+the irq_chip structure. The irq_chip structure can then be made unique
+and const.
+
+A small number of drivers have been converted (pl061, tegra186, msm,
+amd, apple), and can be used as examples of how to proceed with this
+conversion. Note that drivers using the generic irqchip framework
+cannot be converted yet, but watch this space!
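To make the conversion recipe above concrete, here is a minimal skeleton following the pattern used by the pl061 and tegra186 conversions elsewhere in this merge (the my_* driver names and hardware hooks are hypothetical):

static void my_gpio_irq_mask(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

	my_hw_mask(gc, irqd_to_hwirq(d));	/* hypothetical register write */
	gpiochip_disable_irq(gc, irqd_to_hwirq(d));
}

static void my_gpio_irq_unmask(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

	gpiochip_enable_irq(gc, irqd_to_hwirq(d));
	my_hw_unmask(gc, irqd_to_hwirq(d));	/* hypothetical register write */
}

/* Unique and const: gpiolib no longer needs to patch the callbacks. */
static const struct irq_chip my_gpio_irq_chip = {
	.irq_mask	= my_gpio_irq_mask,
	.irq_unmask	= my_gpio_irq_unmask,
	.flags		= IRQCHIP_IMMUTABLE,
	GPIOCHIP_IRQ_RESOURCE_HELPERS,
};

/* In probe: gpio_irq_chip_set_chip(&gc->irq, &my_gpio_irq_chip); */
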
index a2c8dd329b31b9307cf24cdc93a63a815f372d2b..2db19cd640a43a4265975e06466d0556dc8ebcaa 100644 (file)
@@ -707,6 +707,9 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        unsigned long flags;
        unsigned int on, off;
 
+       if (state->polarity != PWM_POLARITY_NORMAL)
+               return -EINVAL;
+
        val = (unsigned long long) mvpwm->clk_rate * state->duty_cycle;
        do_div(val, NSEC_PER_SEC);
        if (val > UINT_MAX + 1ULL)
index 4ecab700f23f6cc43c79a0bb05a0f1844e499faa..6464056cb6ae523a232e4c1f433110f5ebadaf1c 100644 (file)
@@ -52,7 +52,6 @@ struct pl061 {
 
        void __iomem            *base;
        struct gpio_chip        gc;
-       struct irq_chip         irq_chip;
        int                     parent_irq;
 
 #ifdef CONFIG_PM
@@ -241,6 +240,8 @@ static void pl061_irq_mask(struct irq_data *d)
        gpioie = readb(pl061->base + GPIOIE) & ~mask;
        writeb(gpioie, pl061->base + GPIOIE);
        raw_spin_unlock(&pl061->lock);
+
+       gpiochip_disable_irq(gc, d->hwirq);
 }
 
 static void pl061_irq_unmask(struct irq_data *d)
@@ -250,6 +251,8 @@ static void pl061_irq_unmask(struct irq_data *d)
        u8 mask = BIT(irqd_to_hwirq(d) % PL061_GPIO_NR);
        u8 gpioie;
 
+       gpiochip_enable_irq(gc, d->hwirq);
+
        raw_spin_lock(&pl061->lock);
        gpioie = readb(pl061->base + GPIOIE) | mask;
        writeb(gpioie, pl061->base + GPIOIE);
@@ -283,6 +286,24 @@ static int pl061_irq_set_wake(struct irq_data *d, unsigned int state)
        return irq_set_irq_wake(pl061->parent_irq, state);
 }
 
+static void pl061_irq_print_chip(struct irq_data *data, struct seq_file *p)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+
+       seq_printf(p, "%s", dev_name(gc->parent));
+}
+
+static const struct irq_chip pl061_irq_chip = {
+       .irq_ack                = pl061_irq_ack,
+       .irq_mask               = pl061_irq_mask,
+       .irq_unmask             = pl061_irq_unmask,
+       .irq_set_type           = pl061_irq_type,
+       .irq_set_wake           = pl061_irq_set_wake,
+       .irq_print_chip         = pl061_irq_print_chip,
+       .flags                  = IRQCHIP_IMMUTABLE,
+       GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
 static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
 {
        struct device *dev = &adev->dev;
@@ -315,13 +336,6 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
        /*
         * irq_chip support
         */
-       pl061->irq_chip.name = dev_name(dev);
-       pl061->irq_chip.irq_ack = pl061_irq_ack;
-       pl061->irq_chip.irq_mask = pl061_irq_mask;
-       pl061->irq_chip.irq_unmask = pl061_irq_unmask;
-       pl061->irq_chip.irq_set_type = pl061_irq_type;
-       pl061->irq_chip.irq_set_wake = pl061_irq_set_wake;
-
        writeb(0, pl061->base + GPIOIE); /* disable irqs */
        irq = adev->irq[0];
        if (!irq)
@@ -329,7 +343,7 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
        pl061->parent_irq = irq;
 
        girq = &pl061->gc.irq;
-       girq->chip = &pl061->irq_chip;
+       gpio_irq_chip_set_chip(girq, &pl061_irq_chip);
        girq->parent_handler = pl061_irq_handler;
        girq->num_parents = 1;
        girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
index 031fe105b58ed34a2d54f381eef189093fcfc078..84c4f1e9fb0c580a24c87ff320c6f5ccdceb3326 100644 (file)
@@ -80,7 +80,6 @@ struct tegra_gpio_soc {
 
 struct tegra_gpio {
        struct gpio_chip gpio;
-       struct irq_chip intc;
        unsigned int num_irq;
        unsigned int *irq;
 
@@ -372,6 +371,8 @@ static void tegra186_irq_mask(struct irq_data *data)
        value = readl(base + TEGRA186_GPIO_ENABLE_CONFIG);
        value &= ~TEGRA186_GPIO_ENABLE_CONFIG_INTERRUPT;
        writel(value, base + TEGRA186_GPIO_ENABLE_CONFIG);
+
+       gpiochip_disable_irq(&gpio->gpio, data->hwirq);
 }
 
 static void tegra186_irq_unmask(struct irq_data *data)
@@ -385,6 +386,8 @@ static void tegra186_irq_unmask(struct irq_data *data)
        if (WARN_ON(base == NULL))
                return;
 
+       gpiochip_enable_irq(&gpio->gpio, data->hwirq);
+
        value = readl(base + TEGRA186_GPIO_ENABLE_CONFIG);
        value |= TEGRA186_GPIO_ENABLE_CONFIG_INTERRUPT;
        writel(value, base + TEGRA186_GPIO_ENABLE_CONFIG);
@@ -456,6 +459,24 @@ static int tegra186_irq_set_wake(struct irq_data *data, unsigned int on)
        return 0;
 }
 
+static void tegra186_irq_print_chip(struct irq_data *data, struct seq_file *p)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+
+       seq_printf(p, "%s", dev_name(gc->parent));
+}
+
+static const struct irq_chip tegra186_gpio_irq_chip = {
+       .irq_ack                = tegra186_irq_ack,
+       .irq_mask               = tegra186_irq_mask,
+       .irq_unmask             = tegra186_irq_unmask,
+       .irq_set_type           = tegra186_irq_set_type,
+       .irq_set_wake           = tegra186_irq_set_wake,
+       .irq_print_chip         = tegra186_irq_print_chip,
+       .flags                  = IRQCHIP_IMMUTABLE,
+       GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
 static void tegra186_gpio_irq(struct irq_desc *desc)
 {
        struct tegra_gpio *gpio = irq_desc_get_handler_data(desc);
@@ -760,15 +781,8 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
        gpio->gpio.of_xlate = tegra186_gpio_of_xlate;
 #endif /* CONFIG_OF_GPIO */
 
-       gpio->intc.name = dev_name(&pdev->dev);
-       gpio->intc.irq_ack = tegra186_irq_ack;
-       gpio->intc.irq_mask = tegra186_irq_mask;
-       gpio->intc.irq_unmask = tegra186_irq_unmask;
-       gpio->intc.irq_set_type = tegra186_irq_set_type;
-       gpio->intc.irq_set_wake = tegra186_irq_set_wake;
-
        irq = &gpio->gpio.irq;
-       irq->chip = &gpio->intc;
+       gpio_irq_chip_set_chip(irq, &tegra186_gpio_irq_chip);
        irq->fwnode = of_node_to_fwnode(pdev->dev.of_node);
        irq->child_to_parent_hwirq = tegra186_gpio_child_to_parent_hwirq;
        irq->populate_parent_alloc_arg = tegra186_gpio_populate_parent_fwspec;
index 20780c35da1b4150fe0c94ad3901fcf1f8b0a8d7..23cddb265a0dc3b6d5b6ed9108496ab6449dc209 100644 (file)
@@ -125,9 +125,13 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio,
 {
        struct vf610_gpio_port *port = gpiochip_get_data(chip);
        unsigned long mask = BIT(gpio);
+       u32 val;
 
-       if (port->sdata && port->sdata->have_paddr)
-               vf610_gpio_writel(mask, port->gpio_base + GPIO_PDDR);
+       if (port->sdata && port->sdata->have_paddr) {
+               val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR);
+               val |= mask;
+               vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR);
+       }
 
        vf610_gpio_set(chip, gpio, value);
 
index b7694171655cfd00870e2c078fbc7d977fdef653..690035124faa3522fcee7a9d32ffa50166bcc767 100644 (file)
@@ -1433,19 +1433,21 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset)
        return irq_create_mapping(domain, offset);
 }
 
-static int gpiochip_irq_reqres(struct irq_data *d)
+int gpiochip_irq_reqres(struct irq_data *d)
 {
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 
        return gpiochip_reqres_irq(gc, d->hwirq);
 }
+EXPORT_SYMBOL(gpiochip_irq_reqres);
 
-static void gpiochip_irq_relres(struct irq_data *d)
+void gpiochip_irq_relres(struct irq_data *d)
 {
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 
        gpiochip_relres_irq(gc, d->hwirq);
 }
+EXPORT_SYMBOL(gpiochip_irq_relres);
 
 static void gpiochip_irq_mask(struct irq_data *d)
 {
@@ -1485,6 +1487,11 @@ static void gpiochip_set_irq_hooks(struct gpio_chip *gc)
 {
        struct irq_chip *irqchip = gc->irq.chip;
 
+       if (irqchip->flags & IRQCHIP_IMMUTABLE)
+               return;
+
+       chip_warn(gc, "not an immutable chip, please consider fixing it!\n");
+
        if (!irqchip->irq_request_resources &&
            !irqchip->irq_release_resources) {
                irqchip->irq_request_resources = gpiochip_irq_reqres;
@@ -1652,7 +1659,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gc)
                irq_domain_remove(gc->irq.domain);
        }
 
-       if (irqchip) {
+       if (irqchip && !(irqchip->flags & IRQCHIP_IMMUTABLE)) {
                if (irqchip->irq_request_resources == gpiochip_irq_reqres) {
                        irqchip->irq_request_resources = NULL;
                        irqchip->irq_release_resources = NULL;
index cdf0818088b3de86633cab519473213fc5c6310a..7606e3b6361eaa18ce76b17dbc625845f5d18ba2 100644 (file)
@@ -1342,9 +1342,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
 bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
 bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
 #else
 static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
 static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
 #endif
 
index 0e12315fa0cb87c643426bda0fa3a8f09feb3820..98ac53ee6bb55f0d9fdda1ceeeb7c0efc73a0567 100644 (file)
@@ -1045,6 +1045,20 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
                (pm_suspend_target_state == PM_SUSPEND_MEM);
 }
 
+/**
+ * amdgpu_acpi_should_gpu_reset
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true if the GPU should be reset, false if not.
+ */
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
+{
+       if (adev->flags & AMD_IS_APU)
+               return false;
+       return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+}
+
 /**
  * amdgpu_acpi_is_s0ix_active
  *
index 8f0e6d93bb9cb9ed1c340c27be931262f50400a4..c317078d1afd0d3e7cab7e8c9251e94c6f21f483 100644 (file)
@@ -296,6 +296,7 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
 {
        struct amdgpu_device *adev = ctx->adev;
        enum amd_dpm_forced_level level;
+       u32 current_stable_pstate;
        int r;
 
        mutex_lock(&adev->pm.stable_pstate_ctx_lock);
@@ -304,6 +305,10 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
                goto done;
        }
 
+       r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+       if (r || (stable_pstate == current_stable_pstate))
+               goto done;
+
        switch (stable_pstate) {
        case AMDGPU_CTX_STABLE_PSTATE_NONE:
                level = AMD_DPM_FORCED_LEVEL_AUTO;
index 7fd0277b280512b75d817e15be71ab133370fc30..46ef57b07c151743d6cee9bab8325bfe75a5834f 100644 (file)
@@ -2336,7 +2336,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
        struct drm_device *drm_dev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(drm_dev);
 
-       if (!adev->in_s0ix)
+       if (amdgpu_acpi_should_gpu_reset(adev))
                return amdgpu_asic_reset(adev);
 
        return 0;
index 039b90cdc3bcac9f22167d7d25489a3ffa113535..45f0188c42739b7d9a80803848af12e6f55929b2 100644 (file)
 #include "mxgpu_vi.h"
 #include "amdgpu_dm.h"
 
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#endif
+
 #define ixPCIE_LC_L1_PM_SUBSTATE       0x100100C6
 #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK       0x00000001L
 #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK   0x00000002L
@@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
                WREG32_PCIE(ixPCIE_LC_CNTL, data);
 }
 
+static bool aspm_support_quirk_check(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+       struct cpuinfo_x86 *c = &cpu_data(0);
+
+       return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+#else
+       return true;
+#endif
+}
+
 static void vi_program_aspm(struct amdgpu_device *adev)
 {
        u32 data, data1, orig;
        bool bL1SS = false;
        bool bClkReqSupport = true;
 
-       if (!amdgpu_device_should_use_aspm(adev))
+       if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
                return;
 
        if (adev->flags & AMD_IS_APU ||
index d7559e5a99ce8282d292934e1bfbe549692be2eb..e708f07fe75af1b40d45c66a841a194f017fad4d 100644 (file)
@@ -153,9 +153,4 @@ void dcn31_hw_sequencer_construct(struct dc *dc)
                dc->hwss.init_hw = dcn20_fpga_init_hw;
                dc->hwseq->funcs.init_pipes = NULL;
        }
-       if (dc->debug.disable_z10) {
-               /*hw not support z10 or sw disable it*/
-               dc->hwss.z10_restore = NULL;
-               dc->hwss.z10_save_init = NULL;
-       }
 }
index f1544755d8b469539cd3c4a20b2e6f635f8da800..f10a0256413e6461b228cd8e454588daaa599ad6 100644 (file)
@@ -1351,14 +1351,8 @@ static int smu_disable_dpms(struct smu_context *smu)
 {
        struct amdgpu_device *adev = smu->adev;
        int ret = 0;
-       /*
-        * TODO: (adev->in_suspend && !adev->in_s0ix) is added to pair
-        * the workaround which always reset the asic in suspend.
-        * It's likely that workaround will be dropped in the future.
-        * Then the change here should be dropped together.
-        */
        bool use_baco = !smu->is_apu &&
-               (((amdgpu_in_reset(adev) || (adev->in_suspend && !adev->in_s0ix)) &&
+               ((amdgpu_in_reset(adev) &&
                  (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
                 ((adev->in_runpm || adev->in_s4) && amdgpu_asic_supports_baco(adev)));
 
index 11300b53d24fcada3535881926b1eb886435c537..7a7cc44686f9749584d1e0e34c7540d0c210d355 100644 (file)
@@ -4852,6 +4852,7 @@ static void fetch_monitor_name(struct drm_dp_mst_topology_mgr *mgr,
 
        mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port);
        drm_edid_get_monitor_name(mst_edid, name, namelen);
+       kfree(mst_edid);
 }
 
 /**
index 7616a3906b9ec440b0bf648dc10a79ebf2da246d..1b774dcfb28194c9516dae35f10c2a4b3582a72f 100644 (file)
@@ -367,6 +367,44 @@ static void dmc_set_fw_offset(struct intel_dmc *dmc,
        }
 }
 
+static bool dmc_mmio_addr_sanity_check(struct intel_dmc *dmc,
+                                      const u32 *mmioaddr, u32 mmio_count,
+                                      int header_ver, u8 dmc_id)
+{
+       struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc);
+       u32 start_range, end_range;
+       int i;
+
+       if (dmc_id >= DMC_FW_MAX) {
+               drm_warn(&i915->drm, "Unsupported firmware id %u\n", dmc_id);
+               return false;
+       }
+
+       if (header_ver == 1) {
+               start_range = DMC_MMIO_START_RANGE;
+               end_range = DMC_MMIO_END_RANGE;
+       } else if (dmc_id == DMC_FW_MAIN) {
+               start_range = TGL_MAIN_MMIO_START;
+               end_range = TGL_MAIN_MMIO_END;
+       } else if (DISPLAY_VER(i915) >= 13) {
+               start_range = ADLP_PIPE_MMIO_START;
+               end_range = ADLP_PIPE_MMIO_END;
+       } else if (DISPLAY_VER(i915) >= 12) {
+               start_range = TGL_PIPE_MMIO_START(dmc_id);
+               end_range = TGL_PIPE_MMIO_END(dmc_id);
+       } else {
+               drm_warn(&i915->drm, "Unknown mmio range for sanity check");
+               return false;
+       }
+
+       for (i = 0; i < mmio_count; i++) {
+               if (mmioaddr[i] < start_range || mmioaddr[i] > end_range)
+                       return false;
+       }
+
+       return true;
+}
+
 static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
                               const struct intel_dmc_header_base *dmc_header,
                               size_t rem_size, u8 dmc_id)
@@ -436,6 +474,12 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
                return 0;
        }
 
+       if (!dmc_mmio_addr_sanity_check(dmc, mmioaddr, mmio_count,
+                                       dmc_header->header_ver, dmc_id)) {
+               drm_err(&i915->drm, "DMC firmware has Wrong MMIO Addresses\n");
+               return 0;
+       }
+
        for (i = 0; i < mmio_count; i++) {
                dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]);
                dmc_info->mmiodata[i] = mmiodata[i];
index d42f437149c953577390f6efff9a60bdb2c62926..6ca8929cf6e128634e38fe5e136cfae98810823e 100644 (file)
@@ -1252,14 +1252,12 @@ static void *reloc_iomap(struct i915_vma *batch,
                 * Only attempt to pin the batch buffer to ggtt if the current batch
                 * is not inside ggtt, or the batch buffer is not misplaced.
                 */
-               if (!i915_is_ggtt(batch->vm)) {
+               if (!i915_is_ggtt(batch->vm) ||
+                   !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) {
                        vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
                                                          PIN_MAPPABLE |
                                                          PIN_NONBLOCK /* NOWARN */ |
                                                          PIN_NOEVICT);
-               } else if (i915_vma_is_map_and_fenceable(batch)) {
-                       __i915_vma_pin(batch);
-                       vma = batch;
                }
 
                if (vma == ERR_PTR(-EDEADLK))
index 82713264b96c11bec1031f656f5315b677cc746d..b7c6d4462ec5555deed48e0b11f7f0516e1b8b2d 100644 (file)
@@ -806,7 +806,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
                __intel_engine_reset(engine, stalled_mask & engine->mask);
        local_bh_enable();
 
-       intel_uc_reset(&gt->uc, true);
+       intel_uc_reset(&gt->uc, ALL_ENGINES);
 
        intel_ggtt_restore_fences(gt->ggtt);
 
index bf7079480d472dd9a041b893311c0a825ef46ae3..2488d1197f3e5c5236679d072025cabb955768cb 100644 (file)
@@ -438,7 +438,7 @@ int intel_guc_global_policies_update(struct intel_guc *guc);
 void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq);
 
 void intel_guc_submission_reset_prepare(struct intel_guc *guc);
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled);
 void intel_guc_submission_reset_finish(struct intel_guc *guc);
 void intel_guc_submission_cancel_requests(struct intel_guc *guc);
 
index 1ce7e04aa837bab4c1fe894bbfb0613e97883123..28f9aac0201ddfa69b61e741b312c09b5c4340a9 100644 (file)
@@ -1590,9 +1590,9 @@ __unwind_incomplete_requests(struct intel_context *ce)
        spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
-static void __guc_reset_context(struct intel_context *ce, bool stalled)
+static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
 {
-       bool local_stalled;
+       bool guilty;
        struct i915_request *rq;
        unsigned long flags;
        u32 head;
@@ -1620,7 +1620,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
                if (!intel_context_is_pinned(ce))
                        goto next_context;
 
-               local_stalled = false;
+               guilty = false;
                rq = intel_context_find_active_request(ce);
                if (!rq) {
                        head = ce->ring->tail;
@@ -1628,14 +1628,14 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
                }
 
                if (i915_request_started(rq))
-                       local_stalled = true;
+                       guilty = stalled & ce->engine->mask;
 
                GEM_BUG_ON(i915_active_is_idle(&ce->active));
                head = intel_ring_wrap(ce->ring, rq->head);
 
-               __i915_request_reset(rq, local_stalled && stalled);
+               __i915_request_reset(rq, guilty);
 out_replay:
-               guc_reset_state(ce, head, local_stalled && stalled);
+               guc_reset_state(ce, head, guilty);
 next_context:
                if (i != number_children)
                        ce = list_next_entry(ce, parallel.child_link);
@@ -1645,7 +1645,7 @@ next_context:
        intel_context_put(parent);
 }
 
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
 {
        struct intel_context *ce;
        unsigned long index;
@@ -4013,7 +4013,7 @@ static void guc_context_replay(struct intel_context *ce)
 {
        struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
 
-       __guc_reset_context(ce, true);
+       __guc_reset_context(ce, ce->engine->mask);
        tasklet_hi_schedule(&sched_engine->tasklet);
 }
 
index da199aa6989fba35fcc129f48f8d86d5dd886ed7..8eb34de2f20c03612d030d4684a79faa1a223df5 100644 (file)
@@ -593,7 +593,7 @@ sanitize:
        __uc_sanitize(uc);
 }
 
-void intel_uc_reset(struct intel_uc *uc, bool stalled)
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled)
 {
        struct intel_guc *guc = &uc->guc;
 
index 866b462821c005314b771ce48d3c736cb10c6d3d..a8f38c2c60e23f2d5bb298aaead52a6214298032 100644 (file)
@@ -42,7 +42,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc);
 void intel_uc_driver_remove(struct intel_uc *uc);
 void intel_uc_init_mmio(struct intel_uc *uc);
 void intel_uc_reset_prepare(struct intel_uc *uc);
-void intel_uc_reset(struct intel_uc *uc, bool stalled);
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled);
 void intel_uc_reset_finish(struct intel_uc *uc);
 void intel_uc_cancel_requests(struct intel_uc *uc);
 void intel_uc_suspend(struct intel_uc *uc);
index a9354f8f110d8f1c2f029c3bad9fce9fca7a3a8b..fe960c2043621dfc0756e2baa45537afc9718c25 100644 (file)
 /* MMIO address range for DMC program (0x80000 - 0x82FFF) */
 #define DMC_MMIO_START_RANGE   0x80000
 #define DMC_MMIO_END_RANGE     0x8FFFF
+#define DMC_V1_MMIO_START_RANGE        0x80000
+#define TGL_MAIN_MMIO_START    0x8F000
+#define TGL_MAIN_MMIO_END      0x8FFFF
+#define _TGL_PIPEA_MMIO_START  0x92000
+#define _TGL_PIPEA_MMIO_END    0x93FFF
+#define _TGL_PIPEB_MMIO_START  0x96000
+#define _TGL_PIPEB_MMIO_END    0x97FFF
+#define ADLP_PIPE_MMIO_START   0x5F000
+#define ADLP_PIPE_MMIO_END     0x5FFFF
+
+#define TGL_PIPE_MMIO_START(dmc_id)    _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_START,\
+                                               _TGL_PIPEB_MMIO_START)
+
+#define TGL_PIPE_MMIO_END(dmc_id)      _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_END,\
+                                               _TGL_PIPEB_MMIO_END)
+
 #define SKL_DMC_DC3_DC5_COUNT  _MMIO(0x80030)
 #define SKL_DMC_DC5_DC6_COUNT  _MMIO(0x8002C)
 #define BXT_DMC_DC3_DC5_COUNT  _MMIO(0x80038)
index 94fcdb7bd21d3c8e39642132768b6ded8b9e5a9e..eeaa8d0d0407572e8a0b0e7ee5ae99a0e5a1df8d 100644 (file)
@@ -1605,17 +1605,17 @@ void i915_vma_close(struct i915_vma *vma)
 
 static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-       struct intel_gt *gt = vma->vm->gt;
-
-       spin_lock_irq(&gt->closed_lock);
        list_del_init(&vma->closed_link);
-       spin_unlock_irq(&gt->closed_lock);
 }
 
 void i915_vma_reopen(struct i915_vma *vma)
 {
+       struct intel_gt *gt = vma->vm->gt;
+
+       spin_lock_irq(&gt->closed_lock);
        if (i915_vma_is_closed(vma))
                __i915_vma_remove_closed(vma);
+       spin_unlock_irq(&gt->closed_lock);
 }
 
 void i915_vma_release(struct kref *ref)
@@ -1641,6 +1641,7 @@ static void force_unbind(struct i915_vma *vma)
 static void release_references(struct i915_vma *vma)
 {
        struct drm_i915_gem_object *obj = vma->obj;
+       struct intel_gt *gt = vma->vm->gt;
 
        GEM_BUG_ON(i915_vma_is_active(vma));
 
@@ -1650,7 +1651,9 @@ static void release_references(struct i915_vma *vma)
                rb_erase(&vma->obj_node, &obj->vma.tree);
        spin_unlock(&obj->vma.lock);
 
+       spin_lock_irq(&gt->closed_lock);
        __i915_vma_remove_closed(vma);
+       spin_unlock_irq(&gt->closed_lock);
 
        __i915_vma_put(vma);
 }
index daf9f87477ba13cf3e6c1c3bdd24ff027a42a115..a2141d3d9b1d2bbc3048f6e397a358a915b1058d 100644 (file)
@@ -46,8 +46,9 @@ static bool
 nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE],
                           struct nouveau_backlight *bl)
 {
-       const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL);
-       if (nb < 0 || nb >= 100)
+       const int nb = ida_alloc_max(&bl_ida, 99, GFP_KERNEL);
+
+       if (nb < 0)
                return false;
        if (nb > 0)
                snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb);
@@ -414,7 +415,7 @@ nouveau_backlight_init(struct drm_connector *connector)
                                            nv_encoder, ops, &props);
        if (IS_ERR(bl->dev)) {
                if (bl->id >= 0)
-                       ida_simple_remove(&bl_ida, bl->id);
+                       ida_free(&bl_ida, bl->id);
                ret = PTR_ERR(bl->dev);
                goto fail_alloc;
        }
@@ -442,7 +443,7 @@ nouveau_backlight_fini(struct drm_connector *connector)
                return;
 
        if (bl->id >= 0)
-               ida_simple_remove(&bl_ida, bl->id);
+               ida_free(&bl_ida, bl->id);
 
        backlight_device_unregister(bl->dev);
        nv_conn->backlight = NULL;
index 992cc285f2fecfb7c126fdc59e107e4cb415a904..2ed528c065fae6ba09d801a1401aa72fd0fa505d 100644 (file)
@@ -123,7 +123,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
 
        mutex_init(&tdev->iommu.mutex);
 
-       if (iommu_present(&platform_bus_type)) {
+       if (device_iommu_mapped(dev)) {
                tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type);
                if (!tdev->iommu.domain)
                        goto error;
index 6c58b0fd13fbb002cdecbb10618493b6e6ee8eb5..98b78ec6b37d6d9ed49acff047eb6ad614720669 100644 (file)
@@ -38,6 +38,7 @@
 #include <drm/drm_scdc_helper.h>
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/of_address.h>
 #include <linux/of_gpio.h>
index a3bfbb6c3e14aa30a6431293c46252294b5668ac..162dfeb1cc5ada1bd580cf5bf40c3afbd72b7e5c 100644 (file)
@@ -528,7 +528,7 @@ int vmw_cmd_send_fence(struct vmw_private *dev_priv, uint32_t *seqno)
                *seqno = atomic_add_return(1, &dev_priv->marker_seq);
        } while (*seqno == 0);
 
-       if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE)) {
+       if (!vmw_has_fences(dev_priv)) {
 
                /*
                 * Don't request hardware to send a fence. The
@@ -675,11 +675,14 @@ int vmw_cmd_emit_dummy_query(struct vmw_private *dev_priv,
  */
 bool vmw_cmd_supported(struct vmw_private *vmw)
 {
-       if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
-                                 SVGA_CAP_CMD_BUFFERS_2)) != 0)
-               return true;
+       bool has_cmdbufs =
+               (vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
+                                     SVGA_CAP_CMD_BUFFERS_2)) != 0;
+       if (vmw_is_svga_v3(vmw))
+               return (has_cmdbufs &&
+                       (vmw->capabilities & SVGA_CAP_GBOBJECTS) != 0);
        /*
         * We have FIFO cmd's
         */
-       return vmw->fifo_mem != NULL;
+       return has_cmdbufs || vmw->fifo_mem != NULL;
 }
index ea3ecdda561dc3fe9a2e565e1273b167f138edb7..6de0b9ef5c7734bc58f852ddb2ac0982923d5386 100644 (file)
@@ -1679,4 +1679,12 @@ static inline void vmw_irq_status_write(struct vmw_private *vmw,
                outl(status, vmw->io_start + SVGA_IRQSTATUS_PORT);
 }
 
+static inline bool vmw_has_fences(struct vmw_private *vmw)
+{
+       if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
+                                 SVGA_CAP_CMD_BUFFERS_2)) != 0)
+               return true;
+       return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0;
+}
+
 #endif
index 8ee34576c7d08ac66632beadf513ecbc4c811fba..adf17c740656d72f0c8752095b583977f1a0a7db 100644 (file)
@@ -483,7 +483,7 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par,
 
 static int vmw_fb_kms_framebuffer(struct fb_info *info)
 {
-       struct drm_mode_fb_cmd2 mode_cmd;
+       struct drm_mode_fb_cmd2 mode_cmd = {0};
        struct vmw_fb_par *par = info->par;
        struct fb_var_screeninfo *var = &info->var;
        struct drm_framebuffer *cur_fb;
index 59d6a2dd4c2e41e0eb7084afb0c4ae4ef5c711c6..66cc35dc223e709089e5d2b4051658bc1a335d9d 100644 (file)
@@ -82,6 +82,22 @@ fman_from_fence(struct vmw_fence_obj *fence)
        return container_of(fence->base.lock, struct vmw_fence_manager, lock);
 }
 
+static u32 vmw_fence_goal_read(struct vmw_private *vmw)
+{
+       if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
+               return vmw_read(vmw, SVGA_REG_FENCE_GOAL);
+       else
+               return vmw_fifo_mem_read(vmw, SVGA_FIFO_FENCE_GOAL);
+}
+
+static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value)
+{
+       if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
+               vmw_write(vmw, SVGA_REG_FENCE_GOAL, value);
+       else
+               vmw_fifo_mem_write(vmw, SVGA_FIFO_FENCE_GOAL, value);
+}
+
 /*
  * Note on fencing subsystem usage of irqs:
  * Typically the vmw_fences_update function is called
@@ -392,7 +408,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
        if (likely(!fman->seqno_valid))
                return false;
 
-       goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL);
+       goal_seqno = vmw_fence_goal_read(fman->dev_priv);
        if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP))
                return false;
 
@@ -400,9 +416,8 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
        list_for_each_entry(fence, &fman->fence_list, head) {
                if (!list_empty(&fence->seq_passed_actions)) {
                        fman->seqno_valid = true;
-                       vmw_fifo_mem_write(fman->dev_priv,
-                                          SVGA_FIFO_FENCE_GOAL,
-                                          fence->base.seqno);
+                       vmw_fence_goal_write(fman->dev_priv,
+                                            fence->base.seqno);
                        break;
                }
        }
@@ -434,13 +449,12 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
        if (dma_fence_is_signaled_locked(&fence->base))
                return false;
 
-       goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL);
+       goal_seqno = vmw_fence_goal_read(fman->dev_priv);
        if (likely(fman->seqno_valid &&
                   goal_seqno - fence->base.seqno < VMW_FENCE_WRAP))
                return false;
 
-       vmw_fifo_mem_write(fman->dev_priv, SVGA_FIFO_FENCE_GOAL,
-                          fence->base.seqno);
+       vmw_fence_goal_write(fman->dev_priv, fence->base.seqno);
        fman->seqno_valid = true;
 
        return true;
index c5191de365ca1c1e5268654817113974372f565d..fe4732bf2c9d23a4d22cf856b46031455d9e1ba4 100644 (file)
 
 #define VMW_FENCE_WRAP (1 << 24)
 
+static u32 vmw_irqflag_fence_goal(struct vmw_private *vmw)
+{
+       if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
+               return SVGA_IRQFLAG_REG_FENCE_GOAL;
+       else
+               return SVGA_IRQFLAG_FENCE_GOAL;
+}
+
 /**
  * vmw_thread_fn - Deferred (process context) irq handler
  *
@@ -96,7 +104,7 @@ static irqreturn_t vmw_irq_handler(int irq, void *arg)
                wake_up_all(&dev_priv->fifo_queue);
 
        if ((masked_status & (SVGA_IRQFLAG_ANY_FENCE |
-                             SVGA_IRQFLAG_FENCE_GOAL)) &&
+                             vmw_irqflag_fence_goal(dev_priv))) &&
            !test_and_set_bit(VMW_IRQTHREAD_FENCE, dev_priv->irqthread_pending))
                ret = IRQ_WAKE_THREAD;
 
@@ -137,8 +145,7 @@ bool vmw_seqno_passed(struct vmw_private *dev_priv,
        if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
                return true;
 
-       if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE) &&
-           vmw_fifo_idle(dev_priv, seqno))
+       if (!vmw_has_fences(dev_priv) && vmw_fifo_idle(dev_priv, seqno))
                return true;
 
        /**
@@ -160,6 +167,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
                      unsigned long timeout)
 {
        struct vmw_fifo_state *fifo_state = dev_priv->fifo;
+       bool fifo_down = false;
 
        uint32_t count = 0;
        uint32_t signal_seq;
@@ -176,12 +184,14 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
         */
 
        if (fifo_idle) {
-               down_read(&fifo_state->rwsem);
                if (dev_priv->cman) {
                        ret = vmw_cmdbuf_idle(dev_priv->cman, interruptible,
                                              10*HZ);
                        if (ret)
                                goto out_err;
+               } else if (fifo_state) {
+                       down_read(&fifo_state->rwsem);
+                       fifo_down = true;
                }
        }
 
@@ -218,12 +228,12 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
                }
        }
        finish_wait(&dev_priv->fence_queue, &__wait);
-       if (ret == 0 && fifo_idle)
+       if (ret == 0 && fifo_idle && fifo_state)
                vmw_fence_write(dev_priv, signal_seq);
 
        wake_up_all(&dev_priv->fence_queue);
 out_err:
-       if (fifo_idle)
+       if (fifo_down)
                up_read(&fifo_state->rwsem);
 
        return ret;
@@ -266,13 +276,13 @@ void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
 
 void vmw_goal_waiter_add(struct vmw_private *dev_priv)
 {
-       vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
+       vmw_generic_waiter_add(dev_priv, vmw_irqflag_fence_goal(dev_priv),
                               &dev_priv->goal_queue_waiters);
 }
 
 void vmw_goal_waiter_remove(struct vmw_private *dev_priv)
 {
-       vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
+       vmw_generic_waiter_remove(dev_priv, vmw_irqflag_fence_goal(dev_priv),
                                  &dev_priv->goal_queue_waiters);
 }
 
index bbd2f4ec08ec13f95e718d6434b3d11664f6f8f2..93431e8f6606014ef33f660ed3879383385a4065 100644 (file)
@@ -1344,7 +1344,6 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
                ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb,
                                                      mode_cmd,
                                                      is_bo_proxy);
-
                /*
                 * vmw_create_bo_proxy() adds a reference that is no longer
                 * needed
@@ -1385,13 +1384,16 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
        ret = vmw_user_lookup_handle(dev_priv, file_priv,
                                     mode_cmd->handles[0],
                                     &surface, &bo);
-       if (ret)
+       if (ret) {
+               DRM_ERROR("Invalid buffer object handle %u (0x%x).\n",
+                         mode_cmd->handles[0], mode_cmd->handles[0]);
                goto err_out;
+       }
 
 
        if (!bo &&
            !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) {
-               DRM_ERROR("Surface size cannot exceed %dx%d",
+               DRM_ERROR("Surface size cannot exceed %dx%d\n",
                        dev_priv->texture_max_width,
                        dev_priv->texture_max_height);
                goto err_out;
index cefafe859aba3f7c628a9a6b59fea4b856af14fd..a987c78abe412b0effd54b7c54f796dd24dda99a 100644 (file)
@@ -80,15 +80,6 @@ struct drm_pending_vblank_event;
 /* timeout in ms to wait for backend to respond */
 #define XEN_DRM_FRONT_WAIT_BACK_MS     3000
 
-#ifndef GRANT_INVALID_REF
-/*
- * Note on usage of grant reference 0 as invalid grant reference:
- * grant reference 0 is valid, but never exposed to a PV driver,
- * because of the fact it is already in use/reserved by the PV console.
- */
-#define GRANT_INVALID_REF      0
-#endif
-
 struct xen_drm_front_info {
        struct xenbus_device *xb_dev;
        struct xen_drm_front_drm_info *drm_info;
index 08b526eeec168e3a71a09df993d297cd1d84f2d6..e52afd792346e557065fdb325ad080dec31c08f0 100644 (file)
@@ -123,12 +123,12 @@ out:
 static void evtchnl_free(struct xen_drm_front_info *front_info,
                         struct xen_drm_front_evtchnl *evtchnl)
 {
-       unsigned long page = 0;
+       void *page = NULL;
 
        if (evtchnl->type == EVTCHNL_TYPE_REQ)
-               page = (unsigned long)evtchnl->u.req.ring.sring;
+               page = evtchnl->u.req.ring.sring;
        else if (evtchnl->type == EVTCHNL_TYPE_EVT)
-               page = (unsigned long)evtchnl->u.evt.page;
+               page = evtchnl->u.evt.page;
        if (!page)
                return;
 
@@ -147,8 +147,7 @@ static void evtchnl_free(struct xen_drm_front_info *front_info,
                xenbus_free_evtchn(front_info->xb_dev, evtchnl->port);
 
        /* end access and free the page */
-       if (evtchnl->gref != GRANT_INVALID_REF)
-               gnttab_end_foreign_access(evtchnl->gref, page);
+       xenbus_teardown_ring(&page, 1, &evtchnl->gref);
 
        memset(evtchnl, 0, sizeof(*evtchnl));
 }
@@ -158,8 +157,7 @@ static int evtchnl_alloc(struct xen_drm_front_info *front_info, int index,
                         enum xen_drm_front_evtchnl_type type)
 {
        struct xenbus_device *xb_dev = front_info->xb_dev;
-       unsigned long page;
-       grant_ref_t gref;
+       void *page;
        irq_handler_t handler;
        int ret;
 
@@ -168,44 +166,25 @@ static int evtchnl_alloc(struct xen_drm_front_info *front_info, int index,
        evtchnl->index = index;
        evtchnl->front_info = front_info;
        evtchnl->state = EVTCHNL_STATE_DISCONNECTED;
-       evtchnl->gref = GRANT_INVALID_REF;
 
-       page = get_zeroed_page(GFP_NOIO | __GFP_HIGH);
-       if (!page) {
-               ret = -ENOMEM;
+       ret = xenbus_setup_ring(xb_dev, GFP_NOIO | __GFP_HIGH, &page,
+                               1, &evtchnl->gref);
+       if (ret)
                goto fail;
-       }
 
        if (type == EVTCHNL_TYPE_REQ) {
                struct xen_displif_sring *sring;
 
                init_completion(&evtchnl->u.req.completion);
                mutex_init(&evtchnl->u.req.req_io_lock);
-               sring = (struct xen_displif_sring *)page;
-               SHARED_RING_INIT(sring);
-               FRONT_RING_INIT(&evtchnl->u.req.ring, sring, XEN_PAGE_SIZE);
-
-               ret = xenbus_grant_ring(xb_dev, sring, 1, &gref);
-               if (ret < 0) {
-                       evtchnl->u.req.ring.sring = NULL;
-                       free_page(page);
-                       goto fail;
-               }
+               sring = page;
+               XEN_FRONT_RING_INIT(&evtchnl->u.req.ring, sring, XEN_PAGE_SIZE);
 
                handler = evtchnl_interrupt_ctrl;
        } else {
-               ret = gnttab_grant_foreign_access(xb_dev->otherend_id,
-                                                 virt_to_gfn((void *)page), 0);
-               if (ret < 0) {
-                       free_page(page);
-                       goto fail;
-               }
-
-               evtchnl->u.evt.page = (struct xendispl_event_page *)page;
-               gref = ret;
+               evtchnl->u.evt.page = page;
                handler = evtchnl_interrupt_evt;
        }
-       evtchnl->gref = gref;
 
        ret = xenbus_alloc_evtchn(xb_dev, &evtchnl->port);
        if (ret < 0)
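
The xen-drm-front rewrite above converges on the generic xenbus ring helpers, which bundle page allocation, grant setup and the matching teardown, making the driver-local GRANT_INVALID_REF bookkeeping unnecessary. A minimal sketch of the pairing for a single-page ring, as used here (illustrative; error paths elided):

        void *page;
        grant_ref_t gref;
        int ret;

        /* Allocates a zeroed page and grants the backend access to it. */
        ret = xenbus_setup_ring(xb_dev, GFP_NOIO | __GFP_HIGH, &page, 1, &gref);
        if (ret)
                return ret;

        /* ... use the page as a shared ring or event page ... */

        /* Ends foreign access and frees the page. */
        xenbus_teardown_ring(&page, 1, &gref);
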
index a95a7cbc4a59c8e03545543df28bec2265a55a06..70da5931082f6e2b1b597244b768e91e681af38b 100644 (file)
@@ -697,6 +697,14 @@ config HID_MAYFLASH
        Say Y here if you have HJZ Mayflash PS3 game controller adapters
        and want to enable force feedback support.
 
+config HID_MEGAWORLD_FF
+       tristate "Mega World based game controller force feedback support"
+       depends on USB_HID
+       select INPUT_FF_MEMLESS
+       help
+       Say Y here if you have a Mega World based game controller and want
+       to have force feedback support for it.
+
 config HID_REDRAGON
        tristate "Redragon keyboards"
        depends on HID
index 345ac5581bd85537896f2d169f7d5d41cd072b78..cac2cbe26d112bc3ae7f942cacedcd48e0960946 100644 (file)
@@ -77,6 +77,7 @@ obj-$(CONFIG_HID_MAGICMOUSE)  += hid-magicmouse.o
 obj-$(CONFIG_HID_MALTRON)      += hid-maltron.o
 obj-$(CONFIG_HID_MCP2221)      += hid-mcp2221.o
 obj-$(CONFIG_HID_MAYFLASH)     += hid-mf.o
+obj-$(CONFIG_HID_MEGAWORLD_FF) += hid-megaworld.o
 obj-$(CONFIG_HID_MICROSOFT)    += hid-microsoft.o
 obj-$(CONFIG_HID_MONTEREY)     += hid-monterey.o
 obj-$(CONFIG_HID_MULTITOUCH)   += hid-multitouch.o
index c5de0ec4f9d0376ed2a83d760a6681961c77c94e..0f770a2b47ff55c464fa5f7b271d3ef44b3389b9 100644 (file)
@@ -141,6 +141,24 @@ u32 amd_sfh_wait_for_response(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts)
        return sensor_sts;
 }
 
+const char *get_sensor_name(int idx)
+{
+       switch (idx) {
+       case accel_idx:
+               return "accelerometer";
+       case gyro_idx:
+               return "gyroscope";
+       case mag_idx:
+               return "magnetometer";
+       case als_idx:
+               return "ALS";
+       case HPD_IDX:
+               return "HPD";
+       default:
+               return "unknown sensor type";
+       }
+}
+
 int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 {
        struct amd_input_data *in_data = &privdata->in_data;
@@ -219,13 +237,27 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
                                        (privdata, cl_data->sensor_idx[i], SENSOR_DISABLED);
                                if (status != SENSOR_ENABLED)
                                        cl_data->sensor_sts[i] = SENSOR_DISABLED;
-                               dev_dbg(dev, "sid 0x%x status 0x%x\n",
-                                       cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+                               dev_dbg(dev, "sid 0x%x (%s) status 0x%x\n",
+                                       cl_data->sensor_idx[i],
+                                       get_sensor_name(cl_data->sensor_idx[i]),
+                                       cl_data->sensor_sts[i]);
                                goto cleanup;
                        }
                }
-               dev_dbg(dev, "sid 0x%x status 0x%x\n",
-                       cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+               dev_dbg(dev, "sid 0x%x (%s) status 0x%x\n",
+                       cl_data->sensor_idx[i], get_sensor_name(cl_data->sensor_idx[i]),
+                       cl_data->sensor_sts[i]);
+       }
+       if (privdata->mp2_ops->discovery_status &&
+           privdata->mp2_ops->discovery_status(privdata) == 0) {
+               amd_sfh_hid_client_deinit(privdata);
+               for (i = 0; i < cl_data->num_hid_devices; i++) {
+                       devm_kfree(dev, cl_data->feature_report[i]);
+                       devm_kfree(dev, in_data->input_report[i]);
+                       devm_kfree(dev, cl_data->report_descr[i]);
+               }
+               dev_warn(dev, "Failed to discover, sensors not enabled\n");
+               return -EOPNOTSUPP;
        }
        schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP));
        return 0;
@@ -257,8 +289,9 @@ int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata)
                                        (privdata, cl_data->sensor_idx[i], SENSOR_DISABLED);
                        if (status != SENSOR_ENABLED)
                                cl_data->sensor_sts[i] = SENSOR_DISABLED;
-                       dev_dbg(&privdata->pdev->dev, "stopping sid 0x%x status 0x%x\n",
-                               cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+                       dev_dbg(&privdata->pdev->dev, "stopping sid 0x%x (%s) status 0x%x\n",
+                               cl_data->sensor_idx[i], get_sensor_name(cl_data->sensor_idx[i]),
+                               cl_data->sensor_sts[i]);
                }
        }
 
index 2bf97b6ac9735a54bc47d348140ab1946069cca0..1089134030b0c07d2e99dd676f754c5fd63bae18 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 
 #include "amd_sfh_hid.h"
+#include "amd_sfh_pcie.h"
 
 #define AMD_SFH_RESPONSE_TIMEOUT       1500
 
@@ -120,6 +121,8 @@ static struct hid_ll_driver amdtp_hid_ll_driver = {
 
 int amdtp_hid_probe(u32 cur_hid_dev, struct amdtp_cl_data *cli_data)
 {
+       struct amd_mp2_dev *mp2 = container_of(cli_data->in_data, struct amd_mp2_dev, in_data);
+       struct device *dev = &mp2->pdev->dev;
        struct hid_device *hid;
        struct amdtp_hid_data *hid_data;
        int rc;
@@ -141,10 +144,12 @@ int amdtp_hid_probe(u32 cur_hid_dev, struct amdtp_cl_data *cli_data)
 
        hid->driver_data = hid_data;
        cli_data->hid_sensor_hubs[cur_hid_dev] = hid;
-       hid->bus = BUS_AMD_AMDTP;
+       strscpy(hid->phys, dev->driver ? dev->driver->name : dev_name(dev),
+               sizeof(hid->phys));
+       hid->bus = BUS_AMD_SFH;
        hid->vendor = AMD_SFH_HID_VENDOR;
        hid->product = AMD_SFH_HID_PRODUCT;
-       snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X", "hid-amdtp",
+       snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X", "hid-amdsfh",
                 hid->vendor, hid->product);
 
        rc = hid_add_device(hid);
index c60abd38054ca2705a882c4c3d5e83329b540292..ad264db631805dba272edc3ce38d5036ca696998 100644 (file)
@@ -12,7 +12,6 @@
 #define AMDSFH_HID_H
 
 #define MAX_HID_DEVICES                5
-#define BUS_AMD_AMDTP          0x20
 #define AMD_SFH_HID_VENDOR     0x1022
 #define AMD_SFH_HID_PRODUCT    0x0001
 
index 6b5fd90b0bd1b89a937d81b483cb1841ce65f369..dadc491bbf6b241f3b9f96624748fc130d18f89b 100644 (file)
@@ -130,6 +130,12 @@ static int amd_sfh_irq_init_v2(struct amd_mp2_dev *privdata)
        return 0;
 }
 
+static int amd_sfh_dis_sts_v2(struct amd_mp2_dev *privdata)
+{
+       return (readl(privdata->mmio + AMD_P2C_MSG(1)) &
+                     SENSOR_DISCOVERY_STATUS_MASK) >> SENSOR_DISCOVERY_STATUS_SHIFT;
+}
+
 void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info)
 {
        union sfh_cmd_param cmd_param;
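
amd_sfh_dis_sts_v2() above is a plain mask-and-shift read. Since SENSOR_DISCOVERY_STATUS_MASK is the compile-time constant GENMASK(5, 3), the same extraction could also be written with the bitfield helpers (a hypothetical alternative, not what the patch does; the _alt name is made up):

        #include <linux/bitfield.h>

        static int amd_sfh_dis_sts_v2_alt(struct amd_mp2_dev *privdata)
        {
                /* FIELD_GET() masks out bits 5..3 and shifts them down
                 * in one step. */
                return FIELD_GET(SENSOR_DISCOVERY_STATUS_MASK,
                                 readl(privdata->mmio + AMD_P2C_MSG(1)));
        }
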
@@ -245,6 +251,7 @@ static const struct amd_mp2_ops amd_sfh_ops_v2 = {
        .response = amd_sfh_wait_response_v2,
        .clear_intr = amd_sfh_clear_intr_v2,
        .init_intr = amd_sfh_irq_init_v2,
+       .discovery_status = amd_sfh_dis_sts_v2,
 };
 
 static const struct amd_mp2_ops amd_sfh_ops = {
@@ -346,8 +353,9 @@ static int __maybe_unused amd_mp2_pci_resume(struct device *dev)
                                        (mp2, cl_data->sensor_idx[i], SENSOR_ENABLED);
                        if (status == SENSOR_ENABLED)
                                cl_data->sensor_sts[i] = SENSOR_ENABLED;
-                       dev_dbg(dev, "resume sid 0x%x status 0x%x\n",
-                               cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+                       dev_dbg(dev, "suspend sid 0x%x (%s) status 0x%x\n",
+                               cl_data->sensor_idx[i], get_sensor_name(cl_data->sensor_idx[i]),
+                               cl_data->sensor_sts[i]);
                }
        }
 
@@ -371,8 +379,9 @@ static int __maybe_unused amd_mp2_pci_suspend(struct device *dev)
                                        (mp2, cl_data->sensor_idx[i], SENSOR_DISABLED);
                        if (status != SENSOR_ENABLED)
                                cl_data->sensor_sts[i] = SENSOR_DISABLED;
-                       dev_dbg(dev, "suspend sid 0x%x status 0x%x\n",
-                               cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+                       dev_dbg(dev, "suspend sid 0x%x (%s) status 0x%x\n",
+                               cl_data->sensor_idx[i], get_sensor_name(cl_data->sensor_idx[i]),
+                               cl_data->sensor_sts[i]);
                }
        }
 
index 97b99861fae251a542cb8a13dddd1a86c6de7fc2..8c760526132aaab880e008db07a0b928b3774258 100644 (file)
@@ -39,6 +39,9 @@
 
 #define AMD_SFH_IDLE_LOOP      200
 
+#define SENSOR_DISCOVERY_STATUS_MASK           GENMASK(5, 3)
+#define SENSOR_DISCOVERY_STATUS_SHIFT          3
+
 /* SFH Command register */
 union sfh_cmd_base {
        u32 ul;
@@ -135,6 +138,7 @@ int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata);
 u32 amd_sfh_wait_for_response(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts);
 void amd_mp2_suspend(struct amd_mp2_dev *mp2);
 void amd_mp2_resume(struct amd_mp2_dev *mp2);
+const char *get_sensor_name(int idx);
 
 struct amd_mp2_ops {
         void (*start)(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info);
@@ -143,5 +147,6 @@ struct amd_mp2_ops {
         int (*response)(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts);
         void (*clear_intr)(struct amd_mp2_dev *privdata);
         int (*init_intr)(struct amd_mp2_dev *privdata);
+        int (*discovery_status)(struct amd_mp2_dev *privdata);
 };
 #endif
index 8d97ca0f9b52646cad8201b61f370f6197c3759b..697f2791ea9cbb989a9ce63ff1a1c4dc0c9a438a 100644 (file)
@@ -179,7 +179,7 @@ static const u8 accel3_report_descriptor[] = {
 0xC0                   /* HID end collection */
 };
 
-const u8 gyro3_report_descriptor[] = {
+static const u8 gyro3_report_descriptor[] = {
 0x05, 0x20,            /* Usage page */
 0x09, 0x76,            /* Motion type Gyro3D */
 0xA1, 0x00,            /* HID Collection (Physical) */
@@ -340,7 +340,7 @@ const u8 gyro3_report_descriptor[] = {
 0xC0,                  /* HID end collection */
 };
 
-const u8 comp3_report_descriptor[] = {
+static const u8 comp3_report_descriptor[] = {
 0x05, 0x20,            /* Usage page */
 0x09, 0x83,            /* Motion type Orientation compass 3D */
 0xA1, 0x00,            /* HID Collection (Physical) */
@@ -512,7 +512,7 @@ const u8 comp3_report_descriptor[] = {
 0xC0                           /* HID end collection */
 };
 
-const u8 als_report_descriptor[] = {
+static const u8 als_report_descriptor[] = {
 0x05, 0x20,    /* HID usage page sensor */
 0x09, 0x41,    /* HID usage sensor type Ambientlight  */
 0xA1, 0x00,    /* HID Collection (Physical) */
index 0cf35caee9fa048480146c0cdcad0eac7c466280..42a568902f4923a0b3ca8856138d131b463e5788 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
+#include <linux/string.h>
 
 #include "hid-ids.h"
 
 #define APPLE_NUMLOCK_EMULATION        BIT(8)
 #define APPLE_RDESC_BATTERY    BIT(9)
 #define APPLE_BACKLIGHT_CTL    BIT(10)
+#define APPLE_IS_KEYCHRON      BIT(11)
 
 #define APPLE_FLAG_FKEY                0x01
 
 #define HID_COUNTRY_INTERNATIONAL_ISO  13
 #define APPLE_BATTERY_TIMEOUT_MS       60000
 
-static unsigned int fnmode = 1;
+static unsigned int fnmode = 3;
 module_param(fnmode, uint, 0644);
 MODULE_PARM_DESC(fnmode, "Mode of fn key on Apple keyboards (0 = disabled, "
-               "[1] = fkeyslast, 2 = fkeysfirst)");
+               "1 = fkeyslast, 2 = fkeysfirst, [3] = auto)");
 
 static int iso_layout = -1;
 module_param(iso_layout, int, 0644);
@@ -349,6 +351,7 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
        const struct apple_key_translation *trans, *table;
        bool do_translate;
        u16 code = 0;
+       unsigned int real_fnmode;
 
        u16 fn_keycode = (swap_fn_leftctrl) ? (KEY_LEFTCTRL) : (KEY_FN);
 
@@ -359,7 +362,13 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
                return 1;
        }
 
-       if (fnmode) {
+       if (fnmode == 3) {
+               real_fnmode = (asc->quirks & APPLE_IS_KEYCHRON) ? 2 : 1;
+       } else {
+               real_fnmode = fnmode;
+       }
+
+       if (real_fnmode) {
                if (hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI ||
                    hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO ||
                    hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS ||
@@ -406,7 +415,7 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 
                        if (!code) {
                                if (trans->flags & APPLE_FLAG_FKEY) {
-                                       switch (fnmode) {
+                                       switch (real_fnmode) {
                                        case 1:
                                                do_translate = !asc->fn_on;
                                                break;
@@ -660,6 +669,11 @@ static int apple_input_configured(struct hid_device *hdev,
                asc->quirks &= ~APPLE_HAS_FN;
        }
 
+       if (strncmp(hdev->name, "Keychron", 8) == 0) {
+               hid_info(hdev, "Keychron keyboard detected; function keys will default to fnmode=2 behavior\n");
+               asc->quirks |= APPLE_IS_KEYCHRON;
+       }
+
        return 0;
 }
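
The hid-apple changes above amount to the following mode selection (a paraphrase; the mode meanings follow the updated MODULE_PARM_DESC):

        /*
         * fnmode=0: no Fn translation
         * fnmode=1: media functions first, hold Fn for F1..F12 (old default)
         * fnmode=2: F1..F12 first, hold Fn for media functions
         * fnmode=3: choose 2 on Keychron keyboards, 1 otherwise (new default)
         */
        real_fnmode = (fnmode == 3)
                    ? ((asc->quirks & APPLE_IS_KEYCHRON) ? 2 : 1)
                    : fnmode;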
 
index 74ad8bf98bfd5acea3d24ecff58300bdab434a26..e8c5e3ac9fff1596b4962697cbf42be116c9d600 100644 (file)
@@ -347,6 +347,12 @@ static int bigben_probe(struct hid_device *hid,
        bigben->report = list_entry(report_list->next,
                struct hid_report, list);
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               error = -ENODEV;
+               goto error_hw_stop;
+       }
+
        hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
        set_bit(FF_RUMBLE, hidinput->input->ffbit);
 
index db925794fbe6ef47447088e15fa36c21dea39dea..00154a1cd2d83a1be2c8f48dc7f3ca7cc8fdd9e4 100644 (file)
@@ -2222,6 +2222,10 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
        case BUS_VIRTUAL:
                bus = "VIRTUAL";
                break;
+       case BUS_INTEL_ISHTP:
+       case BUS_AMD_SFH:
+               bus = "SENSOR HUB";
+               break;
        default:
                bus = "<UNKNOWN>";
        }
index 3091355d48df64a28ae1bc704eae4185274975f5..8e4a5528e25dfe826224bb99f410f3944fd49353 100644 (file)
@@ -188,7 +188,6 @@ static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi)
        ret = input_mt_init_slots(input, ELAN_MAX_FINGERS, INPUT_MT_POINTER);
        if (ret) {
                hid_err(hdev, "Failed to init elan MT slots: %d\n", ret);
-               input_free_device(input);
                return ret;
        }
 
@@ -200,7 +199,6 @@ static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi)
                hid_err(hdev, "Failed to register elan input device: %d\n",
                        ret);
                input_mt_destroy_slots(input);
-               input_free_device(input);
                return ret;
        }
 
index 053853a891c50b07b55baae421275d382c332cfc..d9eb676abe9600b7a6c96b9cd794cb8383c058c7 100644 (file)
 #define USB_VENDOR_ID_LENOVO           0x17ef
 #define USB_DEVICE_ID_LENOVO_TPKBD     0x6009
 #define USB_DEVICE_ID_LENOVO_CUSBKBD   0x6047
+#define USB_DEVICE_ID_LENOVO_TPIIUSBKBD        0x60ee
 #define USB_DEVICE_ID_LENOVO_CBTKBD    0x6048
+#define USB_DEVICE_ID_LENOVO_TPIIBTKBD 0x60e1
 #define USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL       0x6049
 #define USB_DEVICE_ID_LENOVO_TP10UBKBD 0x6062
 #define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067
 #define USB_DEVICE_ID_LENOVO_X1_COVER  0x6085
 #define USB_DEVICE_ID_LENOVO_X1_TAB    0x60a3
 #define USB_DEVICE_ID_LENOVO_X1_TAB3   0x60b5
+#define USB_DEVICE_ID_LENOVO_X12_TAB   0x60fe
 #define USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E    0x600e
 #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D     0x608d
 #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019     0x6019
 #define USB_VENDOR_ID_MCS              0x16d0
 #define USB_DEVICE_ID_MCS_GAMEPADBLOCK 0x0bcc
 
+#define USB_VENDOR_MEGAWORLD           0x07b5
+#define USB_DEVICE_ID_MEGAWORLD_GAMEPAD        0x0312
+
 #define USB_VENDOR_ID_MGE              0x0463
 #define USB_DEVICE_ID_MGE_UPS          0xffff
 #define USB_DEVICE_ID_MGE_UPS1         0x0001
 #define USB_DEVICE_ID_UGEE_XPPEN_TABLET_G540   0x0075
 #define USB_DEVICE_ID_UGEE_XPPEN_TABLET_G640   0x0094
 #define USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01 0x0042
+#define USB_DEVICE_ID_UGEE_XPPEN_TABLET_STAR06 0x0078
 #define USB_DEVICE_ID_UGEE_TABLET_G5           0x0074
 #define USB_DEVICE_ID_UGEE_TABLET_EX07S                0x0071
 #define USB_DEVICE_ID_UGEE_TABLET_RAINBOW_CV720        0x0055
index f46616390a984453327ce817195684d69f315886..da903138eee4901c9e94a1f4061bc960365f78ef 100644 (file)
@@ -33,7 +33,7 @@ static __u8 easypen_i405x_rdesc_fixed[] = {
        0xB1, 0x02,       /*    Feature (Variable),           */
        0xC0,             /*  End Collection,                 */
        0x05, 0x0D,       /*  Usage Page (Digitizer),         */
-       0x09, 0x02,       /*  Usage (Pen),                    */
+       0x09, 0x01,       /*  Usage (Digitizer),              */
        0xA1, 0x01,       /*  Collection (Application),       */
        0x85, 0x10,       /*    Report ID (16),               */
        0x09, 0x20,       /*    Usage (Stylus),               */
@@ -91,7 +91,7 @@ static __u8 mousepen_i608x_rdesc_fixed[] = {
        0xB1, 0x02,       /*    Feature (Variable),           */
        0xC0,             /*  End Collection,                 */
        0x05, 0x0D,       /*  Usage Page (Digitizer),         */
-       0x09, 0x02,       /*  Usage (Pen),                    */
+       0x09, 0x01,       /*  Usage (Digitizer),              */
        0xA1, 0x01,       /*  Collection (Application),       */
        0x85, 0x10,       /*    Report ID (16),               */
        0x09, 0x20,       /*    Usage (Stylus),               */
@@ -190,7 +190,7 @@ static __u8 mousepen_i608x_v2_rdesc_fixed[] = {
        0xB1, 0x02,                   /*    Feature (Variable),           */
        0xC0,                         /*  End Collection,                 */
        0x05, 0x0D,                   /*  Usage Page (Digitizer),         */
-       0x09, 0x02,                   /*  Usage (Pen),                    */
+       0x09, 0x01,                   /*  Usage (Digitizer),              */
        0xA1, 0x01,                   /*  Collection (Application),       */
        0x85, 0x10,                   /*    Report ID (16),               */
        0x09, 0x20,                   /*    Usage (Stylus),               */
@@ -289,7 +289,7 @@ static __u8 easypen_m610x_rdesc_fixed[] = {
        0xB1, 0x02,                   /*    Feature (Variable),           */
        0xC0,                         /*  End Collection,                 */
        0x05, 0x0D,                   /*  Usage Page (Digitizer),         */
-       0x09, 0x02,                   /*  Usage (Pen),                    */
+       0x09, 0x01,                   /*  Usage (Digitizer),              */
        0xA1, 0x01,                   /*  Collection (Application),       */
        0x85, 0x10,                   /*    Report ID (16),               */
        0x09, 0x20,                   /*    Usage (Stylus),               */
@@ -368,7 +368,7 @@ static __u8 pensketch_m912_rdesc_fixed[] = {
        0xB1, 0x02,                   /*    Feature (Variable),           */
        0xC0,                         /*  End Collection,                 */
        0x05, 0x0D,                   /*  Usage Page (Digitizer),         */
-       0x09, 0x02,                   /*  Usage (Pen),                    */
+       0x09, 0x01,                   /*  Usage (Digitizer),              */
        0xA1, 0x01,                   /*  Collection (Application),       */
        0x85, 0x10,                   /*    Report ID (16),               */
        0x09, 0x20,                   /*    Usage (Stylus),               */
@@ -497,7 +497,7 @@ static __u8 easypen_m406xe_rdesc_fixed[] = {
        0xB1, 0x02,         /*      Feature (Variable),             */
        0xC0,               /*  End Collection,                     */
        0x05, 0x0D,         /*  Usage Page (Digitizer),             */
-       0x09, 0x02,         /*  Usage (Pen),                        */
+       0x09, 0x01,         /*  Usage (Digitizer),                  */
        0xA1, 0x01,         /*  Collection (Application),           */
        0x85, 0x10,         /*      Report ID (16),                 */
        0x09, 0x20,         /*      Usage (Stylus),                 */
index c2c66ceca13276a85733a67b35c62033f2ee016e..7d82f8d426bbcb75c3f7ae3a2c8b95f8a439ae10 100644 (file)
@@ -366,7 +366,7 @@ static const struct hidled_config hidled_configs[] = {
                .type = DREAM_CHEEKY,
                .name = "Dream Cheeky Webmail Notifier",
                .short_name = "dream_cheeky",
-               .max_brightness = 31,
+               .max_brightness = 63,
                .num_leds = 1,
                .report_size = 9,
                .report_type = RAW_REQUEST,
index 93b1f935e526e84d80826c14352cdc1b02fb1829..9dabd63232343eaca67cfa0961faf92612f33297 100644 (file)
@@ -4,6 +4,7 @@
  *  - ThinkPad USB Keyboard with TrackPoint (tpkbd)
  *  - ThinkPad Compact Bluetooth Keyboard with TrackPoint (cptkbd)
  *  - ThinkPad Compact USB Keyboard with TrackPoint (cptkbd)
+ *  - ThinkPad TrackPoint Keyboard II USB/Bluetooth (cptkbd/tpIIkbd)
  *
  *  Copyright (c) 2012 Bernhard Seibold
  *  Copyright (c) 2014 Jamie Lentin <jm@lentin.co.uk>
@@ -110,6 +111,23 @@ static const __u8 lenovo_pro_dock_need_fixup_collection[] = {
        0x2a, 0xff, 0xff,       /*  Usage Maximum (65535)               */
 };
 
+/* Broken ThinkPad TrackPoint II collection (Bluetooth mode) */
+static const __u8 lenovo_tpIIbtkbd_need_fixup_collection[] = {
+       0x06, 0x00, 0xFF,       /* Usage Page (Vendor Defined 0xFF00) */
+       0x09, 0x01,             /* Usage (0x01) */
+       0xA1, 0x01,             /* Collection (Application) */
+       0x85, 0x05,             /*   Report ID (5) */
+       0x1A, 0xF1, 0x00,       /*   Usage Minimum (0xF1) */
+       0x2A, 0xFC, 0x00,       /*   Usage Maximum (0xFC) */
+       0x15, 0x00,             /*   Logical Minimum (0) */
+       0x25, 0x01,             /*   Logical Maximum (1) */
+       0x75, 0x01,             /*   Report Size (1) */
+       0x95, 0x0D,             /*   Report Count (13) */
+       0x81, 0x02,             /*   Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) */
+       0x95, 0x03,             /*   Report Count (3) */
+       0x81, 0x01,             /*   Input (Const,Array,Abs,No Wrap,Linear,Preferred State,No Null Position) */
+};
+
 static __u8 *lenovo_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                unsigned int *rsize)
 {
@@ -126,6 +144,19 @@ static __u8 *lenovo_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                        rdesc[152] = 0x00;
                }
                break;
+       case USB_DEVICE_ID_LENOVO_TPIIBTKBD:
+               if (*rsize >= 263 &&
+                   memcmp(&rdesc[234], lenovo_tpIIbtkbd_need_fixup_collection,
+                         sizeof(lenovo_tpIIbtkbd_need_fixup_collection)) == 0) {
+                       rdesc[244] = 0x00; /* usage minimum = 0x00 */
+                       rdesc[247] = 0xff; /* usage maximum = 0xff */
+                       rdesc[252] = 0xff; /* logical maximum = 0xff */
+                       rdesc[254] = 0x08; /* report size = 0x08 */
+                       rdesc[256] = 0x01; /* report count = 0x01 */
+                       rdesc[258] = 0x00; /* input = 0x00 */
+                       rdesc[260] = 0x01; /* report count (2) = 0x01 */
+               }
+               break;
        }
        return rdesc;
 }
@@ -217,6 +248,101 @@ static int lenovo_input_mapping_cptkbd(struct hid_device *hdev,
        return 0;
 }
 
+static int lenovo_input_mapping_tpIIkbd(struct hid_device *hdev,
+               struct hid_input *hi, struct hid_field *field,
+               struct hid_usage *usage, unsigned long **bit, int *max)
+{
+       /*
+        * 0xff0a0000 = USB, HID_UP_MSVENDOR = BT.
+        *
+        * In BT mode, there are two HID_UP_MSVENDOR pages.
+        * Use only the page that contains report ID == 5.
+        */
+       if (((usage->hid & HID_USAGE_PAGE) == 0xff0a0000 ||
+           (usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR) &&
+           field->report->id == 5) {
+               switch (usage->hid & HID_USAGE) {
+               case 0x00bb: /* Fn-F4: Mic mute */
+                       map_key_clear(LENOVO_KEY_MICMUTE);
+                       return 1;
+               case 0x00c3: /* Fn-F5: Brightness down */
+                       map_key_clear(KEY_BRIGHTNESSDOWN);
+                       return 1;
+               case 0x00c4: /* Fn-F6: Brightness up */
+                       map_key_clear(KEY_BRIGHTNESSUP);
+                       return 1;
+               case 0x00c1: /* Fn-F8: Notification center */
+                       map_key_clear(KEY_NOTIFICATION_CENTER);
+                       return 1;
+               case 0x00bc: /* Fn-F9: Control panel */
+                       map_key_clear(KEY_CONFIG);
+                       return 1;
+               case 0x00b6: /* Fn-F10: Bluetooth */
+                       map_key_clear(KEY_BLUETOOTH);
+                       return 1;
+               case 0x00b7: /* Fn-F11: Keyboard config */
+                       map_key_clear(KEY_KEYBOARD);
+                       return 1;
+               case 0x00b8: /* Fn-F12: User function */
+                       map_key_clear(KEY_PROG1);
+                       return 1;
+               case 0x00b9: /* Fn-PrtSc: Snipping tool */
+                       map_key_clear(KEY_SELECTIVE_SCREENSHOT);
+                       return 1;
+               case 0x00b5: /* Fn-Esc: Fn-lock toggle */
+                       map_key_clear(KEY_FN_ESC);
+                       return 1;
+               }
+       }
+
+       if ((usage->hid & HID_USAGE_PAGE) == 0xffa00000) {
+               switch (usage->hid & HID_USAGE) {
+               case 0x00fb: /* Middle mouse (in native USB mode) */
+                       map_key_clear(BTN_MIDDLE);
+                       return 1;
+               }
+       }
+
+       if ((usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR &&
+           field->report->id == 21) {
+               switch (usage->hid & HID_USAGE) {
+               case 0x0004: /* Middle mouse (in native Bluetooth mode) */
+                       map_key_clear(BTN_MIDDLE);
+                       return 1;
+               }
+       }
+
+       /* Compatibility middle/wheel mappings should be ignored */
+       if (usage->hid == HID_GD_WHEEL)
+               return -1;
+       if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON &&
+                       (usage->hid & HID_USAGE) == 0x003)
+               return -1;
+       if ((usage->hid & HID_USAGE_PAGE) == HID_UP_CONSUMER &&
+                       (usage->hid & HID_USAGE) == 0x238)
+               return -1;
+
+       /* Map wheel emulation reports: 0xff10 */
+       if ((usage->hid & HID_USAGE_PAGE) == 0xff100000) {
+               field->flags |= HID_MAIN_ITEM_RELATIVE | HID_MAIN_ITEM_VARIABLE;
+               field->logical_minimum = -127;
+               field->logical_maximum = 127;
+
+               switch (usage->hid & HID_USAGE) {
+               case 0x0000:
+                       hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL);
+                       return 1;
+               case 0x0001:
+                       hid_map_usage(hi, usage, bit, max, EV_REL, REL_WHEEL);
+                       return 1;
+               default:
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
 static int lenovo_input_mapping_scrollpoint(struct hid_device *hdev,
                struct hid_input *hi, struct hid_field *field,
                struct hid_usage *usage, unsigned long **bit, int *max)
@@ -326,6 +452,10 @@ static int lenovo_input_mapping(struct hid_device *hdev,
        case USB_DEVICE_ID_LENOVO_CBTKBD:
                return lenovo_input_mapping_cptkbd(hdev, hi, field,
                                                        usage, bit, max);
+       case USB_DEVICE_ID_LENOVO_TPIIUSBKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIBTKBD:
+               return lenovo_input_mapping_tpIIkbd(hdev, hi, field,
+                                                       usage, bit, max);
        case USB_DEVICE_ID_IBM_SCROLLPOINT_III:
        case USB_DEVICE_ID_IBM_SCROLLPOINT_PRO:
        case USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL:
@@ -357,16 +487,23 @@ static int lenovo_send_cmd_cptkbd(struct hid_device *hdev,
        if (!buf)
                return -ENOMEM;
 
+       /*
+        * Feature report 0x13 is used for USB,
+        * output report 0x18 is used for Bluetooth.
+        * buf[0] is ignored by hid_hw_raw_request.
+        */
        buf[0] = 0x18;
        buf[1] = byte2;
        buf[2] = byte3;
 
        switch (hdev->product) {
        case USB_DEVICE_ID_LENOVO_CUSBKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIUSBKBD:
                ret = hid_hw_raw_request(hdev, 0x13, buf, 3,
                                        HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
                break;
        case USB_DEVICE_ID_LENOVO_CBTKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIBTKBD:
                ret = hid_hw_output_report(hdev, buf, 3);
                break;
        default:
@@ -422,6 +559,8 @@ static ssize_t attr_fn_lock_store(struct device *dev,
        switch (hdev->product) {
        case USB_DEVICE_ID_LENOVO_CUSBKBD:
        case USB_DEVICE_ID_LENOVO_CBTKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIUSBKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIBTKBD:
                lenovo_features_set_cptkbd(hdev);
                break;
        case USB_DEVICE_ID_LENOVO_TP10UBKBD:
@@ -556,6 +695,15 @@ static int lenovo_event_cptkbd(struct hid_device *hdev,
                return 1;
        }
 
+       if (usage->type == EV_KEY && usage->code == KEY_FN_ESC && value == 1) {
+               /*
+                * The user has toggled the Fn-lock state. Toggle our own
+                * cached value of it and sync our value to the keyboard to
+                * ensure things are in sync (the syncing should be a no-op).
+                */
+               cptkbd_data->fn_lock = !cptkbd_data->fn_lock;
+       }
+
        return 0;
 }
 
@@ -568,6 +716,8 @@ static int lenovo_event(struct hid_device *hdev, struct hid_field *field,
        switch (hdev->product) {
        case USB_DEVICE_ID_LENOVO_CUSBKBD:
        case USB_DEVICE_ID_LENOVO_CBTKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIUSBKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIBTKBD:
                return lenovo_event_cptkbd(hdev, field, usage, value);
        case USB_DEVICE_ID_LENOVO_TP10UBKBD:
        case USB_DEVICE_ID_LENOVO_X1_TAB:
@@ -960,8 +1110,9 @@ static int lenovo_probe_cptkbd(struct hid_device *hdev)
        struct lenovo_drvdata *cptkbd_data;
 
        /* All the custom action happens on the USBMOUSE device for USB */
-       if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD
-                       && hdev->type != HID_TYPE_USBMOUSE) {
+       if (((hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD) ||
+           (hdev->product == USB_DEVICE_ID_LENOVO_TPIIUSBKBD)) &&
+           hdev->type != HID_TYPE_USBMOUSE) {
                hid_dbg(hdev, "Ignoring keyboard half of device\n");
                return 0;
        }
@@ -977,11 +1128,14 @@ static int lenovo_probe_cptkbd(struct hid_device *hdev)
 
        /*
         * Tell the keyboard a driver understands it, and turn F7, F9, F11 into
-        * regular keys
+        * regular keys (Compact only)
         */
-       ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03);
-       if (ret)
-               hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret);
+       if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD ||
+           hdev->product == USB_DEVICE_ID_LENOVO_CBTKBD) {
+               ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03);
+               if (ret)
+                       hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret);
+       }
 
        /* Switch middle button to native mode */
        ret = lenovo_send_cmd_cptkbd(hdev, 0x09, 0x01);
@@ -1088,6 +1242,8 @@ static int lenovo_probe(struct hid_device *hdev,
                break;
        case USB_DEVICE_ID_LENOVO_CUSBKBD:
        case USB_DEVICE_ID_LENOVO_CBTKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIUSBKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIBTKBD:
                ret = lenovo_probe_cptkbd(hdev);
                break;
        case USB_DEVICE_ID_LENOVO_TP10UBKBD:
@@ -1154,6 +1310,8 @@ static void lenovo_remove(struct hid_device *hdev)
                break;
        case USB_DEVICE_ID_LENOVO_CUSBKBD:
        case USB_DEVICE_ID_LENOVO_CBTKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIUSBKBD:
+       case USB_DEVICE_ID_LENOVO_TPIIBTKBD:
                lenovo_remove_cptkbd(hdev);
                break;
        case USB_DEVICE_ID_LENOVO_TP10UBKBD:
@@ -1172,6 +1330,8 @@ static int lenovo_input_configured(struct hid_device *hdev,
                case USB_DEVICE_ID_LENOVO_TPKBD:
                case USB_DEVICE_ID_LENOVO_CUSBKBD:
                case USB_DEVICE_ID_LENOVO_CBTKBD:
+               case USB_DEVICE_ID_LENOVO_TPIIUSBKBD:
+               case USB_DEVICE_ID_LENOVO_TPIIBTKBD:
                        if (test_bit(EV_REL, hi->input->evbit)) {
                                /* set only for trackpoint device */
                                __set_bit(INPUT_PROP_POINTER, hi->input->propbit);
@@ -1188,7 +1348,9 @@ static int lenovo_input_configured(struct hid_device *hdev,
 static const struct hid_device_id lenovo_devices[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPIIUSBKBD) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPIIBTKBD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) },
        { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_III) },
        { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_PRO) },
diff --git a/drivers/hid/hid-megaworld.c b/drivers/hid/hid-megaworld.c
new file mode 100644 (file)
index 0000000..5996578
--- /dev/null
+++ b/drivers/hid/hid-megaworld.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Vibration support for Mega World controllers
+ *
+ * Copyright 2022 Frank Zago
+ *
+ * Derived from hid-zpff.c:
+ *   Copyright (c) 2005, 2006 Anssi Hannula <anssi.hannula@gmail.com>
+ */
+
+#include <linux/hid.h>
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "hid-ids.h"
+
+struct mwctrl_device {
+       struct hid_report *report;
+       s32 *weak;
+       s32 *strong;
+};
+
+static int mwctrl_play(struct input_dev *dev, void *data,
+                      struct ff_effect *effect)
+{
+       struct hid_device *hid = input_get_drvdata(dev);
+       struct mwctrl_device *mwctrl = data;
+
+       *mwctrl->strong = effect->u.rumble.strong_magnitude >> 8;
+       *mwctrl->weak = effect->u.rumble.weak_magnitude >> 8;
+
+       hid_hw_request(hid, mwctrl->report, HID_REQ_SET_REPORT);
+
+       return 0;
+}
+
+static int mwctrl_init(struct hid_device *hid)
+{
+       struct mwctrl_device *mwctrl;
+       struct hid_report *report;
+       struct hid_input *hidinput;
+       struct input_dev *dev;
+       int error;
+       int i;
+
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+       hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+       dev = hidinput->input;
+
+       for (i = 0; i < 4; i++) {
+               report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1);
+               if (!report)
+                       return -ENODEV;
+       }
+
+       mwctrl = kzalloc(sizeof(struct mwctrl_device), GFP_KERNEL);
+       if (!mwctrl)
+               return -ENOMEM;
+
+       set_bit(FF_RUMBLE, dev->ffbit);
+
+       error = input_ff_create_memless(dev, mwctrl, mwctrl_play);
+       if (error) {
+               kfree(mwctrl);
+               return error;
+       }
+
+       mwctrl->report = report;
+
+       /* Field 0 is always 2, and field 1 is always 0. The original
+        * Windows driver uses a 5-byte command, where the 5th byte is
+        * a repeat of the 3rd byte; however, the device has only 4
+        * fields. It could be a bug in that driver, or there may be a
+        * different device that needs it.
+        */
+       report->field[0]->value[0] = 0x02;
+
+       mwctrl->strong = &report->field[2]->value[0];
+       mwctrl->weak = &report->field[3]->value[0];
+
+       return 0;
+}
+
+static int mwctrl_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{
+       int ret;
+
+       ret = hid_parse(hdev);
+       if (ret) {
+               hid_err(hdev, "parse failed\n");
+               return ret;
+       }
+
+       ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
+       if (ret) {
+               hid_err(hdev, "hw start failed\n");
+               return ret;
+       }
+
+       ret = mwctrl_init(hdev);
+       if (ret)
+               hid_hw_stop(hdev);
+
+       return ret;
+}
+
+static const struct hid_device_id mwctrl_devices[] = {
+       { HID_USB_DEVICE(USB_VENDOR_MEGAWORLD,
+                        USB_DEVICE_ID_MEGAWORLD_GAMEPAD) },
+       { }
+};
+MODULE_DEVICE_TABLE(hid, mwctrl_devices);
+
+static struct hid_driver mwctrl_driver = {
+       .name = "megaworld",
+       .id_table = mwctrl_devices,
+       .probe = mwctrl_probe,
+};
+module_hid_driver(mwctrl_driver);
+
+MODULE_LICENSE("GPL");
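
For context, rumble registered through input_ff_create_memless() is driven from userspace with the standard evdev force-feedback ioctls; the memless core then calls mwctrl_play() with the scaled magnitudes. A hypothetical test snippet (the event node and magnitudes are placeholders):

        #include <fcntl.h>
        #include <linux/input.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <unistd.h>

        int main(void)
        {
                struct ff_effect e;
                struct input_event play;
                int fd = open("/dev/input/event0", O_RDWR); /* assumed node */

                if (fd < 0)
                        return 1;

                memset(&e, 0, sizeof(e));
                e.type = FF_RUMBLE;
                e.id = -1;                       /* let the kernel pick a slot */
                e.u.rumble.strong_magnitude = 0x8000;
                e.u.rumble.weak_magnitude = 0x4000;
                e.replay.length = 1000;          /* ms */
                if (ioctl(fd, EVIOCSFF, &e) < 0) /* upload the effect */
                        return 1;

                memset(&play, 0, sizeof(play));
                play.type = EV_FF;
                play.code = e.id;
                play.value = 1;                  /* start playback */
                if (write(fd, &play, sizeof(play)) < 0)
                        return 1;
                sleep(2);
                close(fd);
                return 0;
        }
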
index 99eabfb4145b50b882564f41eb339cca9eb0c926..6bb3890b0f2c9dd320173a135058d640f8792aa8 100644 (file)
@@ -2034,6 +2034,12 @@ static const struct hid_device_id mt_devices[] = {
                           USB_VENDOR_ID_LENOVO,
                           USB_DEVICE_ID_LENOVO_X1_TAB3) },
 
+       /* Lenovo X12 TAB Gen 1 */
+       { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
+               HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8,
+                          USB_VENDOR_ID_LENOVO,
+                          USB_DEVICE_ID_LENOVO_X12_TAB) },
+
        /* MosArt panels */
        { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE,
                MT_USB_DEVICE(USB_VENDOR_ID_ASUS,
@@ -2178,6 +2184,9 @@ static const struct hid_device_id mt_devices[] = {
        { .driver_data = MT_CLS_GOOGLE,
                HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE,
                        USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) },
+       { .driver_data = MT_CLS_GOOGLE,
+               HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_GOOGLE,
+                       USB_DEVICE_ID_GOOGLE_WHISKERS) },
 
        /* Generic MT device */
        { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) },
index 05147f2d75645f745c47b35786205b4d4ef79e98..c0fe66e50c58d912aabe10d6a462276a19b9656c 100644 (file)
@@ -81,6 +81,24 @@ static __u8 *uclogic_report_fixup(struct hid_device *hdev, __u8 *rdesc,
        return rdesc;
 }
 
+static int uclogic_input_mapping(struct hid_device *hdev,
+                                struct hid_input *hi,
+                                struct hid_field *field,
+                                struct hid_usage *usage,
+                                unsigned long **bit,
+                                int *max)
+{
+       struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev);
+       struct uclogic_params *params = &drvdata->params;
+
+       /* Discard invalid pen usages */
+       if (params->pen.usage_invalid && (field->application == HID_DG_PEN))
+               return -1;
+
+       /* Let hid-core decide what to do */
+       return 0;
+}
+
 static int uclogic_input_configured(struct hid_device *hdev,
                struct hid_input *hi)
 {
@@ -90,6 +108,8 @@ static int uclogic_input_configured(struct hid_device *hdev,
        const char *suffix = NULL;
        struct hid_field *field;
        size_t len;
+       size_t i;
+       const struct uclogic_params_frame *frame;
 
        /* no report associated (HID_QUIRK_MULTI_INPUT not set) */
        if (!hi->report)
@@ -104,27 +124,44 @@ static int uclogic_input_configured(struct hid_device *hdev,
                drvdata->pen_input = hi->input;
        }
 
-       field = hi->report->field[0];
+       /* If it's one of the frame devices */
+       for (i = 0; i < ARRAY_SIZE(params->frame_list); i++) {
+               frame = &params->frame_list[i];
+               if (hi->report->id == frame->id) {
+                       /* Assign custom suffix, if any */
+                       suffix = frame->suffix;
+                       /*
+                        * Disable EV_MSC reports for touch ring interfaces to
+                        * make the Wacom driver pick up touch ring extents
+                        */
+                       if (frame->touch_byte > 0)
+                               __clear_bit(EV_MSC, hi->input->evbit);
+               }
+       }
 
-       switch (field->application) {
-       case HID_GD_KEYBOARD:
-               suffix = "Keyboard";
-               break;
-       case HID_GD_MOUSE:
-               suffix = "Mouse";
-               break;
-       case HID_GD_KEYPAD:
-               suffix = "Pad";
-               break;
-       case HID_DG_PEN:
-               suffix = "Pen";
-               break;
-       case HID_CP_CONSUMER_CONTROL:
-               suffix = "Consumer Control";
-               break;
-       case HID_GD_SYSTEM_CONTROL:
-               suffix = "System Control";
-               break;
+       if (!suffix) {
+               field = hi->report->field[0];
+
+               switch (field->application) {
+               case HID_GD_KEYBOARD:
+                       suffix = "Keyboard";
+                       break;
+               case HID_GD_MOUSE:
+                       suffix = "Mouse";
+                       break;
+               case HID_GD_KEYPAD:
+                       suffix = "Pad";
+                       break;
+               case HID_DG_PEN:
+                       suffix = "Pen";
+                       break;
+               case HID_CP_CONSUMER_CONTROL:
+                       suffix = "Consumer Control";
+                       break;
+               case HID_GD_SYSTEM_CONTROL:
+                       suffix = "System Control";
+                       break;
+               }
        }
 
        if (suffix) {
@@ -172,8 +209,8 @@ static int uclogic_probe(struct hid_device *hdev,
                goto failure;
        }
        params_initialized = true;
-       hid_dbg(hdev, "parameters:\n" UCLOGIC_PARAMS_FMT_STR,
-               UCLOGIC_PARAMS_FMT_ARGS(&drvdata->params));
+       hid_dbg(hdev, "parameters:\n");
+       uclogic_params_hid_dbg(hdev, &drvdata->params);
        if (drvdata->params.invalid) {
                hid_info(hdev, "interface is invalid, ignoring\n");
                rc = -ENODEV;
@@ -313,8 +350,15 @@ static int uclogic_raw_event_frame(
 
        /* If need to, and can, set pad device ID for Wacom drivers */
        if (frame->dev_id_byte > 0 && frame->dev_id_byte < size) {
-               data[frame->dev_id_byte] = 0xf;
+               /* If we also have a touch ring and the finger left it */
+               if (frame->touch_byte > 0 && frame->touch_byte < size &&
+                   data[frame->touch_byte] == 0) {
+                       data[frame->dev_id_byte] = 0;
+               } else {
+                       data[frame->dev_id_byte] = 0xf;
+               }
        }
+
        /* If need to, and can, read rotary encoder state change */
        if (frame->re_lsb > 0 && frame->re_lsb / 8 < size) {
                unsigned int byte = frame->re_lsb / 8;
@@ -341,6 +385,26 @@ static int uclogic_raw_event_frame(
                drvdata->re_state = state;
        }
 
+       /* If need to, and can, transform the touch ring reports */
+       if (frame->touch_byte > 0 && frame->touch_byte < size) {
+               __s8 value = data[frame->touch_byte];
+
+               if (value != 0) {
+                       if (frame->touch_flip_at != 0) {
+                               value = frame->touch_flip_at - value;
+                               if (value <= 0)
+                                       value = frame->touch_max + value;
+                       }
+                       data[frame->touch_byte] = value - 1;
+               }
+       }
+
+       /* If need to, and can, transform the bitmap dial reports */
+       if (frame->bitmap_dial_byte > 0 && frame->bitmap_dial_byte < size) {
+               if (data[frame->bitmap_dial_byte] == 2)
+                       data[frame->bitmap_dial_byte] = -1;
+       }
+
        return 0;
 }
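
A worked pass through the touch ring transform above, using illustrative values touch_flip_at = 6 and touch_max = 12 (placeholders, not taken from any real tablet):

        /*
         * raw 0: finger not on the ring -> report left untouched
         * raw 2: 6 - 2 =  4              -> reported as 4 - 1 = 3
         * raw 9: 6 - 9 = -3, -3 + 12 = 9 -> reported as 9 - 1 = 8
         *
         * i.e. positions are mirrored around touch_flip_at, wrapped back
         * into 1..touch_max, then shifted to a 0-based index. The bitmap
         * dial hunk simply rewrites the "2" report as -1 so it reads as a
         * signed one-step counter-clockwise turn.
         */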
 
@@ -457,6 +521,8 @@ static const struct hid_device_id uclogic_devices[] = {
                                USB_DEVICE_ID_UGEE_XPPEN_TABLET_G640) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UGEE,
                                USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_UGEE,
+                               USB_DEVICE_ID_UGEE_XPPEN_TABLET_STAR06) },
        { }
 };
 MODULE_DEVICE_TABLE(hid, uclogic_devices);
@@ -468,6 +534,7 @@ static struct hid_driver uclogic_driver = {
        .remove = uclogic_remove,
        .report_fixup = uclogic_report_fixup,
        .raw_event = uclogic_raw_event,
+       .input_mapping = uclogic_input_mapping,
        .input_configured = uclogic_input_configured,
 #ifdef CONFIG_PM
        .resume           = uclogic_resume,
index 5f50ceb875d6d5ef75d88ec427ed2032910d163a..db838f16282d6488e40b57d18f5b0b4d29b79990 100644 (file)
@@ -29,8 +29,8 @@
  * Returns:
  *     The string representing the type, or NULL if the type is unknown.
  */
-const char *uclogic_params_pen_inrange_to_str(
-                       enum uclogic_params_pen_inrange inrange)
+static const char *uclogic_params_pen_inrange_to_str(
+                               enum uclogic_params_pen_inrange inrange)
 {
        switch (inrange) {
        case UCLOGIC_PARAMS_PEN_INRANGE_NORMAL:
@@ -44,6 +44,91 @@ const char *uclogic_params_pen_inrange_to_str(
        }
 }
 
+/**
+ * uclogic_params_pen_hid_dbg() - dump tablet interface pen parameters
+ * with hid_dbg(), indented with one tab.
+ *
+ * @hdev:      The HID device the pen parameters describe.
+ * @pen:       The pen parameters to dump.
+ */
+static void uclogic_params_pen_hid_dbg(const struct hid_device *hdev,
+                                       const struct uclogic_params_pen *pen)
+{
+       size_t i;
+
+       hid_dbg(hdev, "\t.usage_invalid = %s\n",
+               (pen->usage_invalid ? "true" : "false"));
+       hid_dbg(hdev, "\t.desc_ptr = %p\n", pen->desc_ptr);
+       hid_dbg(hdev, "\t.desc_size = %u\n", pen->desc_size);
+       hid_dbg(hdev, "\t.id = %u\n", pen->id);
+       hid_dbg(hdev, "\t.subreport_list = {\n");
+       for (i = 0; i < ARRAY_SIZE(pen->subreport_list); i++) {
+               hid_dbg(hdev, "\t\t{0x%02hhx, %hhu}%s\n",
+                       pen->subreport_list[i].value,
+                       pen->subreport_list[i].id,
+                       i < (ARRAY_SIZE(pen->subreport_list) - 1) ? "," : "");
+       }
+       hid_dbg(hdev, "\t}\n");
+       hid_dbg(hdev, "\t.inrange = %s\n",
+               uclogic_params_pen_inrange_to_str(pen->inrange));
+       hid_dbg(hdev, "\t.fragmented_hires = %s\n",
+               (pen->fragmented_hires ? "true" : "false"));
+       hid_dbg(hdev, "\t.tilt_y_flipped = %s\n",
+               (pen->tilt_y_flipped ? "true" : "false"));
+}
+
+/**
+ * uclogic_params_frame_hid_dbg() - dump tablet interface frame parameters
+ * with hid_dbg(), indented with two tabs.
+ *
+ * @hdev:      The HID device the frame parameters describe.
+ * @frame:     The frame parameters to dump.
+ */
+static void uclogic_params_frame_hid_dbg(
+                               const struct hid_device *hdev,
+                               const struct uclogic_params_frame *frame)
+{
+       hid_dbg(hdev, "\t\t.desc_ptr = %p\n", frame->desc_ptr);
+       hid_dbg(hdev, "\t\t.desc_size = %u\n", frame->desc_size);
+       hid_dbg(hdev, "\t\t.id = %u\n", frame->id);
+       hid_dbg(hdev, "\t\t.suffix = %s\n", frame->suffix);
+       hid_dbg(hdev, "\t\t.re_lsb = %u\n", frame->re_lsb);
+       hid_dbg(hdev, "\t\t.dev_id_byte = %u\n", frame->dev_id_byte);
+       hid_dbg(hdev, "\t\t.touch_byte = %u\n", frame->touch_byte);
+       hid_dbg(hdev, "\t\t.touch_max = %hhd\n", frame->touch_max);
+       hid_dbg(hdev, "\t\t.touch_flip_at = %hhd\n",
+               frame->touch_flip_at);
+       hid_dbg(hdev, "\t\t.bitmap_dial_byte = %u\n",
+               frame->bitmap_dial_byte);
+}
+
+/**
+ * uclogic_params_hid_dbg() - dump tablet interface parameters with hid_dbg().
+ *
+ * @hdev:      The HID device the parameters describe.
+ * @params:    The parameters to dump.
+ */
+void uclogic_params_hid_dbg(const struct hid_device *hdev,
+                               const struct uclogic_params *params)
+{
+       size_t i;
+
+       hid_dbg(hdev, ".invalid = %s\n",
+               params->invalid ? "true" : "false");
+       hid_dbg(hdev, ".desc_ptr = %p\n", params->desc_ptr);
+       hid_dbg(hdev, ".desc_size = %u\n", params->desc_size);
+       hid_dbg(hdev, ".pen = {\n");
+       uclogic_params_pen_hid_dbg(hdev, &params->pen);
+       hid_dbg(hdev, "\t}\n");
+       hid_dbg(hdev, ".frame_list = {\n");
+       for (i = 0; i < ARRAY_SIZE(params->frame_list); i++) {
+               hid_dbg(hdev, "\t{\n");
+               uclogic_params_frame_hid_dbg(hdev, &params->frame_list[i]);
+               hid_dbg(hdev, "\t}%s\n",
+                       i < (ARRAY_SIZE(params->frame_list) - 1) ? "," : "");
+       }
+       hid_dbg(hdev, "}\n");
+}
+
 /**
  * uclogic_params_get_str_desc - retrieve a string descriptor from a HID
  * device interface, putting it into a kmalloc-allocated buffer as is, without
@@ -253,28 +338,45 @@ static s32 uclogic_params_get_le24(const void *p)
  * uclogic_params_pen_init_v2() - initialize tablet interface pen
  * input and retrieve its parameters from the device, using v2 protocol.
  *
- * @pen:       Pointer to the pen parameters to initialize (to be
- *             cleaned up with uclogic_params_pen_cleanup()). Not modified in
- *             case of error, or if parameters are not found. Cannot be NULL.
- * @pfound:    Location for a flag which is set to true if the parameters
- *             were found, and to false if not (e.g. device was
- *             incompatible). Not modified in case of error. Cannot be NULL.
- * @hdev:      The HID device of the tablet interface to initialize and get
- *             parameters from. Cannot be NULL.
+ * @pen:               Pointer to the pen parameters to initialize (to be
+ *                     cleaned up with uclogic_params_pen_cleanup()). Not
+ *                     modified in case of error, or if parameters are not
+ *                     found. Cannot be NULL.
+ * @pfound:            Location for a flag which is set to true if the
+ *                     parameters were found, and to false if not (e.g.
+ *                     device was incompatible). Not modified in case of
+ *                     error. Cannot be NULL.
+ * @pparams_ptr:       Location for a kmalloc'ed pointer to the retrieved raw
+ *                     parameters, which could be used to identify the tablet
+ *                     to some extent. Should be freed with kfree after use.
+ *                     NULL, if not needed. Not modified in case of error.
+ *                     Only set if *pfound is set to true.
+ * @pparams_len:       Location for the length of the retrieved raw
+ *                     parameters. NULL, if not needed. Not modified in case
+ *                     of error. Only set if *pfound is set to true.
+ * @hdev:              The HID device of the tablet interface to initialize
+ *                     and get parameters from. Cannot be NULL.
  *
  * Returns:
  *     Zero, if successful. A negative errno code on error.
  */
 static int uclogic_params_pen_init_v2(struct uclogic_params_pen *pen,
                                        bool *pfound,
+                                       __u8 **pparams_ptr,
+                                       size_t *pparams_len,
                                        struct hid_device *hdev)
 {
        int rc;
        bool found = false;
-       /* Buffer for (part of) the string descriptor */
+       /* Buffer for (part of) the parameter string descriptor */
        __u8 *buf = NULL;
-       /* Descriptor length required */
-       const int len = 18;
+       /* Minimum accepted parameter string descriptor length */
+       const int params_len_min = 18;
+       /* Maximum accepted parameter string descriptor length */
+       const int params_len_max = 32;
+       /* Parameter string descriptor received length */
+       int params_len;
+       size_t i;
        s32 resolution;
        /* Pen report descriptor template parameters */
        s32 desc_params[UCLOGIC_RDESC_PEN_PH_ID_NUM];
@@ -292,7 +394,7 @@ static int uclogic_params_pen_init_v2(struct uclogic_params_pen *pen,
         * the Windows driver traffic.
         * NOTE: This enables fully-functional tablet mode.
         */
-       rc = uclogic_params_get_str_desc(&buf, hdev, 200, len);
+       rc = uclogic_params_get_str_desc(&buf, hdev, 200, params_len_max);
        if (rc == -EPIPE) {
                hid_dbg(hdev,
                        "string descriptor with pen parameters not found, assuming not compatible\n");
@@ -300,27 +402,28 @@ static int uclogic_params_pen_init_v2(struct uclogic_params_pen *pen,
        } else if (rc < 0) {
                hid_err(hdev, "failed retrieving pen parameters: %d\n", rc);
                goto cleanup;
-       } else if (rc != len) {
+       } else if (rc < params_len_min) {
                hid_dbg(hdev,
-                       "string descriptor with pen parameters has invalid length (got %d, expected %d), assuming not compatible\n",
-                       rc, len);
+                       "string descriptor with pen parameters is too short (got %d, expected at least %d), assuming not compatible\n",
+                       rc, params_len_min);
+               goto finish;
+       }
+
+       params_len = rc;
+
+       /*
+        * Check it's not just a catch-all UTF-16LE-encoded ASCII
+        * string (such as the model name) some tablets put into all
+        * unknown string descriptors.
+        */
+       for (i = 2;
+            i < params_len &&
+               (buf[i] >= 0x20 && buf[i] < 0x7f && buf[i + 1] == 0);
+            i += 2);
+       if (i >= params_len) {
+               hid_dbg(hdev,
+                       "string descriptor with pen parameters seems to contain only text, assuming not compatible\n");
                goto finish;
-       } else {
-               size_t i;
-               /*
-                * Check it's not just a catch-all UTF-16LE-encoded ASCII
-                * string (such as the model name) some tablets put into all
-                * unknown string descriptors.
-                */
-               for (i = 2;
-                    i < len &&
-                       (buf[i] >= 0x20 && buf[i] < 0x7f && buf[i + 1] == 0);
-                    i += 2);
-               if (i >= len) {
-                       hid_dbg(hdev,
-                               "string descriptor with pen parameters seems to contain only text, assuming not compatible\n");
-                       goto finish;
-               }
        }
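
/*
 * Editor's sketch (illustrative, not driver code): the loop above as a
 * standalone predicate. A descriptor that is nothing but printable ASCII
 * in UTF-16LE form (even bytes 0x20..0x7e, odd bytes zero) past the
 * two-byte descriptor header is taken to be a product-name string rather
 * than a parameter blob; an even length is assumed, since UTF-16LE code
 * units are two bytes wide.
 */
#include <stdbool.h>

static bool params_look_like_text(const unsigned char *buf, int len)
{
	int i;

	for (i = 2; i < len; i += 2)
		if (buf[i] < 0x20 || buf[i] >= 0x7f || buf[i + 1] != 0)
			return false;	/* binary-looking pair: real params */
	return true;			/* all text: likely the model name */
}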
 
        /*
@@ -344,8 +447,6 @@ static int uclogic_params_pen_init_v2(struct uclogic_params_pen *pen,
                        desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_LM] * 1000 /
                        resolution;
        }
-       kfree(buf);
-       buf = NULL;
 
        /*
         * Generate pen report descriptor
@@ -371,6 +472,13 @@ static int uclogic_params_pen_init_v2(struct uclogic_params_pen *pen,
        pen->fragmented_hires = true;
        pen->tilt_y_flipped = true;
        found = true;
+       if (pparams_ptr != NULL) {
+               *pparams_ptr = buf;
+               buf = NULL;
+       }
+       if (pparams_len != NULL)
+               *pparams_len = params_len;
+
 finish:
        *pfound = found;
        rc = 0;
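
For reference, the uclogic_params_get_le24() helper named in the hunk header above assembles a little-endian 24-bit value. A hedged standalone equivalent (the sample bytes are purely illustrative):

#include <stdint.h>
#include <stdio.h>

/* Standalone rendition of a 24-bit little-endian load. */
static int32_t get_le24(const uint8_t *p)
{
	return p[0] | (p[1] << 8) | ((int32_t)p[2] << 16);
}

int main(void)
{
	const uint8_t sample[3] = { 0x70, 0xC6, 0x00 };

	/* 0x70 | (0xC6 << 8) = 0x00C670 = 50800 */
	printf("%d\n", get_le24(sample));
	return 0;
}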
@@ -700,6 +808,14 @@ static int uclogic_params_huion_init(struct uclogic_params *params,
        static const char transition_ver[] = "HUION_T153_160607";
        char *ver_ptr = NULL;
        const size_t ver_len = sizeof(transition_ver) + 1;
+       __u8 *params_ptr = NULL;
+       size_t params_len = 0;
+       /* Parameter string descriptor of a model with a touch ring (HS610) */
+       const __u8 touch_ring_model_params_buf[] = {
+               0x13, 0x03, 0x70, 0xC6, 0x00, 0x06, 0x7C, 0x00,
+               0xFF, 0x1F, 0xD8, 0x13, 0x03, 0x0D, 0x10, 0x01,
+               0x04, 0x3C, 0x3E
+       };
 
        /* Check arguments */
        if (params == NULL || hdev == NULL) {
@@ -711,8 +827,13 @@ static int uclogic_params_huion_init(struct uclogic_params *params,
        iface = to_usb_interface(hdev->dev.parent);
        bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber;
 
-       /* If it's not a pen interface */
-       if (bInterfaceNumber != 0) {
+       /* If it's a custom keyboard interface */
+       if (bInterfaceNumber == 1) {
+               /* Keep everything intact, but mark pen usage invalid */
+               p.pen.usage_invalid = true;
+               goto output;
+       /* Else, if it's not a pen interface */
+       } else if (bInterfaceNumber != 0) {
                uclogic_params_init_invalid(&p);
                goto output;
        }
@@ -738,29 +859,103 @@ static int uclogic_params_huion_init(struct uclogic_params *params,
                        "transition firmware detected, not probing pen v2 parameters\n");
        } else {
                /* Try to probe v2 pen parameters */
-               rc = uclogic_params_pen_init_v2(&p.pen, &found, hdev);
+               rc = uclogic_params_pen_init_v2(&p.pen, &found,
+                                               &params_ptr, &params_len,
+                                               hdev);
                if (rc != 0) {
                        hid_err(hdev,
                                "failed probing pen v2 parameters: %d\n", rc);
                        goto cleanup;
                } else if (found) {
                        hid_dbg(hdev, "pen v2 parameters found\n");
-                       /* Create v2 frame parameters */
+                       /* Create v2 frame button parameters */
                        rc = uclogic_params_frame_init_with_desc(
                                        &p.frame_list[0],
-                                       uclogic_rdesc_v2_frame_arr,
-                                       uclogic_rdesc_v2_frame_size,
-                                       UCLOGIC_RDESC_V2_FRAME_ID);
+                                       uclogic_rdesc_v2_frame_buttons_arr,
+                                       uclogic_rdesc_v2_frame_buttons_size,
+                                       UCLOGIC_RDESC_V2_FRAME_BUTTONS_ID);
                        if (rc != 0) {
                                hid_err(hdev,
-                                       "failed creating v2 frame parameters: %d\n",
+                                       "failed creating v2 frame button parameters: %d\n",
                                        rc);
                                goto cleanup;
                        }
-                       /* Link frame button subreports from pen reports */
+
+                       /* Link from pen sub-report */
                        p.pen.subreport_list[0].value = 0xe0;
                        p.pen.subreport_list[0].id =
-                               UCLOGIC_RDESC_V2_FRAME_ID;
+                               UCLOGIC_RDESC_V2_FRAME_BUTTONS_ID;
+
+                       /* If this is the model with touch ring */
+                       if (params_ptr != NULL &&
+                           params_len == sizeof(touch_ring_model_params_buf) &&
+                           memcmp(params_ptr, touch_ring_model_params_buf,
+                                  params_len) == 0) {
+                               /* Create touch ring parameters */
+                               rc = uclogic_params_frame_init_with_desc(
+                                       &p.frame_list[1],
+                                       uclogic_rdesc_v2_frame_touch_ring_arr,
+                                       uclogic_rdesc_v2_frame_touch_ring_size,
+                                       UCLOGIC_RDESC_V2_FRAME_TOUCH_ID);
+                               if (rc != 0) {
+                                       hid_err(hdev,
+                                               "failed creating v2 frame touch ring parameters: %d\n",
+                                               rc);
+                                       goto cleanup;
+                               }
+                               p.frame_list[1].suffix = "Touch Ring";
+                               p.frame_list[1].dev_id_byte =
+                                       UCLOGIC_RDESC_V2_FRAME_TOUCH_DEV_ID_BYTE;
+                               p.frame_list[1].touch_byte = 5;
+                               p.frame_list[1].touch_max = 12;
+                               p.frame_list[1].touch_flip_at = 7;
+                       } else {
+                               /* Create touch strip parameters */
+                               rc = uclogic_params_frame_init_with_desc(
+                                       &p.frame_list[1],
+                                       uclogic_rdesc_v2_frame_touch_strip_arr,
+                                       uclogic_rdesc_v2_frame_touch_strip_size,
+                                       UCLOGIC_RDESC_V2_FRAME_TOUCH_ID);
+                               if (rc != 0) {
+                                       hid_err(hdev,
+                                               "failed creating v2 frame touch strip parameters: %d\n",
+                                               rc);
+                                       goto cleanup;
+                               }
+                               p.frame_list[1].suffix = "Touch Strip";
+                               p.frame_list[1].dev_id_byte =
+                                       UCLOGIC_RDESC_V2_FRAME_TOUCH_DEV_ID_BYTE;
+                               p.frame_list[1].touch_byte = 5;
+                               p.frame_list[1].touch_max = 8;
+                       }
+
+                       /* Link from pen sub-report */
+                       p.pen.subreport_list[1].value = 0xf0;
+                       p.pen.subreport_list[1].id =
+                               UCLOGIC_RDESC_V2_FRAME_TOUCH_ID;
+
+                       /* Create v2 frame dial parameters */
+                       rc = uclogic_params_frame_init_with_desc(
+                                       &p.frame_list[2],
+                                       uclogic_rdesc_v2_frame_dial_arr,
+                                       uclogic_rdesc_v2_frame_dial_size,
+                                       UCLOGIC_RDESC_V2_FRAME_DIAL_ID);
+                       if (rc != 0) {
+                               hid_err(hdev,
+                                       "failed creating v2 frame dial parameters: %d\n",
+                                       rc);
+                               goto cleanup;
+                       }
+                       p.frame_list[2].suffix = "Dial";
+                       p.frame_list[2].dev_id_byte =
+                               UCLOGIC_RDESC_V2_FRAME_DIAL_DEV_ID_BYTE;
+                       p.frame_list[2].bitmap_dial_byte = 5;
+
+                       /* Link from pen sub-report */
+                       p.pen.subreport_list[2].value = 0xf1;
+                       p.pen.subreport_list[2].id =
+                               UCLOGIC_RDESC_V2_FRAME_DIAL_ID;
+
                        goto output;
                }
                hid_dbg(hdev, "pen v2 parameters not found\n");
@@ -801,6 +996,7 @@ output:
        memset(&p, 0, sizeof(p));
        rc = 0;
 cleanup:
+       kfree(params_ptr);
        kfree(ver_ptr);
        uclogic_params_cleanup(&p);
        return rc;
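
The HS610 detection above works by fingerprinting the raw parameter blob returned by uclogic_params_pen_init_v2(). A minimal sketch of that pattern (names are illustrative; the driver itself open-codes the comparison):

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* True if the retrieved parameter blob exactly matches a known model's. */
static bool params_match_model(const unsigned char *params, size_t len,
			       const unsigned char *fingerprint,
			       size_t fingerprint_len)
{
	return params != NULL && len == fingerprint_len &&
	       memcmp(params, fingerprint, fingerprint_len) == 0;
}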
@@ -999,6 +1195,8 @@ int uclogic_params_init(struct uclogic_params *params,
                     USB_DEVICE_ID_UGEE_XPPEN_TABLET_G540):
        case VID_PID(USB_VENDOR_ID_UGEE,
                     USB_DEVICE_ID_UGEE_XPPEN_TABLET_G640):
+       case VID_PID(USB_VENDOR_ID_UGEE,
+                    USB_DEVICE_ID_UGEE_XPPEN_TABLET_STAR06):
        case VID_PID(USB_VENDOR_ID_UGEE,
                     USB_DEVICE_ID_UGEE_TABLET_RAINBOW_CV720):
                /* If this is the pen interface */
index 86f616dfbb53dddc576ff6e81972e069f2eb04b2..5bef8daaa60741195fdff24838139266fcf2aef8 100644 (file)
@@ -29,11 +29,6 @@ enum uclogic_params_pen_inrange {
        UCLOGIC_PARAMS_PEN_INRANGE_NONE,
 };
 
-/* Convert a pen in-range reporting type to a string */
-extern const char *uclogic_params_pen_inrange_to_str(
-                       enum uclogic_params_pen_inrange inrange);
-
-
 /*
  * Pen report's subreport data.
  */
@@ -62,8 +57,13 @@ struct uclogic_params_pen_subreport {
  */
 struct uclogic_params_pen {
        /*
-        * Pointer to report descriptor describing the inputs.
-        * Allocated with kmalloc.
+        * True if pen usage is invalid for this interface and should be
+        * ignored, false otherwise.
+        */
+       bool usage_invalid;
+       /*
+        * Pointer to report descriptor part describing the pen inputs.
+        * Allocated with kmalloc. NULL if the part is not specified.
         */
        __u8 *desc_ptr;
        /*
@@ -73,8 +73,8 @@ struct uclogic_params_pen {
        unsigned int desc_size;
        /* Report ID, if reports should be tweaked, zero if not */
        unsigned int id;
-       /* The list of subreports */
-       struct uclogic_params_pen_subreport subreport_list[1];
+       /* The list of subreports, only valid if "id" is not zero */
+       struct uclogic_params_pen_subreport subreport_list[3];
        /* Type of in-range reporting, only valid if "id" is not zero */
        enum uclogic_params_pen_inrange inrange;
        /*
@@ -101,8 +101,8 @@ struct uclogic_params_pen {
  */
 struct uclogic_params_frame {
        /*
-        * Pointer to report descriptor describing the inputs.
-        * Allocated with kmalloc.
+        * Pointer to report descriptor part describing the frame inputs.
+        * Allocated with kmalloc. NULL if the part is not specified.
         */
        __u8 *desc_ptr;
        /*
@@ -114,6 +114,10 @@ struct uclogic_params_frame {
         * Report ID, if reports should be tweaked, zero if not.
         */
        unsigned int id;
+       /*
+        * The suffix to add to the input device name, if not NULL.
+        */
+       const char *suffix;
        /*
         * Number of the least-significant bit of the 2-bit state of a rotary
         * encoder, in the report. Cannot point to a 2-bit field crossing a
@@ -123,10 +127,40 @@ struct uclogic_params_frame {
        /*
         * Offset of the Wacom-style device ID byte in the report, to be set
         * to pad device ID (0xf), for compatibility with Wacom drivers. Zero
-        * if no changes to the report should be made. Only valid if "id" is
-        * not zero.
+        * if no changes to the report should be made. The ID byte will be set
+        * to zero whenever the byte pointed to by "touch_byte" is zero, if
+        * the latter is valid. Only valid if "id" is not zero.
         */
        unsigned int dev_id_byte;
+       /*
+        * Offset of the touch ring/strip state byte, in the report.
+        * Zero if not present. If dev_id_byte is also valid and non-zero,
+        * then the device ID byte will be cleared when the byte pointed to by
+        * this offset is zero. Only valid if "id" is not zero.
+        */
+       unsigned int touch_byte;
+       /*
+        * The value to anchor the reversed touch ring/strip reports at.
+        * I.e. one, if the reports should be flipped without offset.
+        * Zero if no reversal should be done.
+        * Only valid if "touch_byte" is valid and not zero.
+        */
+       __s8 touch_flip_at;
+       /*
+        * Maximum value of the touch ring/strip report around which the value
+        * should be wrapped when flipping according to "touch_flip_at".
+        * The minimum valid value is considered to be one, with zero being
+        * out-of-proximity (finger lift) value.
+        * Only valid if "touch_flip_at" is valid and not zero.
+        */
+       __s8 touch_max;
+       /*
+        * Offset of the bitmap dial byte, in the report. Zero if not present.
+        * Only valid if "id" is not zero. A bitmap dial sends reports with a
+        * dedicated bit per direction: 1 means clockwise rotation, 2 means
+        * counterclockwise, as opposed to the normal 1 and -1.
+        */
+       unsigned int bitmap_dial_byte;
 };
 
 /*
@@ -156,7 +190,7 @@ struct uclogic_params {
        __u8 *desc_ptr;
        /*
         * Size of the common part of the replacement report descriptor.
-        * Only valid, if "desc_ptr" is not NULL.
+        * Only valid, if "desc_ptr" is valid and not NULL.
         */
        unsigned int desc_size;
        /*
@@ -168,50 +202,13 @@ struct uclogic_params {
         * The list of frame control parameters and optional report descriptor
         * parts. Only valid, if "invalid" is false.
         */
-       struct uclogic_params_frame frame_list[1];
+       struct uclogic_params_frame frame_list[3];
 };
 
 /* Initialize a tablet interface and discover its parameters */
 extern int uclogic_params_init(struct uclogic_params *params,
                                struct hid_device *hdev);
 
-/* Tablet interface parameters *printf format string */
-#define UCLOGIC_PARAMS_FMT_STR \
-               ".invalid = %s\n"                               \
-               ".desc_ptr = %p\n"                              \
-               ".desc_size = %u\n"                             \
-               ".pen.desc_ptr = %p\n"                          \
-               ".pen.desc_size = %u\n"                         \
-               ".pen.id = %u\n"                                \
-               ".pen.subreport_list[0] = {0x%02hhx, %hhu}\n"   \
-               ".pen.inrange = %s\n"                           \
-               ".pen.fragmented_hires = %s\n"                  \
-               ".pen.tilt_y_flipped = %s\n"                    \
-               ".frame_list[0].desc_ptr = %p\n"                \
-               ".frame_list[0].desc_size = %u\n"               \
-               ".frame_list[0].id = %u\n"                      \
-               ".frame_list[0].re_lsb = %u\n"                  \
-               ".frame_list[0].dev_id_byte = %u\n"
-
-/* Tablet interface parameters *printf format arguments */
-#define UCLOGIC_PARAMS_FMT_ARGS(_params) \
-               ((_params)->invalid ? "true" : "false"),                    \
-               (_params)->desc_ptr,                                        \
-               (_params)->desc_size,                                       \
-               (_params)->pen.desc_ptr,                                    \
-               (_params)->pen.desc_size,                                   \
-               (_params)->pen.id,                                          \
-               (_params)->pen.subreport_list[0].value,                     \
-               (_params)->pen.subreport_list[0].id,                        \
-               uclogic_params_pen_inrange_to_str((_params)->pen.inrange),  \
-               ((_params)->pen.fragmented_hires ? "true" : "false"),       \
-               ((_params)->pen.tilt_y_flipped ? "true" : "false"),         \
-               (_params)->frame_list[0].desc_ptr,                          \
-               (_params)->frame_list[0].desc_size,                         \
-               (_params)->frame_list[0].id,                                \
-               (_params)->frame_list[0].re_lsb,                            \
-               (_params)->frame_list[0].dev_id_byte
-
 /* Get a replacement report descriptor for a tablet's interface. */
 extern int uclogic_params_get_desc(const struct uclogic_params *params,
                                        __u8 **pdesc,
@@ -220,4 +217,8 @@ extern int uclogic_params_get_desc(const struct uclogic_params *params,
 /* Free resources used by tablet interface's parameters */
 extern void uclogic_params_cleanup(struct uclogic_params *params);
 
+/* Dump tablet interface parameters with hid_dbg() */
+extern void uclogic_params_hid_dbg(const struct hid_device *hdev,
+                                       const struct uclogic_params *params);
+
 #endif /* _HID_UCLOGIC_PARAMS_H */
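
A minimal sketch of how the new dump entry point slots into a probe path, mirroring the hid-uclogic-core change earlier in this series (error handling elided; the function name is hypothetical). The messages only appear once dynamic debug is enabled for the module, e.g. via echo "module hid_uclogic +p" > /sys/kernel/debug/dynamic_debug/control.

/* Sketch: initialize, dump, then validate tablet parameters in probe(). */
static int example_probe(struct hid_device *hdev)
{
	struct uclogic_params params;
	int rc;

	rc = uclogic_params_init(&params, hdev);
	if (rc != 0)
		return rc;

	hid_dbg(hdev, "parameters:\n");
	uclogic_params_hid_dbg(hdev, &params);

	if (params.invalid)
		rc = -ENODEV;

	uclogic_params_cleanup(&params);
	return rc;
}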
index 04644d93bd117cb1f64acc1a0bc93089a8bd22f8..13f9ce73f1b161b20706c4d6ae5460b16c7df214 100644 (file)
@@ -21,7 +21,7 @@
 /* Fixed WP4030U report descriptor */
 __u8 uclogic_rdesc_wp4030u_fixed_arr[] = {
        0x05, 0x0D,         /*  Usage Page (Digitizer),             */
-       0x09, 0x02,         /*  Usage (Pen),                        */
+       0x09, 0x01,         /*  Usage (Digitizer),                  */
        0xA1, 0x01,         /*  Collection (Application),           */
        0x85, 0x09,         /*      Report ID (9),                  */
        0x09, 0x20,         /*      Usage (Stylus),                 */
@@ -66,7 +66,7 @@ const size_t uclogic_rdesc_wp4030u_fixed_size =
 /* Fixed WP5540U report descriptor */
 __u8 uclogic_rdesc_wp5540u_fixed_arr[] = {
        0x05, 0x0D,         /*  Usage Page (Digitizer),             */
-       0x09, 0x02,         /*  Usage (Pen),                        */
+       0x09, 0x01,         /*  Usage (Digitizer),                  */
        0xA1, 0x01,         /*  Collection (Application),           */
        0x85, 0x09,         /*      Report ID (9),                  */
        0x09, 0x20,         /*      Usage (Stylus),                 */
@@ -143,7 +143,7 @@ const size_t uclogic_rdesc_wp5540u_fixed_size =
 /* Fixed WP8060U report descriptor */
 __u8 uclogic_rdesc_wp8060u_fixed_arr[] = {
        0x05, 0x0D,         /*  Usage Page (Digitizer),             */
-       0x09, 0x02,         /*  Usage (Pen),                        */
+       0x09, 0x01,         /*  Usage (Digitizer),                  */
        0xA1, 0x01,         /*  Collection (Application),           */
        0x85, 0x09,         /*      Report ID (9),                  */
        0x09, 0x20,         /*      Usage (Stylus),                 */
@@ -220,7 +220,7 @@ const size_t uclogic_rdesc_wp8060u_fixed_size =
 /* Fixed WP1062 report descriptor */
 __u8 uclogic_rdesc_wp1062_fixed_arr[] = {
        0x05, 0x0D,         /*  Usage Page (Digitizer),             */
-       0x09, 0x02,         /*  Usage (Pen),                        */
+       0x09, 0x01,         /*  Usage (Digitizer),                  */
        0xA1, 0x01,         /*  Collection (Application),           */
        0x85, 0x09,         /*      Report ID (9),                  */
        0x09, 0x20,         /*      Usage (Stylus),                 */
@@ -268,7 +268,7 @@ const size_t uclogic_rdesc_wp1062_fixed_size =
 /* Fixed PF1209 report descriptor */
 __u8 uclogic_rdesc_pf1209_fixed_arr[] = {
        0x05, 0x0D,         /*  Usage Page (Digitizer),             */
-       0x09, 0x02,         /*  Usage (Pen),                        */
+       0x09, 0x01,         /*  Usage (Digitizer),                  */
        0xA1, 0x01,         /*  Collection (Application),           */
        0x85, 0x09,         /*      Report ID (9),                  */
        0x09, 0x20,         /*      Usage (Stylus),                 */
@@ -345,7 +345,7 @@ const size_t uclogic_rdesc_pf1209_fixed_size =
 /* Fixed PID 0522 tablet report descriptor, interface 0 (stylus) */
 __u8 uclogic_rdesc_twhl850_fixed0_arr[] = {
        0x05, 0x0D,         /*  Usage Page (Digitizer),             */
-       0x09, 0x02,         /*  Usage (Pen),                        */
+       0x09, 0x01,         /*  Usage (Digitizer),                  */
        0xA1, 0x01,         /*  Collection (Application),           */
        0x85, 0x09,         /*      Report ID (9),                  */
        0x09, 0x20,         /*      Usage (Stylus),                 */
@@ -457,7 +457,7 @@ const size_t uclogic_rdesc_twhl850_fixed2_size =
 /* Fixed TWHA60 report descriptor, interface 0 (stylus) */
 __u8 uclogic_rdesc_twha60_fixed0_arr[] = {
        0x05, 0x0D,         /*  Usage Page (Digitizer),             */
-       0x09, 0x02,         /*  Usage (Pen),                        */
+       0x09, 0x01,         /*  Usage (Digitizer),                  */
        0xA1, 0x01,         /*  Collection (Application),           */
        0x85, 0x09,         /*      Report ID (9),                  */
        0x09, 0x20,         /*      Usage (Stylus),                 */
@@ -534,7 +534,7 @@ const size_t uclogic_rdesc_twha60_fixed1_size =
 /* Fixed report descriptor template for (tweaked) v1 pen reports */
 const __u8 uclogic_rdesc_v1_pen_template_arr[] = {
        0x05, 0x0D,             /*  Usage Page (Digitizer),                 */
-       0x09, 0x02,             /*  Usage (Pen),                            */
+       0x09, 0x01,             /*  Usage (Digitizer),                      */
        0xA1, 0x01,             /*  Collection (Application),               */
        0x85, 0x07,             /*      Report ID (7),                      */
        0x09, 0x20,             /*      Usage (Stylus),                     */
@@ -588,7 +588,7 @@ const size_t uclogic_rdesc_v1_pen_template_size =
 /* Fixed report descriptor template for (tweaked) v2 pen reports */
 const __u8 uclogic_rdesc_v2_pen_template_arr[] = {
        0x05, 0x0D,             /*  Usage Page (Digitizer),                 */
-       0x09, 0x02,             /*  Usage (Pen),                            */
+       0x09, 0x01,             /*  Usage (Digitizer),                      */
        0xA1, 0x01,             /*  Collection (Application),               */
        0x85, 0x08,             /*      Report ID (8),                      */
        0x09, 0x20,             /*      Usage (Stylus),                     */
@@ -652,12 +652,12 @@ const size_t uclogic_rdesc_v2_pen_template_size =
                        sizeof(uclogic_rdesc_v2_pen_template_arr);
 
 /*
- * Expand to the contents of a generic frame report descriptor.
+ * Expand to the contents of a generic frame buttons report descriptor.
  *
  * @_id:       The report ID to use.
  * @_size:     Size of the report to pad to, including report ID, bytes.
  */
-#define UCLOGIC_RDESC_FRAME_BYTES(_id, _size) \
+#define UCLOGIC_RDESC_FRAME_BUTTONS_BYTES(_id, _size) \
        0x05, 0x01,     /*  Usage Page (Desktop),               */ \
        0x09, 0x07,     /*  Usage (Keypad),                     */ \
        0xA1, 0x01,     /*  Collection (Application),           */ \
@@ -700,17 +700,164 @@ const size_t uclogic_rdesc_v2_pen_template_size =
 
 /* Fixed report descriptor for (tweaked) v1 frame reports */
 const __u8 uclogic_rdesc_v1_frame_arr[] = {
-       UCLOGIC_RDESC_FRAME_BYTES(UCLOGIC_RDESC_V1_FRAME_ID, 8)
+       UCLOGIC_RDESC_FRAME_BUTTONS_BYTES(UCLOGIC_RDESC_V1_FRAME_ID, 8)
 };
 const size_t uclogic_rdesc_v1_frame_size =
                        sizeof(uclogic_rdesc_v1_frame_arr);
 
-/* Fixed report descriptor for (tweaked) v2 frame reports */
-const __u8 uclogic_rdesc_v2_frame_arr[] = {
-       UCLOGIC_RDESC_FRAME_BYTES(UCLOGIC_RDESC_V2_FRAME_ID, 12)
+/* Fixed report descriptor for (tweaked) v2 frame button reports */
+const __u8 uclogic_rdesc_v2_frame_buttons_arr[] = {
+       UCLOGIC_RDESC_FRAME_BUTTONS_BYTES(UCLOGIC_RDESC_V2_FRAME_BUTTONS_ID,
+                                         12)
 };
-const size_t uclogic_rdesc_v2_frame_size =
-                       sizeof(uclogic_rdesc_v2_frame_arr);
+const size_t uclogic_rdesc_v2_frame_buttons_size =
+                       sizeof(uclogic_rdesc_v2_frame_buttons_arr);
+
+/* Fixed report descriptor for (tweaked) v2 frame touch ring reports */
+const __u8 uclogic_rdesc_v2_frame_touch_ring_arr[] = {
+       0x05, 0x01,         /*  Usage Page (Desktop),               */
+       0x09, 0x07,         /*  Usage (Keypad),                     */
+       0xA1, 0x01,         /*  Collection (Application),           */
+       0x85, UCLOGIC_RDESC_V2_FRAME_TOUCH_ID,
+                           /*      Report ID (TOUCH_ID),           */
+       0x14,               /*      Logical Minimum (0),            */
+       0x05, 0x0D,         /*      Usage Page (Digitizer),         */
+       0x09, 0x39,         /*      Usage (Tablet Function Keys),   */
+       0xA0,               /*      Collection (Physical),          */
+       0x25, 0x01,         /*          Logical Maximum (1),        */
+       0x75, 0x01,         /*          Report Size (1),            */
+       0x05, 0x09,         /*          Usage Page (Button),        */
+       0x09, 0x01,         /*          Usage (01h),                */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x95, 0x07,         /*          Report Count (7),           */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0x75, 0x08,         /*          Report Size (8),            */
+       0x95, 0x02,         /*          Report Count (2),           */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0x05, 0x0D,         /*          Usage Page (Digitizer),     */
+       0x0A, 0xFF, 0xFF,   /*          Usage (FFFFh),              */
+       0x26, 0xFF, 0x00,   /*          Logical Maximum (255),      */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x05, 0x01,         /*          Usage Page (Desktop),       */
+       0x09, 0x38,         /*          Usage (Wheel),              */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x15, 0x00,         /*          Logical Minimum (0),        */
+       0x25, 0x0B,         /*          Logical Maximum (11),       */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x09, 0x30,         /*          Usage (X),                  */
+       0x09, 0x31,         /*          Usage (Y),                  */
+       0x14,               /*          Logical Minimum (0),        */
+       0x25, 0x01,         /*          Logical Maximum (1),        */
+       0x75, 0x01,         /*          Report Size (1),            */
+       0x95, 0x02,         /*          Report Count (2),           */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x95, 0x2E,         /*          Report Count (46),          */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0xC0,               /*      End Collection,                 */
+       0xC0                /*  End Collection                      */
+};
+const size_t uclogic_rdesc_v2_frame_touch_ring_size =
+                       sizeof(uclogic_rdesc_v2_frame_touch_ring_arr);
+
+/* Fixed report descriptor for (tweaked) v2 frame touch strip reports */
+const __u8 uclogic_rdesc_v2_frame_touch_strip_arr[] = {
+       0x05, 0x01,         /*  Usage Page (Desktop),               */
+       0x09, 0x07,         /*  Usage (Keypad),                     */
+       0xA1, 0x01,         /*  Collection (Application),           */
+       0x85, UCLOGIC_RDESC_V2_FRAME_TOUCH_ID,
+                           /*      Report ID (TOUCH_ID),           */
+       0x14,               /*      Logical Minimum (0),            */
+       0x05, 0x0D,         /*      Usage Page (Digitizer),         */
+       0x09, 0x39,         /*      Usage (Tablet Function Keys),   */
+       0xA0,               /*      Collection (Physical),          */
+       0x25, 0x01,         /*          Logical Maximum (1),        */
+       0x75, 0x01,         /*          Report Size (1),            */
+       0x05, 0x09,         /*          Usage Page (Button),        */
+       0x09, 0x01,         /*          Usage (01h),                */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x95, 0x07,         /*          Report Count (7),           */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0x75, 0x08,         /*          Report Size (8),            */
+       0x95, 0x02,         /*          Report Count (2),           */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0x05, 0x0D,         /*          Usage Page (Digitizer),     */
+       0x0A, 0xFF, 0xFF,   /*          Usage (FFFFh),              */
+       0x26, 0xFF, 0x00,   /*          Logical Maximum (255),      */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x05, 0x01,         /*          Usage Page (Desktop),       */
+       0x09, 0x38,         /*          Usage (Wheel),              */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x15, 0x00,         /*          Logical Minimum (0),        */
+       0x25, 0x07,         /*          Logical Maximum (7),        */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x09, 0x30,         /*          Usage (X),                  */
+       0x09, 0x31,         /*          Usage (Y),                  */
+       0x14,               /*          Logical Minimum (0),        */
+       0x25, 0x01,         /*          Logical Maximum (1),        */
+       0x75, 0x01,         /*          Report Size (1),            */
+       0x95, 0x02,         /*          Report Count (2),           */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x95, 0x2E,         /*          Report Count (46),          */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0xC0,               /*      End Collection,                 */
+       0xC0                /*  End Collection                      */
+};
+const size_t uclogic_rdesc_v2_frame_touch_strip_size =
+                       sizeof(uclogic_rdesc_v2_frame_touch_strip_arr);
+
+/* Fixed report descriptor for (tweaked) v2 frame dial reports */
+const __u8 uclogic_rdesc_v2_frame_dial_arr[] = {
+       0x05, 0x01,         /*  Usage Page (Desktop),               */
+       0x09, 0x07,         /*  Usage (Keypad),                     */
+       0xA1, 0x01,         /*  Collection (Application),           */
+       0x85, UCLOGIC_RDESC_V2_FRAME_DIAL_ID,
+                           /*      Report ID (DIAL_ID),            */
+       0x14,               /*      Logical Minimum (0),            */
+       0x05, 0x0D,         /*      Usage Page (Digitizer),         */
+       0x09, 0x39,         /*      Usage (Tablet Function Keys),   */
+       0xA0,               /*      Collection (Physical),          */
+       0x25, 0x01,         /*          Logical Maximum (1),        */
+       0x75, 0x01,         /*          Report Size (1),            */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0x05, 0x09,         /*          Usage Page (Button),        */
+       0x09, 0x01,         /*          Usage (01h),                */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x95, 0x06,         /*          Report Count (6),           */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0x75, 0x08,         /*          Report Size (8),            */
+       0x95, 0x02,         /*          Report Count (2),           */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0x05, 0x0D,         /*          Usage Page (Digitizer),     */
+       0x0A, 0xFF, 0xFF,   /*          Usage (FFFFh),              */
+       0x26, 0xFF, 0x00,   /*          Logical Maximum (255),      */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x05, 0x01,         /*          Usage Page (Desktop),       */
+       0x09, 0x38,         /*          Usage (Wheel),              */
+       0x95, 0x01,         /*          Report Count (1),           */
+       0x15, 0xFF,         /*          Logical Minimum (-1),       */
+       0x25, 0x01,         /*          Logical Maximum (1),        */
+       0x81, 0x06,         /*          Input (Variable, Relative), */
+       0x09, 0x30,         /*          Usage (X),                  */
+       0x09, 0x31,         /*          Usage (Y),                  */
+       0x14,               /*          Logical Minimum (0),        */
+       0x25, 0x01,         /*          Logical Maximum (1),        */
+       0x75, 0x01,         /*          Report Size (1),            */
+       0x95, 0x02,         /*          Report Count (2),           */
+       0x81, 0x02,         /*          Input (Variable),           */
+       0x95, 0x2E,         /*          Report Count (46),          */
+       0x81, 0x01,         /*          Input (Constant),           */
+       0xC0,               /*      End Collection,                 */
+       0xC0                /*  End Collection                      */
+};
+const size_t uclogic_rdesc_v2_frame_dial_size =
+                       sizeof(uclogic_rdesc_v2_frame_dial_arr);
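
The dial descriptor above declares a Wheel with logical range -1..1, while the comment on bitmap_dial_byte in hid-uclogic-params.h notes the hardware reports 2 for counterclockwise. A hedged sketch of the remap performed in uclogic_raw_event_frame() (the function name here is illustrative):

/* Sketch: collapse the bitmap dial's bit-per-direction encoding into the
 * -1/0/+1 relative value the fixed descriptor above declares; 1 stays +1
 * (clockwise), 2 becomes -1 (0xff once stored back into the report byte). */
static signed char normalize_bitmap_dial(unsigned char raw)
{
	return raw == 2 ? -1 : (signed char)raw;
}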
 
 /* Fixed report descriptor for Ugee EX07 frame */
 const __u8 uclogic_rdesc_ugee_ex07_frame_arr[] = {
index 3d904c27b86a4a6a025ab20b0292757e4c4e793d..0c6e95e8bde7419b3b2e2d7e29e12f90b89d754b 100644 (file)
@@ -124,12 +124,36 @@ extern const size_t uclogic_rdesc_v2_pen_template_size;
 extern const __u8 uclogic_rdesc_v1_frame_arr[];
 extern const size_t uclogic_rdesc_v1_frame_size;
 
-/* Report ID for tweaked v2 frame reports */
-#define UCLOGIC_RDESC_V2_FRAME_ID 0xf7
+/* Report ID for tweaked v2 frame button reports */
+#define UCLOGIC_RDESC_V2_FRAME_BUTTONS_ID 0xf7
 
-/* Fixed report descriptor for (tweaked) v2 frame reports */
-extern const __u8 uclogic_rdesc_v2_frame_arr[];
-extern const size_t uclogic_rdesc_v2_frame_size;
+/* Fixed report descriptor for (tweaked) v2 frame button reports */
+extern const __u8 uclogic_rdesc_v2_frame_buttons_arr[];
+extern const size_t uclogic_rdesc_v2_frame_buttons_size;
+
+/* Report ID for tweaked v2 frame touch ring/strip reports */
+#define UCLOGIC_RDESC_V2_FRAME_TOUCH_ID 0xf8
+
+/* Fixed report descriptor for (tweaked) v2 frame touch ring reports */
+extern const __u8 uclogic_rdesc_v2_frame_touch_ring_arr[];
+extern const size_t uclogic_rdesc_v2_frame_touch_ring_size;
+
+/* Fixed report descriptor for (tweaked) v2 frame touch strip reports */
+extern const __u8 uclogic_rdesc_v2_frame_touch_strip_arr[];
+extern const size_t uclogic_rdesc_v2_frame_touch_strip_size;
+
+/* Device ID byte offset in v2 frame touch ring/strip reports */
+#define UCLOGIC_RDESC_V2_FRAME_TOUCH_DEV_ID_BYTE       0x4
+
+/* Report ID for tweaked v2 frame dial reports */
+#define UCLOGIC_RDESC_V2_FRAME_DIAL_ID 0xf9
+
+/* Fixed report descriptor for (tweaked) v2 frame dial reports */
+extern const __u8 uclogic_rdesc_v2_frame_dial_arr[];
+extern const size_t uclogic_rdesc_v2_frame_dial_size;
+
+/* Device ID byte offset in v2 frame dial reports */
+#define UCLOGIC_RDESC_V2_FRAME_DIAL_DEV_ID_BYTE        0x4
 
 /* Fixed report descriptor for Ugee EX07 frame */
 extern const __u8 uclogic_rdesc_ugee_ex07_frame_arr[];
index df60c8fc2efd82e2604ddd5a5597dac87cf635c8..8024b1d370e25eca73cfef6f75b59de49a1063fd 100644 (file)
@@ -24,7 +24,7 @@
 /* Fixed report descriptor of PD1011 signature pad */
 static __u8 pd1011_rdesc_fixed[] = {
        0x05, 0x0D,             /*  Usage Page (Digitizer),             */
-       0x09, 0x02,             /*  Usage (Pen),                        */
+       0x09, 0x01,             /*  Usage (Digitizer),                  */
        0xA1, 0x01,             /*  Collection (Application),           */
        0x85, 0x02,             /*      Report ID (2),                  */
        0x09, 0x20,             /*      Usage (Stylus),                 */
index 07e3cbc86bef1076796a905938cc9b27e57714ad..e600dbf04dfc675568f75a31587eb5ad789f8dfa 100644 (file)
@@ -30,6 +30,8 @@
 #define TGL_H_DEVICE_ID                0x43FC
 #define ADL_S_DEVICE_ID                0x7AF8
 #define ADL_P_DEVICE_ID                0x51FC
+#define ADL_N_DEVICE_ID                0x54FC
+#define RPL_S_DEVICE_ID                0x7A78
 
 #define        REVISION_ID_CHT_A0      0x6
 #define        REVISION_ID_CHT_Ax_SI   0x0
index 8e9d9450cb835df16dbc556dd0043713dd7d986a..2c67ec17bec6fd935b40b5540d0994c2f028c8c9 100644 (file)
@@ -41,6 +41,8 @@ static const struct pci_device_id ish_pci_tbl[] = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, TGL_H_DEVICE_ID)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ADL_S_DEVICE_ID)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ADL_P_DEVICE_ID)},
+       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ADL_N_DEVICE_ID)},
+       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, RPL_S_DEVICE_ID)},
        {0, }
 };
 MODULE_DEVICE_TABLE(pci, ish_pci_tbl);
index 066c567dbaa229f3fa5948cbc3cc1484e8bf5e2b..620fe74f56769a4d652cbd0926b94efddc5c7ee8 100644 (file)
@@ -1777,7 +1777,7 @@ static int __wacom_initialize_battery(struct wacom *wacom,
        bat_desc->get_property = wacom_battery_get_property;
        sprintf(battery->bat_name, "wacom_battery_%ld", n);
        bat_desc->name = battery->bat_name;
-       bat_desc->type = POWER_SUPPLY_TYPE_USB;
+       bat_desc->type = POWER_SUPPLY_TYPE_BATTERY;
        bat_desc->use_for_apm = 0;
 
        ps_bat = devm_power_supply_register(dev, bat_desc, &psy_cfg);
index a7176fc0635dd2b7aa0fd7977938f8feb9b6a2ec..9470c2b0b52945bab3bf153679d9d295063f81c2 100644 (file)
@@ -1811,7 +1811,9 @@ int wacom_equivalent_usage(int usage)
                    usage == WACOM_HID_WD_TOUCHSTRIP2 ||
                    usage == WACOM_HID_WD_TOUCHRING ||
                    usage == WACOM_HID_WD_TOUCHRINGSTATUS ||
-                   usage == WACOM_HID_WD_REPORT_VALID) {
+                   usage == WACOM_HID_WD_REPORT_VALID ||
+                   usage == WACOM_HID_WD_BARRELSWITCH3 ||
+                   usage == WACOM_HID_WD_SEQUENCENUMBER) {
                        return usage;
                }
 
@@ -2196,8 +2198,11 @@ static void wacom_set_barrel_switch3_usage(struct wacom_wac *wacom_wac)
        if (!(features->quirks & WACOM_QUIRK_AESPEN) &&
            wacom_wac->hid_data.barrelswitch &&
            wacom_wac->hid_data.barrelswitch2 &&
-           wacom_wac->hid_data.serialhi)
+           wacom_wac->hid_data.serialhi &&
+           !wacom_wac->hid_data.barrelswitch3) {
                input_set_capability(input, EV_KEY, BTN_STYLUS3);
+               features->quirks |= WACOM_QUIRK_PEN_BUTTON3;
+       }
 }
 
 static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
@@ -2261,6 +2266,9 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
                features->quirks |= WACOM_QUIRK_TOOLSERIAL;
                wacom_map_usage(input, usage, field, EV_MSC, MSC_SERIAL, 0);
                break;
+       case HID_DG_SCANTIME:
+               wacom_map_usage(input, usage, field, EV_MSC, MSC_TIMESTAMP, 0);
+               break;
        case WACOM_HID_WD_SENSE:
                features->quirks |= WACOM_QUIRK_SENSE;
                wacom_map_usage(input, usage, field, EV_KEY, BTN_TOOL_PEN, 0);
@@ -2274,6 +2282,11 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
                input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH);
                wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0);
                break;
+       case WACOM_HID_WD_BARRELSWITCH3:
+               wacom_wac->hid_data.barrelswitch3 = true;
+               wacom_map_usage(input, usage, field, EV_KEY, BTN_STYLUS3, 0);
+               features->quirks &= ~WACOM_QUIRK_PEN_BUTTON3;
+               break;
        }
 }
 
@@ -2390,6 +2403,14 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
        case WACOM_HID_WD_REPORT_VALID:
                wacom_wac->is_invalid_bt_frame = !value;
                return;
+       case WACOM_HID_WD_BARRELSWITCH3:
+               wacom_wac->hid_data.barrelswitch3 = value;
+               return;
+       case WACOM_HID_WD_SEQUENCENUMBER:
+               if (wacom_wac->hid_data.sequence_number != value)
+                       hid_warn(hdev, "Dropped %hu packets\n", (unsigned short)(value - wacom_wac->hid_data.sequence_number));
+               wacom_wac->hid_data.sequence_number = value + 1;
+               return;
        }
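
/*
 * Editor's note (illustrative, not driver code): because sequence_number
 * and the subtraction result are 16-bit unsigned, the gap computation
 * above stays correct across counter wraparound, e.g.:
 *
 *   expected = 65534, received = 2
 *   (unsigned short)(2 - 65534) == 4   -> "Dropped 4 packets"
 *
 * covering the lost reports 65534, 65535, 0 and 1; the driver then
 * resynchronizes by expecting 3 next.
 */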
 
        /* send pen events only when touch is up or forced out
@@ -2442,12 +2463,15 @@ static void wacom_wac_pen_report(struct hid_device *hdev,
 
        if (!delay_pen_events(wacom_wac) && wacom_wac->tool[0]) {
                int id = wacom_wac->id[0];
-               int sw_state = wacom_wac->hid_data.barrelswitch |
-                              (wacom_wac->hid_data.barrelswitch2 << 1);
-
-               input_report_key(input, BTN_STYLUS, sw_state == 1);
-               input_report_key(input, BTN_STYLUS2, sw_state == 2);
-               input_report_key(input, BTN_STYLUS3, sw_state == 3);
+               if (wacom_wac->features.quirks & WACOM_QUIRK_PEN_BUTTON3 &&
+                   wacom_wac->hid_data.barrelswitch & wacom_wac->hid_data.barrelswitch2) {
+                       wacom_wac->hid_data.barrelswitch = 0;
+                       wacom_wac->hid_data.barrelswitch2 = 0;
+                       wacom_wac->hid_data.barrelswitch3 = 1;
+               }
+               input_report_key(input, BTN_STYLUS, wacom_wac->hid_data.barrelswitch);
+               input_report_key(input, BTN_STYLUS2, wacom_wac->hid_data.barrelswitch2);
+               input_report_key(input, BTN_STYLUS3, wacom_wac->hid_data.barrelswitch3);
 
                /*
                 * Non-USI EMR tools should have their IDs mangled to
@@ -2529,6 +2553,9 @@ static void wacom_wac_finger_usage_mapping(struct hid_device *hdev,
                        field->logical_maximum = 255;
                }
                break;
+       case HID_DG_SCANTIME:
+               wacom_map_usage(input, usage, field, EV_MSC, MSC_TIMESTAMP, 0);
+               break;
        }
 }
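
With HID_DG_SCANTIME now mapped in both the pen and finger paths, userspace receives the tablet's scan-time counter as EV_MSC/MSC_TIMESTAMP events. A small illustrative consumer; the device node is hypothetical and the counter's units are device-defined:

#include <fcntl.h>
#include <linux/input.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct input_event ev;
	/* hypothetical event node for the tablet interface */
	int fd = open("/dev/input/event5", O_RDONLY);

	if (fd < 0)
		return 1;
	while (read(fd, &ev, sizeof(ev)) == (ssize_t)sizeof(ev)) {
		if (ev.type == EV_MSC && ev.code == MSC_TIMESTAMP)
			printf("scan time: %d\n", ev.value);
	}
	close(fd);
	return 0;
}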
 
index 466b62cc16dc19e8f29618ed258ea5404a790150..fef1538005b5d582605c85957bca1ad9d40d7430 100644 (file)
@@ -86,6 +86,7 @@
 #define WACOM_QUIRK_AESPEN             0x0004
 #define WACOM_QUIRK_BATTERY            0x0008
 #define WACOM_QUIRK_TOOLSERIAL         0x0010
+#define WACOM_QUIRK_PEN_BUTTON3        0x0020
 
 /* device types */
 #define WACOM_DEVICETYPE_NONE           0x0000
 #define WACOM_HID_WD_DIGITIZERFNKEYS    (WACOM_HID_UP_WACOMDIGITIZER | 0x39)
 #define WACOM_HID_WD_SERIALNUMBER       (WACOM_HID_UP_WACOMDIGITIZER | 0x5b)
 #define WACOM_HID_WD_SERIALHI           (WACOM_HID_UP_WACOMDIGITIZER | 0x5c)
+#define WACOM_HID_WD_BARRELSWITCH3      (WACOM_HID_UP_WACOMDIGITIZER | 0x5d)
 #define WACOM_HID_WD_TOOLTYPE           (WACOM_HID_UP_WACOMDIGITIZER | 0x77)
 #define WACOM_HID_WD_DISTANCE           (WACOM_HID_UP_WACOMDIGITIZER | 0x0132)
 #define WACOM_HID_WD_TOUCHSTRIP         (WACOM_HID_UP_WACOMDIGITIZER | 0x0136)
 #define WACOM_HID_WD_TOUCHRING          (WACOM_HID_UP_WACOMDIGITIZER | 0x0138)
 #define WACOM_HID_WD_TOUCHRINGSTATUS    (WACOM_HID_UP_WACOMDIGITIZER | 0x0139)
 #define WACOM_HID_WD_REPORT_VALID       (WACOM_HID_UP_WACOMDIGITIZER | 0x01d0)
+#define WACOM_HID_WD_SEQUENCENUMBER     (WACOM_HID_UP_WACOMDIGITIZER | 0x0220)
 #define WACOM_HID_WD_ACCELEROMETER_X    (WACOM_HID_UP_WACOMDIGITIZER | 0x0401)
 #define WACOM_HID_WD_ACCELEROMETER_Y    (WACOM_HID_UP_WACOMDIGITIZER | 0x0402)
 #define WACOM_HID_WD_ACCELEROMETER_Z    (WACOM_HID_UP_WACOMDIGITIZER | 0x0403)
@@ -300,6 +303,7 @@ struct hid_data {
        bool tipswitch;
        bool barrelswitch;
        bool barrelswitch2;
+       bool barrelswitch3;
        bool serialhi;
        bool confidence;
        int x;
@@ -320,6 +324,7 @@ struct hid_data {
        int bat_connected;
        int ps_connected;
        bool pad_input_event_flag;
+       unsigned short sequence_number;
 };
 
 struct wacom_remote_data {
index 68a8a27ab3b7091d3d36974faa3eabb0b145c90a..590d3d550acba5005587179f3dff7fa06f616d65 100644 (file)
@@ -256,11 +256,14 @@ config SENSORS_AHT10
          will be called aht10.
 
 config SENSORS_AQUACOMPUTER_D5NEXT
-       tristate "Aquacomputer D5 Next watercooling pump"
+       tristate "Aquacomputer D5 Next, Octo, Farbwerk, and Farbwerk 360"
        depends on USB_HID
+       select CRC16
        help
-         If you say yes here you get support for the Aquacomputer D5 Next
-         watercooling pump sensors.
+         If you say yes here you get support for sensors and fans of
+         the Aquacomputer D5 Next watercooling pump, Octo fan
+         controller, Farbwerk and Farbwerk 360 RGB controllers, where
+         available.
 
          This driver can also be built as a module. If so, the module
          will be called aquacomputer_d5next.
@@ -415,6 +418,7 @@ config SENSORS_ATXP1
 config SENSORS_BT1_PVT
        tristate "Baikal-T1 Process, Voltage, Temperature sensor driver"
        depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+       select POLYNOMIAL
        help
          If you say yes here you get support for Baikal-T1 PVT sensor
          embedded into the SoC.
@@ -498,6 +502,7 @@ config SENSORS_DS1621
 config SENSORS_DELL_SMM
        tristate "Dell laptop SMM BIOS hwmon driver"
        depends on X86
+       imply THERMAL
        help
          This hwmon driver adds support for reporting temperature of different
          sensors and controls the fans on Dell laptops via System Management
@@ -814,6 +819,18 @@ config SENSORS_POWR1220
          This driver can also be built as a module. If so, the module
          will be called powr1220.
 
+config SENSORS_LAN966X
+       tristate "Microchip LAN966x Hardware Monitoring"
+       depends on SOC_LAN966 || COMPILE_TEST
+       select REGMAP
+       select POLYNOMIAL
+       help
+         If you say yes here you get support for temperature monitoring
+         on the Microchip LAN966x SoC.
+
+         This driver can also be built as a module. If so, the module
+         will be called lan966x-hwmon.
+
 config SENSORS_LINEAGE
        tristate "Lineage Compact Power Line Power Entry Module"
        depends on I2C
@@ -960,7 +977,7 @@ config SENSORS_LTC4261
 
 config SENSORS_LTQ_CPUTEMP
        bool "Lantiq cpu temperature sensor driver"
-       depends on LANTIQ
+       depends on SOC_XWAY
        help
          If you say yes here you get support for the temperature
          sensor inside your CPU.
@@ -1248,6 +1265,7 @@ config SENSORS_LM75
          temperature sensor chip, with models including:
 
                - Analog Devices ADT75
+               - Atmel (now Microchip) AT30TS74
                - Dallas Semiconductor DS75, DS1775 and DS7505
                - Global Mixed-mode Technology (GMT) G751
                - Maxim MAX6625 and MAX6626
@@ -1457,11 +1475,23 @@ config SENSORS_NCT6683
          This driver can also be built as a module. If so, the module
          will be called nct6683.
 
+config SENSORS_NCT6775_CORE
+       tristate
+       select REGMAP
+       help
+         This module contains common code shared by the platform and
+         i2c versions of the nct6775 driver; it is not useful on its
+         own.
+
+         If built as a module, the module will be called
+         nct6775-core.
+
 config SENSORS_NCT6775
-       tristate "Nuvoton NCT6775F and compatibles"
+       tristate "Platform driver for Nuvoton NCT6775F and compatibles"
        depends on !PPC
        depends on ACPI_WMI || ACPI_WMI=n
        select HWMON_VID
+       select SENSORS_NCT6775_CORE
        help
          If you say yes here you get support for the hardware monitoring
          functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D,
@@ -1472,6 +1502,23 @@ config SENSORS_NCT6775
          This driver can also be built as a module. If so, the module
          will be called nct6775.
 
+config SENSORS_NCT6775_I2C
+       tristate "I2C driver for Nuvoton NCT6775F and compatibles"
+       depends on I2C
+       select REGMAP_I2C
+       select SENSORS_NCT6775_CORE
+       help
+         If you say yes here you get support for the hardware monitoring
+         functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D,
+         NCT6791D, NCT6792D, NCT6793D, NCT6795D, NCT6796D, and compatible
+         Super-I/O chips via their I2C interface.
+
+         If you're not building a kernel for a BMC, this is probably
+         not the driver you want (see CONFIG_SENSORS_NCT6775).
+
+         This driver can also be built as a module. If so, the module
+         will be called nct6775-i2c.
+
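
In practice the split yields three objects in a modular build: nct6775-core.ko
with the shared register logic, nct6775.ko for the existing platform/Super-I/O
attachment (now built from nct6775-platform.o, per the Makefile change below),
and nct6775-i2c.ko for BMCs that reach the chip over I2C; enabling either
front end pulls in the core automatically via "select SENSORS_NCT6775_CORE".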
 config SENSORS_NCT7802
        tristate "Nuvoton NCT7802Y"
        depends on I2C
index 8a03289e2aa4584c27340b37960f40008ff3a54a..007e829d1d0d0c5b427b72b57ae628deb4a6bd67 100644 (file)
@@ -100,6 +100,7 @@ obj-$(CONFIG_SENSORS_IT87)  += it87.o
 obj-$(CONFIG_SENSORS_JC42)     += jc42.o
 obj-$(CONFIG_SENSORS_K8TEMP)   += k8temp.o
 obj-$(CONFIG_SENSORS_K10TEMP)  += k10temp.o
+obj-$(CONFIG_SENSORS_LAN966X)  += lan966x-hwmon.o
 obj-$(CONFIG_SENSORS_LINEAGE)  += lineage-pem.o
 obj-$(CONFIG_SENSORS_LOCHNAGAR)        += lochnagar-hwmon.o
 obj-$(CONFIG_SENSORS_LM63)     += lm63.o
@@ -154,7 +155,10 @@ obj-$(CONFIG_SENSORS_MLXREG_FAN) += mlxreg-fan.o
 obj-$(CONFIG_SENSORS_MENF21BMC_HWMON) += menf21bmc_hwmon.o
 obj-$(CONFIG_SENSORS_MR75203)  += mr75203.o
 obj-$(CONFIG_SENSORS_NCT6683)  += nct6683.o
+obj-$(CONFIG_SENSORS_NCT6775_CORE) += nct6775-core.o
+nct6775-objs                   := nct6775-platform.o
 obj-$(CONFIG_SENSORS_NCT6775)  += nct6775.o
+obj-$(CONFIG_SENSORS_NCT6775_I2C) += nct6775-i2c.o
 obj-$(CONFIG_SENSORS_NCT7802)  += nct7802.o
 obj-$(CONFIG_SENSORS_NCT7904)  += nct7904.o
 obj-$(CONFIG_SENSORS_NPCM7XX)  += npcm750-pwm-fan.o
index c405a5869581c9e3a4527fbdb2a7cc1d786822e7..d2545a1be9fc079145ecf39b2ed4f0fd0031d29a 100644 (file)
@@ -481,7 +481,7 @@ static struct sensor_template meter_attrs[] = {
        RO_SENSOR_TEMPLATE("power1_average_interval_max", show_val, 1),
        RO_SENSOR_TEMPLATE("power1_is_battery", show_val, 5),
        RW_SENSOR_TEMPLATE(POWER_AVG_INTERVAL_NAME, show_avg_interval,
-               set_avg_interval, 0),
+                          set_avg_interval, 0),
        {},
 };
 
@@ -530,6 +530,7 @@ static void remove_domain_devices(struct acpi_power_meter_resource *resource)
 
        for (i = 0; i < resource->num_domain_devices; i++) {
                struct acpi_device *obj = resource->domain_devices[i];
+
                if (!obj)
                        continue;
 
@@ -580,7 +581,7 @@ static int read_domain_devices(struct acpi_power_meter_resource *resource)
        }
 
        resource->holders_dir = kobject_create_and_add("measures",
-                                       &resource->acpi_dev->dev.kobj);
+                                                      &resource->acpi_dev->dev.kobj);
        if (!resource->holders_dir) {
                res = -ENOMEM;
                goto exit_free;
@@ -590,7 +591,7 @@ static int read_domain_devices(struct acpi_power_meter_resource *resource)
 
        for (i = 0; i < pss->package.count; i++) {
                struct acpi_device *obj;
-               union acpi_object *element = &(pss->package.elements[i]);
+               union acpi_object *element = &pss->package.elements[i];
 
                /* Refuse non-references */
                if (element->type != ACPI_TYPE_LOCAL_REFERENCE)
@@ -603,7 +604,7 @@ static int read_domain_devices(struct acpi_power_meter_resource *resource)
                        continue;
 
                res = sysfs_create_link(resource->holders_dir, &obj->dev.kobj,
-                                     kobject_name(&obj->dev.kobj));
+                                       kobject_name(&obj->dev.kobj));
                if (res) {
                        acpi_dev_put(obj);
                        resource->domain_devices[i] = NULL;
@@ -788,7 +789,7 @@ static int read_capabilities(struct acpi_power_meter_resource *resource)
        str = &resource->model_number;
 
        for (i = 11; i < 14; i++) {
-               union acpi_object *element = &(pss->package.elements[i]);
+               union acpi_object *element = &pss->package.elements[i];
 
                if (element->type != ACPI_TYPE_STRING) {
                        res = -EINVAL;
@@ -868,8 +869,7 @@ static int acpi_power_meter_add(struct acpi_device *device)
        if (!device)
                return -EINVAL;
 
-       resource = kzalloc(sizeof(struct acpi_power_meter_resource),
-                          GFP_KERNEL);
+       resource = kzalloc(sizeof(*resource), GFP_KERNEL);
        if (!resource)
                return -ENOMEM;
 
@@ -884,7 +884,8 @@ static int acpi_power_meter_add(struct acpi_device *device)
        if (res)
                goto exit_free;
 
-       resource->trip[0] = resource->trip[1] = -1;
+       resource->trip[0] = -1;
+       resource->trip[1] = -1;
 
        res = setup_attrs(resource);
        if (res)
index 9d5b019651f2d715189caaf7ca447e5c9e0321c5..ac480e6e4818b5763b3a0ed9d2b1fdf21a00e945 100644 (file)
 #define CONFIG3_THERM          0x02
 
 #define CONFIG4_PINFUNC                0x03
+#define CONFIG4_THERM          0x01
+#define CONFIG4_SMBALERT       0x02
 #define CONFIG4_MAXDUTY                0x08
 #define CONFIG4_ATTN_IN10      0x30
 #define CONFIG4_ATTN_IN43      0xC0
@@ -1460,6 +1462,96 @@ static int adt7475_update_limits(struct i2c_client *client)
        return 0;
 }
 
+static int load_config3(const struct i2c_client *client, const char *propname)
+{
+       const char *function;
+       u8 config3;
+       int ret;
+
+       ret = of_property_read_string(client->dev.of_node, propname, &function);
+       if (!ret) {
+               ret = adt7475_read(REG_CONFIG3);
+               if (ret < 0)
+                       return ret;
+
+               config3 = ret & ~CONFIG3_SMBALERT;
+               if (!strcmp("pwm2", function))
+                       ;
+               else if (!strcmp("smbalert#", function))
+                       config3 |= CONFIG3_SMBALERT;
+               else
+                       return -EINVAL;
+
+               return i2c_smbus_write_byte_data(client, REG_CONFIG3, config3);
+       }
+
+       return 0;
+}
+
+static int load_config4(const struct i2c_client *client, const char *propname)
+{
+       const char *function;
+       u8 config4;
+       int ret;
+
+       ret = of_property_read_string(client->dev.of_node, propname, &function);
+       if (!ret) {
+               ret = adt7475_read(REG_CONFIG4);
+               if (ret < 0)
+                       return ret;
+
+               config4 = ret & ~CONFIG4_PINFUNC;
+
+               if (!strcmp("tach4", function))
+                       ;
+               else if (!strcmp("therm#", function))
+                       config4 |= CONFIG4_THERM;
+               else if (!strcmp("smbalert#", function))
+                       config4 |= CONFIG4_SMBALERT;
+               else if (!strcmp("gpio", function))
+                       config4 |= CONFIG4_PINFUNC;
+               else
+                       return -EINVAL;
+
+               return i2c_smbus_write_byte_data(client, REG_CONFIG4, config4);
+       }
+
+       return 0;
+}
+
+static int load_config(const struct i2c_client *client, enum chips chip)
+{
+       int err;
+       const char *prop1, *prop2;
+
+       switch (chip) {
+       case adt7473:
+       case adt7475:
+               prop1 = "adi,pin5-function";
+               prop2 = "adi,pin9-function";
+               break;
+       case adt7476:
+       case adt7490:
+               prop1 = "adi,pin10-function";
+               prop2 = "adi,pin14-function";
+               break;
+       }
+
+       err = load_config3(client, prop1);
+       if (err) {
+               dev_err(&client->dev, "failed to configure %s\n", prop1);
+               return err;
+       }
+
+       err = load_config4(client, prop2);
+       if (err) {
+               dev_err(&client->dev, "failed to configure %s\n", prop2);
+               return err;
+       }
+
+       return 0;
+}
+
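
A hypothetical device-tree fragment exercising the two new properties
(property names and accepted values taken from load_config3() and
load_config4() above; adt7476/adt7490 boards use adi,pin10-function and
adi,pin14-function instead):

    adt7475@2e {
            compatible = "adi,adt7475";
            reg = <0x2e>;
            adi,pin5-function = "smbalert#";   /* or "pwm2" */
            adi,pin9-function = "tach4";       /* or "therm#", "smbalert#", "gpio" */
    };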
 static int set_property_bit(const struct i2c_client *client, char *property,
                            u8 *config, u8 bit_index)
 {
@@ -1477,12 +1569,12 @@ static int set_property_bit(const struct i2c_client *client, char *property,
        return ret;
 }
 
-static int load_attenuators(const struct i2c_client *client, int chip,
+static int load_attenuators(const struct i2c_client *client, enum chips chip,
                            struct adt7475_data *data)
 {
-       int ret;
-
-       if (chip == adt7476 || chip == adt7490) {
+       switch (chip) {
+       case adt7476:
+       case adt7490:
                set_property_bit(client, "adi,bypass-attenuator-in0",
                                 &data->config4, 4);
                set_property_bit(client, "adi,bypass-attenuator-in1",
@@ -1492,18 +1584,15 @@ static int load_attenuators(const struct i2c_client *client, int chip,
                set_property_bit(client, "adi,bypass-attenuator-in4",
                                 &data->config4, 7);
 
-               ret = i2c_smbus_write_byte_data(client, REG_CONFIG4,
-                                               data->config4);
-               if (ret < 0)
-                       return ret;
-       } else if (chip == adt7473 || chip == adt7475) {
+               return i2c_smbus_write_byte_data(client, REG_CONFIG4,
+                                                data->config4);
+       case adt7473:
+       case adt7475:
                set_property_bit(client, "adi,bypass-attenuator-in1",
                                 &data->config2, 5);
 
-               ret = i2c_smbus_write_byte_data(client, REG_CONFIG2,
-                                               data->config2);
-               if (ret < 0)
-                       return ret;
+               return i2c_smbus_write_byte_data(client, REG_CONFIG2,
+                                                data->config2);
        }
 
        return 0;
@@ -1585,6 +1674,10 @@ static int adt7475_probe(struct i2c_client *client)
                revision = adt7475_read(REG_DEVID2) & 0x07;
        }
 
+       ret = load_config(client, chip);
+       if (ret)
+               return ret;
+
        config3 = adt7475_read(REG_CONFIG3);
        /* Pin PWM2 may alternatively be used for ALERT output */
        if (!(config3 & CONFIG3_SMBALERT))
index 525809cf7c95244a09bb574759f549f931a01f33..a0e69f7ece36e3aa2e61ebc04e825af58c0dd4bd 100644 (file)
@@ -1,30 +1,37 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * hwmon driver for Aquacomputer devices (D5 Next, Farbwerk 360)
+ * hwmon driver for Aquacomputer devices (D5 Next, Farbwerk, Farbwerk 360, Octo)
  *
  * Aquacomputer devices send HID reports (with ID 0x01) every second to report
  * sensor values.
  *
  * Copyright 2021 Aleksa Savic <savicaleksa83@gmail.com>
+ * Copyright 2022 Jack Doan <me@jackdoan.com>
  */
 
+#include <linux/crc16.h>
 #include <linux/debugfs.h>
 #include <linux/hid.h>
 #include <linux/hwmon.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/seq_file.h>
 #include <asm/unaligned.h>
 
 #define USB_VENDOR_ID_AQUACOMPUTER     0x0c70
+#define USB_PRODUCT_ID_FARBWERK                0xf00a
 #define USB_PRODUCT_ID_D5NEXT          0xf00e
 #define USB_PRODUCT_ID_FARBWERK360     0xf010
+#define USB_PRODUCT_ID_OCTO            0xf011
 
-enum kinds { d5next, farbwerk360 };
+enum kinds { d5next, farbwerk, farbwerk360, octo };
 
 static const char *const aqc_device_names[] = {
        [d5next] = "d5next",
-       [farbwerk360] = "farbwerk360"
+       [farbwerk] = "farbwerk",
+       [farbwerk360] = "farbwerk360",
+       [octo] = "octo"
 };
 
 #define DRIVER_NAME                    "aquacomputer_d5next"
@@ -35,6 +42,18 @@ static const char *const aqc_device_names[] = {
 #define SERIAL_SECOND_PART             5
 #define FIRMWARE_VERSION               13
 
+#define CTRL_REPORT_ID                 0x03
+
+/*
+ * The HID report that the official software always sends after writing
+ * values; currently the same for all devices.
+ */
+#define SECONDARY_CTRL_REPORT_ID       0x02
+#define SECONDARY_CTRL_REPORT_SIZE     0x0B
+
+static u8 secondary_ctrl_report[] = {
+       0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x34, 0xC6
+};
+
 /* Register offsets for the D5 Next pump */
 #define D5NEXT_POWER_CYCLES            24
 
@@ -53,14 +72,46 @@ static const char *const aqc_device_names[] = {
 #define D5NEXT_PUMP_CURRENT            112
 #define D5NEXT_FAN_CURRENT             99
 
+/* Register offsets for the Farbwerk RGB controller */
+#define FARBWERK_NUM_SENSORS           4
+#define FARBWERK_SENSOR_START          0x2f
+#define FARBWERK_SENSOR_SIZE           0x02
+#define FARBWERK_SENSOR_DISCONNECTED   0x7FFF
+
 /* Register offsets for the Farbwerk 360 RGB controller */
 #define FARBWERK360_NUM_SENSORS                4
-#define FARBWERK360_SENSOR_START               0x32
+#define FARBWERK360_SENSOR_START       0x32
 #define FARBWERK360_SENSOR_SIZE                0x02
 #define FARBWERK360_SENSOR_DISCONNECTED        0x7FFF
 
+/* Register offsets for the Octo fan controller */
+#define OCTO_POWER_CYCLES              0x18
+#define OCTO_NUM_FANS                  8
+#define OCTO_FAN_PERCENT_OFFSET                0x00
+#define OCTO_FAN_VOLTAGE_OFFSET                0x02
+#define OCTO_FAN_CURRENT_OFFSET                0x04
+#define OCTO_FAN_POWER_OFFSET          0x06
+#define OCTO_FAN_SPEED_OFFSET          0x08
+
+static u8 octo_sensor_fan_offsets[] = { 0x7D, 0x8A, 0x97, 0xA4, 0xB1, 0xBE, 0xCB, 0xD8 };
+
+#define OCTO_NUM_SENSORS               4
+#define OCTO_SENSOR_START              0x3D
+#define OCTO_SENSOR_SIZE               0x02
+#define OCTO_SENSOR_DISCONNECTED       0x7FFF
+
+#define OCTO_CTRL_REPORT_SIZE                  0x65F
+#define OCTO_CTRL_REPORT_CHECKSUM_OFFSET       0x65D
+#define OCTO_CTRL_REPORT_CHECKSUM_START                0x01
+#define OCTO_CTRL_REPORT_CHECKSUM_LENGTH       0x65C
+
+/* Fan speed registers in Octo control report (from 0-100%) */
+static u16 octo_ctrl_fan_offsets[] = { 0x5B, 0xB0, 0x105, 0x15A, 0x1AF, 0x204, 0x259, 0x2AE };
+
 /* Labels for D5 Next */
-#define L_D5NEXT_COOLANT_TEMP          "Coolant temp"
+static const char *const label_d5next_temp[] = {
+       "Coolant temp"
+};
 
 static const char *const label_d5next_speeds[] = {
        "Pump speed",
@@ -83,7 +134,7 @@ static const char *const label_d5next_current[] = {
        "Fan current"
 };
 
-/* Labels for Farbwerk 360 temperature sensors */
+/* Labels for Farbwerk, Farbwerk 360 and Octo temperature sensors */
 static const char *const label_temp_sensors[] = {
        "Sensor 1",
        "Sensor 2",
@@ -91,32 +142,182 @@ static const char *const label_temp_sensors[] = {
        "Sensor 4"
 };
 
+/* Labels for Octo */
+static const char *const label_fan_speed[] = {
+       "Fan 1 speed",
+       "Fan 2 speed",
+       "Fan 3 speed",
+       "Fan 4 speed",
+       "Fan 5 speed",
+       "Fan 6 speed",
+       "Fan 7 speed",
+       "Fan 8 speed"
+};
+
+static const char *const label_fan_power[] = {
+       "Fan 1 power",
+       "Fan 2 power",
+       "Fan 3 power",
+       "Fan 4 power",
+       "Fan 5 power",
+       "Fan 6 power",
+       "Fan 7 power",
+       "Fan 8 power"
+};
+
+static const char *const label_fan_voltage[] = {
+       "Fan 1 voltage",
+       "Fan 2 voltage",
+       "Fan 3 voltage",
+       "Fan 4 voltage",
+       "Fan 5 voltage",
+       "Fan 6 voltage",
+       "Fan 7 voltage",
+       "Fan 8 voltage"
+};
+
+static const char *const label_fan_current[] = {
+       "Fan 1 current",
+       "Fan 2 current",
+       "Fan 3 current",
+       "Fan 4 current",
+       "Fan 5 current",
+       "Fan 6 current",
+       "Fan 7 current",
+       "Fan 8 current"
+};
+
 struct aqc_data {
        struct hid_device *hdev;
        struct device *hwmon_dev;
        struct dentry *debugfs;
+       struct mutex mutex;     /* Used for locking access when reading and writing PWM values */
        enum kinds kind;
        const char *name;
 
+       int buffer_size;
+       u8 *buffer;
+       int checksum_start;
+       int checksum_length;
+       int checksum_offset;
+
        /* General info, same across all devices */
        u32 serial_number[2];
        u16 firmware_version;
 
-       /* D5 Next specific - how many times the device was powered on */
+       /* How many times the device was powered on */
        u32 power_cycles;
 
        /* Sensor values */
        s32 temp_input[4];
-       u16 speed_input[2];
-       u32 power_input[2];
-       u16 voltage_input[3];
-       u16 current_input[2];
+       u16 speed_input[8];
+       u32 power_input[8];
+       u16 voltage_input[8];
+       u16 current_input[8];
+
+       /* Label values */
+       const char *const *temp_label;
+       const char *const *speed_label;
+       const char *const *power_label;
+       const char *const *voltage_label;
+       const char *const *current_label;
 
        unsigned long updated;
 };
 
-static umode_t aqc_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr,
-                             int channel)
+/* Converts a 0-10000 centi-percent value to a 0-255 PWM value */
+static int aqc_percent_to_pwm(u16 val)
+{
+       return DIV_ROUND_CLOSEST(val * 255, 100 * 100);
+}
+
+/* Converts a 0-255 PWM value to centi-percent (0-10000) */
+static int aqc_pwm_to_percent(long val)
+{
+       if (val < 0 || val > 255)
+               return -EINVAL;
+
+       return DIV_ROUND_CLOSEST(val * 100 * 100, 255);
+}
+
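
The two helpers above round-trip cleanly across the whole 0-255 PWM range.
A standalone userspace sketch that checks this (re-implementing
DIV_ROUND_CLOSEST() for non-negative operands; illustrative only):

    #include <assert.h>
    #include <stdio.h>

    /* same rounding as the kernel's DIV_ROUND_CLOSEST() for positives */
    static long div_round_closest(long x, long d) { return (x + d / 2) / d; }

    static long percent_to_pwm(long v) { return div_round_closest(v * 255, 100 * 100); }
    static long pwm_to_percent(long v) { return div_round_closest(v * 100 * 100, 255); }

    int main(void)
    {
            for (long pwm = 0; pwm <= 255; pwm++)
                    assert(percent_to_pwm(pwm_to_percent(pwm)) == pwm);
            printf("128 <-> %ld centi-percent\n", pwm_to_percent(128));
            return 0;
    }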
+/* Expects the mutex to be locked */
+static int aqc_get_ctrl_data(struct aqc_data *priv)
+{
+       int ret;
+
+       memset(priv->buffer, 0x00, priv->buffer_size);
+       ret = hid_hw_raw_request(priv->hdev, CTRL_REPORT_ID, priv->buffer, priv->buffer_size,
+                                HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
+       if (ret < 0)
+               ret = -ENODATA;
+
+       return ret;
+}
+
+/* Expects the mutex to be locked */
+static int aqc_send_ctrl_data(struct aqc_data *priv)
+{
+       int ret;
+       u16 checksum;
+
+       /* The init and xorout values for CRC-16/USB are both 0xffff */
+       checksum = crc16(0xffff, priv->buffer + priv->checksum_start, priv->checksum_length);
+       checksum ^= 0xffff;
+
+       /* Place the new checksum at the end of the report */
+       put_unaligned_be16(checksum, priv->buffer + priv->checksum_offset);
+
+       /* Send the patched up report back to the device */
+       ret = hid_hw_raw_request(priv->hdev, CTRL_REPORT_ID, priv->buffer, priv->buffer_size,
+                                HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+       if (ret < 0)
+               return ret;
+
+       /* The official software sends this report after every change, so do it here as well */
+       ret = hid_hw_raw_request(priv->hdev, SECONDARY_CTRL_REPORT_ID, secondary_ctrl_report,
+                                SECONDARY_CTRL_REPORT_SIZE, HID_FEATURE_REPORT,
+                                HID_REQ_SET_REPORT);
+       return ret;
+}
+
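
For reference, the checksum computed above is CRC-16/USB: the kernel's
crc16() runs the reflected 0xA001 polynomial with the caller-supplied init
value, and the final XOR with 0xffff is applied by hand. A standalone bitwise
sketch of the same calculation (not the kernel's table-driven lib/crc16.c):

    #include <stddef.h>
    #include <stdint.h>

    /* CRC-16/USB: reflected poly 0xA001, init 0xffff, xorout 0xffff */
    static uint16_t crc16_usb(const uint8_t *buf, size_t len)
    {
            uint16_t crc = 0xffff;

            while (len--) {
                    crc ^= *buf++;
                    for (int i = 0; i < 8; i++)
                            crc = (crc & 1) ? (crc >> 1) ^ 0xA001 : crc >> 1;
            }
            return crc ^ 0xffff;    /* matches crc16(0xffff, buf, len) ^ 0xffff */
    }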
+/* Refreshes the control buffer and returns value at offset */
+static int aqc_get_ctrl_val(struct aqc_data *priv, int offset)
+{
+       int ret;
+
+       mutex_lock(&priv->mutex);
+
+       ret = aqc_get_ctrl_data(priv);
+       if (ret < 0)
+               goto unlock_and_return;
+
+       ret = get_unaligned_be16(priv->buffer + offset);
+
+unlock_and_return:
+       mutex_unlock(&priv->mutex);
+       return ret;
+}
+
+static int aqc_set_ctrl_val(struct aqc_data *priv, int offset, long val)
+{
+       int ret;
+
+       mutex_lock(&priv->mutex);
+
+       ret = aqc_get_ctrl_data(priv);
+       if (ret < 0)
+               goto unlock_and_return;
+
+       put_unaligned_be16((u16)val, priv->buffer + offset);
+
+       ret = aqc_send_ctrl_data(priv);
+
+unlock_and_return:
+       mutex_unlock(&priv->mutex);
+       return ret;
+}
+
+static umode_t aqc_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel)
 {
        const struct aqc_data *priv = data;
 
@@ -127,18 +328,49 @@ static umode_t aqc_is_visible(const void *data, enum hwmon_sensor_types type, u3
                        if (channel == 0)
                                return 0444;
                        break;
+               case farbwerk:
                case farbwerk360:
+               case octo:
                        return 0444;
                default:
                        break;
                }
                break;
+       case hwmon_pwm:
+               switch (priv->kind) {
+               case octo:
+                       switch (attr) {
+                       case hwmon_pwm_input:
+                               return 0644;
+                       default:
+                               break;
+                       }
+                       break;
+               default:
+                       break;
+               }
+               break;
        case hwmon_fan:
        case hwmon_power:
-       case hwmon_in:
        case hwmon_curr:
                switch (priv->kind) {
                case d5next:
+                       if (channel < 2)
+                               return 0444;
+                       break;
+               case octo:
+                       return 0444;
+               default:
+                       break;
+               }
+               break;
+       case hwmon_in:
+               switch (priv->kind) {
+               case d5next:
+                       if (channel < 3)
+                               return 0444;
+                       break;
+               case octo:
                        return 0444;
                default:
                        break;
@@ -154,6 +386,7 @@ static umode_t aqc_is_visible(const void *data, enum hwmon_sensor_types type, u3
 static int aqc_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
                    int channel, long *val)
 {
+       int ret;
        struct aqc_data *priv = dev_get_drvdata(dev);
 
        if (time_after(jiffies, priv->updated + STATUS_UPDATE_INTERVAL))
@@ -172,6 +405,19 @@ static int aqc_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
        case hwmon_power:
                *val = priv->power_input[channel];
                break;
+       case hwmon_pwm:
+               switch (priv->kind) {
+               case octo:
+                       ret = aqc_get_ctrl_val(priv, octo_ctrl_fan_offsets[channel]);
+                       if (ret < 0)
+                               return ret;
+
+                       *val = aqc_percent_to_pwm(ret);
+                       break;
+               default:
+                       break;
+               }
+               break;
        case hwmon_in:
                *val = priv->voltage_input[channel];
                break;
@@ -192,48 +438,51 @@ static int aqc_read_string(struct device *dev, enum hwmon_sensor_types type, u32
 
        switch (type) {
        case hwmon_temp:
-               switch (priv->kind) {
-               case d5next:
-                       *str = L_D5NEXT_COOLANT_TEMP;
-                       break;
-               case farbwerk360:
-                       *str = label_temp_sensors[channel];
-                       break;
-               default:
-                       break;
-               }
+               *str = priv->temp_label[channel];
                break;
        case hwmon_fan:
-               switch (priv->kind) {
-               case d5next:
-                       *str = label_d5next_speeds[channel];
-                       break;
-               default:
-                       break;
-               }
+               *str = priv->speed_label[channel];
                break;
        case hwmon_power:
-               switch (priv->kind) {
-               case d5next:
-                       *str = label_d5next_power[channel];
-                       break;
-               default:
-                       break;
-               }
+               *str = priv->power_label[channel];
                break;
        case hwmon_in:
-               switch (priv->kind) {
-               case d5next:
-                       *str = label_d5next_voltages[channel];
-                       break;
-               default:
-                       break;
-               }
+               *str = priv->voltage_label[channel];
                break;
        case hwmon_curr:
-               switch (priv->kind) {
-               case d5next:
-                       *str = label_d5next_current[channel];
+               *str = priv->current_label[channel];
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int aqc_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel,
+                    long val)
+{
+       int ret, pwm_value;
+       struct aqc_data *priv = dev_get_drvdata(dev);
+
+       switch (type) {
+       case hwmon_pwm:
+               switch (attr) {
+               case hwmon_pwm_input:
+                       switch (priv->kind) {
+                       case octo:
+                               pwm_value = aqc_pwm_to_percent(val);
+                               if (pwm_value < 0)
+                                       return pwm_value;
+
+                               ret = aqc_set_ctrl_val(priv, octo_ctrl_fan_offsets[channel],
+                                                      pwm_value);
+                               if (ret < 0)
+                                       return ret;
+                               break;
+                       default:
+                               break;
+                       }
                        break;
                default:
                        break;
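
With pwm1-pwm8 exposed read-write (mode 0644 in aqc_is_visible() above), fan
duty can be driven through the standard hwmon sysfs interface, e.g. by writing
a value between 0 and 255 to /sys/class/hwmon/hwmonX/pwm1; aqc_write() then
converts it to the device's centi-percent scale before patching the control
report.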
@@ -250,6 +499,7 @@ static const struct hwmon_ops aqc_hwmon_ops = {
        .is_visible = aqc_is_visible,
        .read = aqc_read,
        .read_string = aqc_read_string,
+       .write = aqc_write
 };
 
 static const struct hwmon_channel_info *aqc_info[] = {
@@ -259,16 +509,48 @@ static const struct hwmon_channel_info *aqc_info[] = {
                           HWMON_T_INPUT | HWMON_T_LABEL,
                           HWMON_T_INPUT | HWMON_T_LABEL),
        HWMON_CHANNEL_INFO(fan,
+                          HWMON_F_INPUT | HWMON_F_LABEL,
+                          HWMON_F_INPUT | HWMON_F_LABEL,
+                          HWMON_F_INPUT | HWMON_F_LABEL,
+                          HWMON_F_INPUT | HWMON_F_LABEL,
+                          HWMON_F_INPUT | HWMON_F_LABEL,
+                          HWMON_F_INPUT | HWMON_F_LABEL,
                           HWMON_F_INPUT | HWMON_F_LABEL,
                           HWMON_F_INPUT | HWMON_F_LABEL),
        HWMON_CHANNEL_INFO(power,
+                          HWMON_P_INPUT | HWMON_P_LABEL,
+                          HWMON_P_INPUT | HWMON_P_LABEL,
+                          HWMON_P_INPUT | HWMON_P_LABEL,
+                          HWMON_P_INPUT | HWMON_P_LABEL,
+                          HWMON_P_INPUT | HWMON_P_LABEL,
+                          HWMON_P_INPUT | HWMON_P_LABEL,
                           HWMON_P_INPUT | HWMON_P_LABEL,
                           HWMON_P_INPUT | HWMON_P_LABEL),
+       HWMON_CHANNEL_INFO(pwm,
+                          HWMON_PWM_INPUT,
+                          HWMON_PWM_INPUT,
+                          HWMON_PWM_INPUT,
+                          HWMON_PWM_INPUT,
+                          HWMON_PWM_INPUT,
+                          HWMON_PWM_INPUT,
+                          HWMON_PWM_INPUT,
+                          HWMON_PWM_INPUT),
        HWMON_CHANNEL_INFO(in,
+                          HWMON_I_INPUT | HWMON_I_LABEL,
+                          HWMON_I_INPUT | HWMON_I_LABEL,
+                          HWMON_I_INPUT | HWMON_I_LABEL,
+                          HWMON_I_INPUT | HWMON_I_LABEL,
+                          HWMON_I_INPUT | HWMON_I_LABEL,
                           HWMON_I_INPUT | HWMON_I_LABEL,
                           HWMON_I_INPUT | HWMON_I_LABEL,
                           HWMON_I_INPUT | HWMON_I_LABEL),
        HWMON_CHANNEL_INFO(curr,
+                          HWMON_C_INPUT | HWMON_C_LABEL,
+                          HWMON_C_INPUT | HWMON_C_LABEL,
+                          HWMON_C_INPUT | HWMON_C_LABEL,
+                          HWMON_C_INPUT | HWMON_C_LABEL,
+                          HWMON_C_INPUT | HWMON_C_LABEL,
+                          HWMON_C_INPUT | HWMON_C_LABEL,
                           HWMON_C_INPUT | HWMON_C_LABEL,
                           HWMON_C_INPUT | HWMON_C_LABEL),
        NULL
@@ -279,8 +561,7 @@ static const struct hwmon_chip_info aqc_chip_info = {
        .info = aqc_info,
 };
 
-static int aqc_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data,
-                        int size)
+static int aqc_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size)
 {
        int i, sensor_value;
        struct aqc_data *priv;
@@ -315,6 +596,17 @@ static int aqc_raw_event(struct hid_device *hdev, struct hid_report *report, u8
                priv->current_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_CURRENT);
                priv->current_input[1] = get_unaligned_be16(data + D5NEXT_FAN_CURRENT);
                break;
+       case farbwerk:
+               /* Temperature sensor readings */
+               for (i = 0; i < FARBWERK_NUM_SENSORS; i++) {
+                       sensor_value = get_unaligned_be16(data + FARBWERK_SENSOR_START +
+                                                         i * FARBWERK_SENSOR_SIZE);
+                       if (sensor_value == FARBWERK_SENSOR_DISCONNECTED)
+                               priv->temp_input[i] = -ENODATA;
+                       else
+                               priv->temp_input[i] = sensor_value * 10;
+               }
+               break;
        case farbwerk360:
                /* Temperature sensor readings */
                for (i = 0; i < FARBWERK360_NUM_SENSORS; i++) {
@@ -326,6 +618,35 @@ static int aqc_raw_event(struct hid_device *hdev, struct hid_report *report, u8
                                priv->temp_input[i] = sensor_value * 10;
                }
                break;
+       case octo:
+               priv->power_cycles = get_unaligned_be32(data + OCTO_POWER_CYCLES);
+
+               /* Fan speed and related readings */
+               for (i = 0; i < OCTO_NUM_FANS; i++) {
+                       priv->speed_input[i] =
+                           get_unaligned_be16(data + octo_sensor_fan_offsets[i] +
+                                              OCTO_FAN_SPEED_OFFSET);
+                       priv->power_input[i] =
+                           get_unaligned_be16(data + octo_sensor_fan_offsets[i] +
+                                              OCTO_FAN_POWER_OFFSET) * 10000;
+                       priv->voltage_input[i] =
+                           get_unaligned_be16(data + octo_sensor_fan_offsets[i] +
+                                              OCTO_FAN_VOLTAGE_OFFSET) * 10;
+                       priv->current_input[i] =
+                           get_unaligned_be16(data + octo_sensor_fan_offsets[i] +
+                                              OCTO_FAN_CURRENT_OFFSET);
+               }
+
+               /* Temperature sensor readings */
+               for (i = 0; i < OCTO_NUM_SENSORS; i++) {
+                       sensor_value = get_unaligned_be16(data + OCTO_SENSOR_START +
+                                                         i * OCTO_SENSOR_SIZE);
+                       if (sensor_value == OCTO_SENSOR_DISCONNECTED)
+                               priv->temp_input[i] = -ENODATA;
+                       else
+                               priv->temp_input[i] = sensor_value * 10;
+               }
+               break;
        default:
                break;
        }
@@ -378,8 +699,14 @@ static void aqc_debugfs_init(struct aqc_data *priv)
        debugfs_create_file("serial_number", 0444, priv->debugfs, priv, &serial_number_fops);
        debugfs_create_file("firmware_version", 0444, priv->debugfs, priv, &firmware_version_fops);
 
-       if (priv->kind == d5next)
+       switch (priv->kind) {
+       case d5next:
+       case octo:
                debugfs_create_file("power_cycles", 0444, priv->debugfs, priv, &power_cycles_fops);
+               break;
+       default:
+               break;
+       }
 }
 
 #else
@@ -419,9 +746,35 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
        switch (hdev->product) {
        case USB_PRODUCT_ID_D5NEXT:
                priv->kind = d5next;
+
+               priv->temp_label = label_d5next_temp;
+               priv->speed_label = label_d5next_speeds;
+               priv->power_label = label_d5next_power;
+               priv->voltage_label = label_d5next_voltages;
+               priv->current_label = label_d5next_current;
+               break;
+       case USB_PRODUCT_ID_FARBWERK:
+               priv->kind = farbwerk;
+
+               priv->temp_label = label_temp_sensors;
                break;
        case USB_PRODUCT_ID_FARBWERK360:
                priv->kind = farbwerk360;
+
+               priv->temp_label = label_temp_sensors;
+               break;
+       case USB_PRODUCT_ID_OCTO:
+               priv->kind = octo;
+               priv->buffer_size = OCTO_CTRL_REPORT_SIZE;
+               priv->checksum_start = OCTO_CTRL_REPORT_CHECKSUM_START;
+               priv->checksum_length = OCTO_CTRL_REPORT_CHECKSUM_LENGTH;
+               priv->checksum_offset = OCTO_CTRL_REPORT_CHECKSUM_OFFSET;
+
+               priv->temp_label = label_temp_sensors;
+               priv->speed_label = label_fan_speed;
+               priv->power_label = label_fan_power;
+               priv->voltage_label = label_fan_voltage;
+               priv->current_label = label_fan_current;
                break;
        default:
                break;
@@ -429,6 +782,14 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
        priv->name = aqc_device_names[priv->kind];
 
+       priv->buffer = devm_kzalloc(&hdev->dev, priv->buffer_size, GFP_KERNEL);
+       if (!priv->buffer) {
+               ret = -ENOMEM;
+               goto fail_and_close;
+       }
+
+       mutex_init(&priv->mutex);
+
        priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, priv->name, priv,
                                                          &aqc_chip_info, NULL);
 
@@ -461,7 +822,9 @@ static void aqc_remove(struct hid_device *hdev)
 
 static const struct hid_device_id aqc_table[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_D5NEXT) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_FARBWERK) },
        { HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_FARBWERK360) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_OCTO) },
        { }
 };
 
@@ -491,4 +854,5 @@ module_exit(aqc_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Aleksa Savic <savicaleksa83@gmail.com>");
+MODULE_AUTHOR("Jack Doan <me@jackdoan.com>");
 MODULE_DESCRIPTION("Hwmon driver for Aquacomputer devices");
index 464244ba8d584a7db2def5afd2b5f05365d80719..63b5b2d6e593072c69ccbe18e537bb69a9e1f5f2 100644 (file)
@@ -76,18 +76,8 @@ as370_hwmon_is_visible(const void *data, enum hwmon_sensor_types type,
        }
 }
 
-static const u32 as370_hwmon_temp_config[] = {
-       HWMON_T_INPUT,
-       0
-};
-
-static const struct hwmon_channel_info as370_hwmon_temp = {
-       .type = hwmon_temp,
-       .config = as370_hwmon_temp_config,
-};
-
 static const struct hwmon_channel_info *as370_hwmon_info[] = {
-       &as370_hwmon_temp,
+       HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
        NULL
 };
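
The HWMON_CHANNEL_INFO() helper builds, as anonymous compound literals,
essentially the same two objects deleted above (a zero-terminated u32 config
array plus a hwmon_channel_info referencing it), so the conversion is purely
mechanical boilerplate removal.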
 
index b5cf0136360cdf0186b1e0b69c65e7caee697d47..57e11b2bab742b05e16edd60dfb46b31e14a16bd 100644 (file)
@@ -54,8 +54,10 @@ static char *mutex_path_override;
 /* ACPI mutex for locking access to the EC for the firmware */
 #define ASUS_HW_ACCESS_MUTEX_ASMX      "\\AMW0.ASMX"
 
-/* There are two variants of the vendor spelling */
-#define VENDOR_ASUS_UPPER_CASE "ASUSTeK COMPUTER INC."
+#define MAX_IDENTICAL_BOARD_VARIATIONS 3
+
+/* Moniker for the ACPI global lock (':' is not allowed in ASL identifiers) */
+#define ACPI_GLOBAL_LOCK_PSEUDO_PATH   ":GLOBAL_LOCK"
 
 typedef union {
        u32 value;
@@ -133,8 +135,44 @@ enum ec_sensors {
 #define SENSOR_TEMP_WATER_IN BIT(ec_sensor_temp_water_in)
 #define SENSOR_TEMP_WATER_OUT BIT(ec_sensor_temp_water_out)
 
+enum board_family {
+       family_unknown,
+       family_amd_400_series,
+       family_amd_500_series,
+};
+
 /* All the known sensors for ASUS EC controllers */
-static const struct ec_sensor_info known_ec_sensors[] = {
+static const struct ec_sensor_info sensors_family_amd_400[] = {
+       [ec_sensor_temp_chipset] =
+               EC_SENSOR("Chipset", hwmon_temp, 1, 0x00, 0x3a),
+       [ec_sensor_temp_cpu] =
+               EC_SENSOR("CPU", hwmon_temp, 1, 0x00, 0x3b),
+       [ec_sensor_temp_mb] =
+               EC_SENSOR("Motherboard", hwmon_temp, 1, 0x00, 0x3c),
+       [ec_sensor_temp_t_sensor] =
+               EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d),
+       [ec_sensor_temp_vrm] =
+               EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e),
+       [ec_sensor_in_cpu_core] =
+               EC_SENSOR("CPU Core", hwmon_in, 2, 0x00, 0xa2),
+       [ec_sensor_fan_cpu_opt] =
+               EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xbc),
+       [ec_sensor_fan_vrm_hs] =
+               EC_SENSOR("VRM HS", hwmon_fan, 2, 0x00, 0xb2),
+       [ec_sensor_fan_chipset] =
+               /* no chipset fans in this generation */
+               EC_SENSOR("Chipset", hwmon_fan, 0, 0x00, 0x00),
+       [ec_sensor_fan_water_flow] =
+               EC_SENSOR("Water_Flow", hwmon_fan, 2, 0x00, 0xb4),
+       [ec_sensor_curr_cpu] =
+               EC_SENSOR("CPU", hwmon_curr, 1, 0x00, 0xf4),
+       [ec_sensor_temp_water_in] =
+               EC_SENSOR("Water_In", hwmon_temp, 1, 0x01, 0x0d),
+       [ec_sensor_temp_water_out] =
+               EC_SENSOR("Water_Out", hwmon_temp, 1, 0x01, 0x0b),
+};
+
+static const struct ec_sensor_info sensors_family_amd_500[] = {
        [ec_sensor_temp_chipset] =
                EC_SENSOR("Chipset", hwmon_temp, 1, 0x00, 0x3a),
        [ec_sensor_temp_cpu] = EC_SENSOR("CPU", hwmon_temp, 1, 0x00, 0x3b),
@@ -164,68 +202,134 @@ static const struct ec_sensor_info known_ec_sensors[] = {
        (SENSOR_TEMP_CHIPSET | SENSOR_TEMP_CPU | SENSOR_TEMP_MB)
 #define SENSOR_SET_TEMP_WATER (SENSOR_TEMP_WATER_IN | SENSOR_TEMP_WATER_OUT)
 
-#define DMI_EXACT_MATCH_BOARD(vendor, name, sensors) {                         \
-       .matches = {                                                           \
-               DMI_EXACT_MATCH(DMI_BOARD_VENDOR, vendor),                     \
-               DMI_EXACT_MATCH(DMI_BOARD_NAME, name),                         \
-       },                                                                     \
-       .driver_data = (void *)(sensors), \
-}
+struct ec_board_info {
+       const char *board_names[MAX_IDENTICAL_BOARD_VARIATIONS];
+       unsigned long sensors;
+       /*
+        * Defines which mutex to use for guarding access to the state and the
+        * hardware. Can be either a full path to an AML mutex or the
+        * pseudo-path ACPI_GLOBAL_LOCK_PSEUDO_PATH to use the global ACPI
+        * lock. It must not be left empty: setup_lock_data() rejects boards
+        * that do not declare a guard mutex.
+        */
+       const char *mutex_path;
+       enum board_family family;
+};
 
-static const struct dmi_system_id asus_ec_dmi_table[] __initconst = {
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "PRIME X570-PRO",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
-               SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "Pro WS X570-ACE",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
-               SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
-                             "ROG CROSSHAIR VIII DARK HERO",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
-               SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
-               SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW |
-               SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
-                             "ROG CROSSHAIR VIII FORMULA",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
-               SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
-               SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII HERO",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
-               SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
-               SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
-               SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
-                             "ROG CROSSHAIR VIII HERO (WI-FI)",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
-               SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
-               SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
-               SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
-                             "ROG CROSSHAIR VIII IMPACT",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
-               SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET |
-               SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-E GAMING",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB |
-               SENSOR_TEMP_T_SENSOR |
-               SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-I GAMING",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB |
-               SENSOR_TEMP_T_SENSOR |
-               SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS |
-               SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-E GAMING",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB |
-               SENSOR_TEMP_T_SENSOR |
-               SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET |
-               SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-F GAMING",
-               SENSOR_SET_TEMP_CHIPSET_CPU_MB |
-               SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET),
-       DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-I GAMING",
-               SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS |
-               SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
+static const struct ec_board_info board_info[] = {
+       {
+               .board_names = {"PRIME X470-PRO"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM |
+                       SENSOR_FAN_CPU_OPT |
+                       SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE,
+               .mutex_path = ACPI_GLOBAL_LOCK_PSEUDO_PATH,
+               .family = family_amd_400_series,
+       },
+       {
+               .board_names = {"PRIME X570-PRO"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"ProArt X570-CREATOR WIFI"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CPU_OPT |
+                       SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"Pro WS X570-ACE"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET |
+                       SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"ROG CROSSHAIR VIII DARK HERO"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+                       SENSOR_TEMP_T_SENSOR |
+                       SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
+                       SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW |
+                       SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {
+                       "ROG CROSSHAIR VIII FORMULA",
+                       "ROG CROSSHAIR VIII HERO",
+                       "ROG CROSSHAIR VIII HERO (WI-FI)",
+               },
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+                       SENSOR_TEMP_T_SENSOR |
+                       SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
+                       SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
+                       SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU |
+                       SENSOR_IN_CPU_CORE,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"ROG CROSSHAIR VIII IMPACT"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM |
+                       SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU |
+                       SENSOR_IN_CPU_CORE,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"ROG STRIX B550-E GAMING"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM |
+                       SENSOR_FAN_CPU_OPT,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"ROG STRIX B550-I GAMING"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM |
+                       SENSOR_FAN_VRM_HS | SENSOR_CURR_CPU |
+                       SENSOR_IN_CPU_CORE,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"ROG STRIX X570-E GAMING"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM |
+                       SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU |
+                       SENSOR_IN_CPU_CORE,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"ROG STRIX X570-E GAMING WIFI II"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_CURR_CPU |
+                       SENSOR_IN_CPU_CORE,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"ROG STRIX X570-F GAMING"},
+               .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+                       SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
+       {
+               .board_names = {"ROG STRIX X570-I GAMING"},
+               .sensors = SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS |
+                       SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU |
+                       SENSOR_IN_CPU_CORE,
+               .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
+               .family = family_amd_500_series,
+       },
        {}
 };
 
@@ -234,8 +338,49 @@ struct ec_sensor {
        s32 cached_value;
 };
 
+struct lock_data {
+       union {
+               acpi_handle aml;
+               /* global lock handle */
+               u32 glk;
+       } mutex;
+       bool (*lock)(struct lock_data *data);
+       bool (*unlock)(struct lock_data *data);
+};
+
+/*
+ * The following function pairs implement the locking options for guarding
+ * access to the state and the EC.
+ */
+static bool lock_via_acpi_mutex(struct lock_data *data)
+{
+       /*
+        * ASUS DSDT does not specify that access to the EC has to be guarded,
+        * but firmware does access it via ACPI
+        */
+       return ACPI_SUCCESS(acpi_acquire_mutex(data->mutex.aml,
+                                              NULL, ACPI_LOCK_DELAY_MS));
+}
+
+static bool unlock_acpi_mutex(struct lock_data *data)
+{
+       return ACPI_SUCCESS(acpi_release_mutex(data->mutex.aml, NULL));
+}
+
+static bool lock_via_global_acpi_lock(struct lock_data *data)
+{
+       return ACPI_SUCCESS(acpi_acquire_global_lock(ACPI_LOCK_DELAY_MS,
+                                                    &data->mutex.glk));
+}
+
+static bool unlock_global_acpi_lock(struct lock_data *data)
+{
+       return ACPI_SUCCESS(acpi_release_global_lock(data->mutex.glk));
+}
+
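
The function-pointer pair lets callers take and drop whichever guard the
board declares (AML mutex or global ACPI lock) without branching on the lock
type at every call site; setup_lock_data() below picks the pair once at
probe time.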
 struct ec_sensors_data {
-       unsigned long board_sensors;
+       const struct ec_board_info *board_info;
+       const struct ec_sensor_info *sensors_info;
        struct ec_sensor *sensors;
        /* EC registers to read from */
        u16 *registers;
@@ -244,7 +389,7 @@ struct ec_sensors_data {
        u8 banks[ASUS_EC_MAX_BANK + 1];
        /* in jiffies */
        unsigned long last_updated;
-       acpi_handle aml_mutex;
+       struct lock_data lock_data;
        /* number of board EC sensors */
        u8 nr_sensors;
        /*
@@ -278,7 +423,7 @@ static bool is_sensor_data_signed(const struct ec_sensor_info *si)
 static const struct ec_sensor_info *
 get_sensor_info(const struct ec_sensors_data *state, int index)
 {
-       return &known_ec_sensors[state->sensors[index].info_index];
+       return state->sensors_info + state->sensors[index].info_index;
 }
 
 static int find_ec_sensor_index(const struct ec_sensors_data *ec,
@@ -301,11 +446,6 @@ static int __init bank_compare(const void *a, const void *b)
        return *((const s8 *)a) - *((const s8 *)b);
 }
 
-static int __init board_sensors_count(unsigned long sensors)
-{
-       return hweight_long(sensors);
-}
-
 static void __init setup_sensor_data(struct ec_sensors_data *ec)
 {
        struct ec_sensor *s = ec->sensors;
@@ -316,14 +456,14 @@ static void __init setup_sensor_data(struct ec_sensors_data *ec)
        ec->nr_banks = 0;
        ec->nr_registers = 0;
 
-       for_each_set_bit(i, &ec->board_sensors,
-                         BITS_PER_TYPE(ec->board_sensors)) {
+       for_each_set_bit(i, &ec->board_info->sensors,
+                        BITS_PER_TYPE(ec->board_info->sensors)) {
                s->info_index = i;
                s->cached_value = 0;
                ec->nr_registers +=
-                       known_ec_sensors[s->info_index].addr.components.size;
+                       ec->sensors_info[s->info_index].addr.components.size;
                bank_found = false;
-               bank = known_ec_sensors[s->info_index].addr.components.bank;
+               bank = ec->sensors_info[s->info_index].addr.components.bank;
                for (j = 0; j < ec->nr_banks; j++) {
                        if (ec->banks[j] == bank) {
                                bank_found = true;
@@ -353,23 +493,36 @@ static void __init fill_ec_registers(struct ec_sensors_data *ec)
        }
 }
 
-static acpi_handle __init asus_hw_access_mutex(struct device *dev)
+static int __init setup_lock_data(struct device *dev)
 {
        const char *mutex_path;
-       acpi_handle res;
        int status;
+       struct ec_sensors_data *state = dev_get_drvdata(dev);
 
        mutex_path = mutex_path_override ?
-               mutex_path_override : ASUS_HW_ACCESS_MUTEX_ASMX;
+               mutex_path_override : state->board_info->mutex_path;
 
-       status = acpi_get_handle(NULL, (acpi_string)mutex_path, &res);
-       if (ACPI_FAILURE(status)) {
-               dev_err(dev,
-                       "Could not get hardware access guard mutex '%s': error %d",
-                       mutex_path, status);
-               return NULL;
+       if (!mutex_path || !strlen(mutex_path)) {
+               dev_err(dev, "Hardware access guard mutex name is empty");
+               return -EINVAL;
        }
-       return res;
+       if (!strcmp(mutex_path, ACPI_GLOBAL_LOCK_PSEUDO_PATH)) {
+               state->lock_data.mutex.glk = 0;
+               state->lock_data.lock = lock_via_global_acpi_lock;
+               state->lock_data.unlock = unlock_global_acpi_lock;
+       } else {
+               status = acpi_get_handle(NULL, (acpi_string)mutex_path,
+                                        &state->lock_data.mutex.aml);
+               if (ACPI_FAILURE(status)) {
+                       dev_err(dev,
+                               "Failed to get hardware access guard AML mutex '%s': error %d",
+                               mutex_path, status);
+                       return -ENOENT;
+               }
+               state->lock_data.lock = lock_via_acpi_mutex;
+               state->lock_data.unlock = unlock_acpi_mutex;
+       }
+       return 0;
 }
 
 static int asus_ec_bank_switch(u8 bank, u8 *old)
@@ -457,10 +610,11 @@ static inline s32 get_sensor_value(const struct ec_sensor_info *si, u8 *data)
 static void update_sensor_values(struct ec_sensors_data *ec, u8 *data)
 {
        const struct ec_sensor_info *si;
-       struct ec_sensor *s;
+       struct ec_sensor *s, *sensor_end;
 
-       for (s = ec->sensors; s != ec->sensors + ec->nr_sensors; s++) {
-               si = &known_ec_sensors[s->info_index];
+       sensor_end = ec->sensors + ec->nr_sensors;
+       for (s = ec->sensors; s != sensor_end; s++) {
+               si = ec->sensors_info + s->info_index;
                s->cached_value = get_sensor_value(si, data);
                data += si->addr.components.size;
        }
@@ -471,15 +625,9 @@ static int update_ec_sensors(const struct device *dev,
 {
        int status;
 
-       /*
-        * ASUS DSDT does not specify that access to the EC has to be guarded,
-        * but firmware does access it via ACPI
-        */
-       if (ACPI_FAILURE(acpi_acquire_mutex(ec->aml_mutex, NULL,
-                                           ACPI_LOCK_DELAY_MS))) {
-               dev_err(dev, "Failed to acquire AML mutex");
-               status = -EBUSY;
-               goto cleanup;
+       if (!ec->lock_data.lock(&ec->lock_data)) {
+               dev_warn(dev, "Failed to acquire mutex");
+               return -EBUSY;
        }
 
        status = asus_ec_block_read(dev, ec);
@@ -487,10 +635,10 @@ static int update_ec_sensors(const struct device *dev,
        if (!status) {
                update_sensor_values(ec, ec->read_buffer);
        }
-       if (ACPI_FAILURE(acpi_release_mutex(ec->aml_mutex, NULL))) {
-               dev_err(dev, "Failed to release AML mutex");
-       }
-cleanup:
+
+       if (!ec->lock_data.unlock(&ec->lock_data))
+               dev_err(dev, "Failed to release mutex");
+
        return status;
 }
 
@@ -597,12 +745,24 @@ static struct hwmon_chip_info asus_ec_chip_info = {
        .ops = &asus_ec_hwmon_ops,
 };
 
-static unsigned long __init get_board_sensors(void)
+static const struct ec_board_info * __init get_board_info(void)
 {
-       const struct dmi_system_id *dmi_entry =
-               dmi_first_match(asus_ec_dmi_table);
+       const char *dmi_board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
+       const char *dmi_board_name = dmi_get_system_info(DMI_BOARD_NAME);
+       const struct ec_board_info *board;
 
-       return dmi_entry ? (unsigned long)dmi_entry->driver_data : 0;
+       if (!dmi_board_vendor || !dmi_board_name ||
+           strcasecmp(dmi_board_vendor, "ASUSTeK COMPUTER INC."))
+               return NULL;
+
+       for (board = board_info; board->sensors; board++) {
+               if (match_string(board->board_names,
+                                MAX_IDENTICAL_BOARD_VARIATIONS,
+                                dmi_board_name) >= 0)
+                       return board;
+       }
+
+       return NULL;
 }
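
Note: the shape of a board_info table entry, as implied by the accesses in
the matching loop above and in asus_ec_probe() below; the real table sits
outside these hunks, so the board name and sensor bits here are purely
illustrative:

    {
            .board_names = { "PRIME X570-PRO" },  /* up to MAX_IDENTICAL_BOARD_VARIATIONS names */
            .sensors = BIT(0) | BIT(3),           /* bits index into the family's sensors_info[] */
            .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX,
            .family = family_amd_500_series,
    },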
 
 static int __init asus_ec_probe(struct platform_device *pdev)
@@ -610,17 +770,18 @@ static int __init asus_ec_probe(struct platform_device *pdev)
        const struct hwmon_channel_info **ptr_asus_ec_ci;
        int nr_count[hwmon_max] = { 0 }, nr_types = 0;
        struct hwmon_channel_info *asus_ec_hwmon_chan;
+       const struct ec_board_info *pboard_info;
        const struct hwmon_chip_info *chip_info;
        struct device *dev = &pdev->dev;
        struct ec_sensors_data *ec_data;
        const struct ec_sensor_info *si;
        enum hwmon_sensor_types type;
-       unsigned long board_sensors;
        struct device *hwdev;
        unsigned int i;
+       int status;
 
-       board_sensors = get_board_sensors();
-       if (!board_sensors)
+       pboard_info = get_board_info();
+       if (!pboard_info)
                return -ENODEV;
 
        ec_data = devm_kzalloc(dev, sizeof(struct ec_sensors_data),
@@ -629,11 +790,31 @@ static int __init asus_ec_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        dev_set_drvdata(dev, ec_data);
-       ec_data->board_sensors = board_sensors;
-       ec_data->nr_sensors = board_sensors_count(ec_data->board_sensors);
+       ec_data->board_info = pboard_info;
+
+       switch (ec_data->board_info->family) {
+       case family_amd_400_series:
+               ec_data->sensors_info = sensors_family_amd_400;
+               break;
+       case family_amd_500_series:
+               ec_data->sensors_info = sensors_family_amd_500;
+               break;
+       default:
+               dev_err(dev, "Unknown board family: %d",
+                       ec_data->board_info->family);
+               return -EINVAL;
+       }
+
+       ec_data->nr_sensors = hweight_long(ec_data->board_info->sensors);
        ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors,
                                        sizeof(struct ec_sensor), GFP_KERNEL);
 
+       status = setup_lock_data(dev);
+       if (status) {
+               dev_err(dev, "Failed to setup state/EC locking: %d", status);
+               return status;
+       }
+
        setup_sensor_data(ec_data);
        ec_data->registers = devm_kcalloc(dev, ec_data->nr_registers,
                                          sizeof(u16), GFP_KERNEL);
@@ -645,8 +826,6 @@ static int __init asus_ec_probe(struct platform_device *pdev)
 
        fill_ec_registers(ec_data);
 
-       ec_data->aml_mutex = asus_hw_access_mutex(dev);
-
        for (i = 0; i < ec_data->nr_sensors; ++i) {
                si = get_sensor_info(ec_data, i);
                if (!nr_count[si->type])
@@ -703,7 +882,14 @@ static struct platform_driver asus_ec_sensors_platform_driver = {
        },
 };
 
-MODULE_DEVICE_TABLE(dmi, asus_ec_dmi_table);
+MODULE_DEVICE_TABLE(acpi, acpi_ec_ids);
+/*
+ * We use module_platform_driver_probe() rather than module_platform_driver()
+ * because the probe function (and its dependents) is marked __init, which
+ * means we can't put it into the .probe member of the platform_driver struct
+ * above, and we can't mark the asus_ec_sensors_platform_driver object as
+ * __init because the object is referenced from the module exit code.
+ */
 module_platform_driver_probe(asus_ec_sensors_platform_driver, asus_ec_probe);
 
 module_param_named(mutex_path, mutex_path_override, charp, 0);
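
Note: a minimal sketch of the two lock paths selected by setup_lock_data()
above, inferred from the removed acpi_acquire_mutex() call; the callback
bodies are outside the hunks shown, so the lock_data layout and the boolean
return convention here are assumptions, not the driver's verbatim code:

    static bool lock_via_acpi_mutex(struct lock_data *ld)
    {
            /* the same call the old code made inline, now behind a callback */
            return ACPI_SUCCESS(acpi_acquire_mutex(ld->mutex.aml, NULL,
                                                   ACPI_LOCK_DELAY_MS));
    }

    static bool lock_via_global_acpi_lock(struct lock_data *ld)
    {
            /* global-lock path, taken for ACPI_GLOBAL_LOCK_PSEUDO_PATH */
            return ACPI_SUCCESS(acpi_acquire_global_lock(ACPI_LOCK_DELAY_MS,
                                                         &ld->mutex.glk));
    }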
index 74ce5211eb752c856ee38fd6bedeb900fc193717..21ab172774ec58b27bc1420c7b91529fada02f93 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/mutex.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/polynomial.h>
 #include <linux/seqlock.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
@@ -65,7 +66,7 @@ static const struct pvt_sensor_info pvt_info[] = {
  *     48380,
  * where T = [-48380, 147438] mC and N = [0, 1023].
  */
-static const struct pvt_poly __maybe_unused poly_temp_to_N = {
+static const struct polynomial __maybe_unused poly_temp_to_N = {
        .total_divider = 10000,
        .terms = {
                {4, 18322, 10000, 10000},
@@ -76,7 +77,7 @@ static const struct pvt_poly __maybe_unused poly_temp_to_N = {
        }
 };
 
-static const struct pvt_poly poly_N_to_temp = {
+static const struct polynomial poly_N_to_temp = {
        .total_divider = 1,
        .terms = {
                {4, -16743, 1000, 1},
@@ -97,7 +98,7 @@ static const struct pvt_poly poly_N_to_temp = {
  * N = (18658e-3*V - 11572) / 10,
  * V = N * 10^5 / 18658 + 11572 * 10^4 / 18658.
  */
-static const struct pvt_poly __maybe_unused poly_volt_to_N = {
+static const struct polynomial __maybe_unused poly_volt_to_N = {
        .total_divider = 10,
        .terms = {
                {1, 18658, 1000, 1},
@@ -105,7 +106,7 @@ static const struct pvt_poly __maybe_unused poly_volt_to_N = {
        }
 };
 
-static const struct pvt_poly poly_N_to_volt = {
+static const struct polynomial poly_N_to_volt = {
        .total_divider = 10,
        .terms = {
                {1, 100000, 18658, 1},
@@ -113,31 +114,6 @@ static const struct pvt_poly poly_N_to_volt = {
        }
 };
 
-/*
- * Here is the polynomial calculation function, which performs the
- * redistributed terms calculations. It's pretty straightforward. We walk
- * over each degree term up to the free one, and perform the redistributed
- * multiplication of the term coefficient, its divider (as for the rationale
- * fraction representation), data power and the rational fraction divider
- * leftover. Then all of this is collected in a total sum variable, which
- * value is normalized by the total divider before being returned.
- */
-static long pvt_calc_poly(const struct pvt_poly *poly, long data)
-{
-       const struct pvt_poly_term *term = poly->terms;
-       long tmp, ret = 0;
-       int deg;
-
-       do {
-               tmp = term->coef;
-               for (deg = 0; deg < term->deg; ++deg)
-                       tmp = mult_frac(tmp, data, term->divider);
-               ret += tmp / term->divider_leftover;
-       } while ((term++)->deg);
-
-       return ret / poly->total_divider;
-}
-
 static inline u32 pvt_update(void __iomem *reg, u32 mask, u32 data)
 {
        u32 old;
@@ -324,9 +300,9 @@ static int pvt_read_data(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
        } while (read_seqretry(&cache->data_seqlock, seq));
 
        if (type == PVT_TEMP)
-               *val = pvt_calc_poly(&poly_N_to_temp, data);
+               *val = polynomial_calc(&poly_N_to_temp, data);
        else
-               *val = pvt_calc_poly(&poly_N_to_volt, data);
+               *val = polynomial_calc(&poly_N_to_volt, data);
 
        return 0;
 }
@@ -345,9 +321,9 @@ static int pvt_read_limit(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
                data = FIELD_GET(PVT_THRES_HI_MASK, data);
 
        if (type == PVT_TEMP)
-               *val = pvt_calc_poly(&poly_N_to_temp, data);
+               *val = polynomial_calc(&poly_N_to_temp, data);
        else
-               *val = pvt_calc_poly(&poly_N_to_volt, data);
+               *val = polynomial_calc(&poly_N_to_volt, data);
 
        return 0;
 }
@@ -360,10 +336,10 @@ static int pvt_write_limit(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
 
        if (type == PVT_TEMP) {
                val = clamp(val, PVT_TEMP_MIN, PVT_TEMP_MAX);
-               data = pvt_calc_poly(&poly_temp_to_N, val);
+               data = polynomial_calc(&poly_temp_to_N, val);
        } else {
                val = clamp(val, PVT_VOLT_MIN, PVT_VOLT_MAX);
-               data = pvt_calc_poly(&poly_volt_to_N, val);
+               data = polynomial_calc(&poly_volt_to_N, val);
        }
 
        /* Serialize limit update, since a part of the register is changed. */
@@ -522,9 +498,9 @@ static int pvt_read_data(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
                return -ETIMEDOUT;
 
        if (type == PVT_TEMP)
-               *val = pvt_calc_poly(&poly_N_to_temp, data);
+               *val = polynomial_calc(&poly_N_to_temp, data);
        else
-               *val = pvt_calc_poly(&poly_N_to_volt, data);
+               *val = polynomial_calc(&poly_N_to_volt, data);
 
        return 0;
 }
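
Note: the pvt_calc_poly() helper deleted above documents exactly what the
shared replacement does; polynomial_calc() from lib/polynomial.c performs
the same redistributed-term walk over a struct polynomial. A minimal usage
sketch under that assumption (the wrapper name is illustrative):

    #include <linux/polynomial.h>

    /* convert a raw 10-bit sensor reading N into millidegrees Celsius */
    static long n_to_temp_mc(long n)
    {
            return polynomial_calc(&poly_N_to_temp, n);
    }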
index 84cb1ede7bc0b016de75641a70f3657b2ac4ca08..071aa6f4e109b76567f21b9d02a2c6c934d56c9e 100644 (file)
 #include <linux/errno.h>
 #include <linux/hwmon.h>
 #include <linux/init.h>
+#include <linux/kconfig.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/string.h>
+#include <linux/slab.h>
 #include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/thermal.h>
 #include <linux/types.h>
 #include <linux/uaccess.h>
 
 #define I8K_SMM_GET_DELL_SIG1  0xfea3
 #define I8K_SMM_GET_DELL_SIG2  0xffa3
 
+/* in usecs */
+#define DELL_SMM_MAX_DURATION  250000
+
 #define I8K_FAN_MULT           30
-#define I8K_FAN_MAX_RPM                30000
+#define I8K_FAN_RPM_THRESHOLD  1000
 #define I8K_MAX_TEMP           127
 
 #define I8K_FN_NONE            0x00
@@ -80,6 +86,11 @@ struct dell_smm_data {
        int *fan_nominal_speed[DELL_SMM_NO_FANS];
 };
 
+struct dell_smm_cooling_data {
+       u8 fan_num;
+       struct dell_smm_data *data;
+};
+
 MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)");
 MODULE_AUTHOR("Pali Rohár <pali@kernel.org>");
 MODULE_DESCRIPTION("Dell laptop SMM BIOS hwmon driver");
@@ -231,6 +242,9 @@ static int i8k_smm_func(void *par)
        pr_debug("smm(0x%.4x 0x%.4x) = 0x%.4x  (took %7lld usecs)\n", eax, ebx,
                 (rc ? 0xffff : regs->eax & 0xffff), duration);
 
+       if (duration > DELL_SMM_MAX_DURATION)
+               pr_warn_once("SMM call took %lld usecs!\n", duration);
+
        return rc;
 }
 
@@ -318,7 +332,7 @@ static int __init i8k_get_fan_nominal_speed(const struct dell_smm_data *data, u8
        if (data->disallow_fan_support)
                return -EINVAL;
 
-       return i8k_smm(&regs) ? : (regs.eax & 0xffff) * data->i8k_fan_mult;
+       return i8k_smm(&regs) ? : (regs.eax & 0xffff);
 }
 
 /*
@@ -638,9 +652,50 @@ static void __init i8k_init_procfs(struct device *dev)
 
 #endif
 
-/*
- * Hwmon interface
- */
+static int dell_smm_get_max_state(struct thermal_cooling_device *dev, unsigned long *state)
+{
+       struct dell_smm_cooling_data *cdata = dev->devdata;
+
+       *state = cdata->data->i8k_fan_max;
+
+       return 0;
+}
+
+static int dell_smm_get_cur_state(struct thermal_cooling_device *dev, unsigned long *state)
+{
+       struct dell_smm_cooling_data *cdata = dev->devdata;
+       int ret;
+
+       ret = i8k_get_fan_status(cdata->data, cdata->fan_num);
+       if (ret < 0)
+               return ret;
+
+       *state = ret;
+
+       return 0;
+}
+
+static int dell_smm_set_cur_state(struct thermal_cooling_device *dev, unsigned long state)
+{
+       struct dell_smm_cooling_data *cdata = dev->devdata;
+       struct dell_smm_data *data = cdata->data;
+       int ret;
+
+       if (state > data->i8k_fan_max)
+               return -EINVAL;
+
+       mutex_lock(&data->i8k_mutex);
+       ret = i8k_set_fan(data, cdata->fan_num, (int)state);
+       mutex_unlock(&data->i8k_mutex);
+
+       return ret;
+}
+
+static const struct thermal_cooling_device_ops dell_smm_cooling_ops = {
+       .get_max_state = dell_smm_get_max_state,
+       .get_cur_state = dell_smm_get_cur_state,
+       .set_cur_state = dell_smm_set_cur_state,
+};
 
 static umode_t dell_smm_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr,
                                   int channel)
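
Note: once registered (see dell_smm_init_cdev() below), the generic thermal
core owns the sysfs plumbing for these ops; a userspace write to
cooling_deviceX/cur_state reaches dell_smm_set_cur_state() roughly like the
following sketch of the core's dispatch (not code from this driver):

    unsigned long state;
    int ret;

    ret = kstrtoul(buf, 10, &state);
    if (ret)
            return ret;
    /* returns -EINVAL when state > data->i8k_fan_max */
    ret = cdev->ops->set_cur_state(cdev, state);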
@@ -727,6 +782,7 @@ static int dell_smm_read(struct device *dev, enum hwmon_sensor_types type, u32 a
                         long *val)
 {
        struct dell_smm_data *data = dev_get_drvdata(dev);
+       int mult = data->i8k_fan_mult;
        int ret;
 
        switch (type) {
@@ -755,11 +811,11 @@ static int dell_smm_read(struct device *dev, enum hwmon_sensor_types type, u32 a
 
                        return 0;
                case hwmon_fan_min:
-                       *val = data->fan_nominal_speed[channel][0];
+                       *val = data->fan_nominal_speed[channel][0] * mult;
 
                        return 0;
                case hwmon_fan_max:
-                       *val = data->fan_nominal_speed[channel][data->i8k_fan_max];
+                       *val = data->fan_nominal_speed[channel][data->i8k_fan_max] * mult;
 
                        return 0;
                case hwmon_fan_target:
@@ -770,7 +826,7 @@ static int dell_smm_read(struct device *dev, enum hwmon_sensor_types type, u32 a
                        if (ret > data->i8k_fan_max)
                                ret = data->i8k_fan_max;
 
-                       *val = data->fan_nominal_speed[channel][ret];
+                       *val = data->fan_nominal_speed[channel][ret] * mult;
 
                        return 0;
                default:
@@ -941,6 +997,37 @@ static const struct hwmon_chip_info dell_smm_chip_info = {
        .info = dell_smm_info,
 };
 
+static int __init dell_smm_init_cdev(struct device *dev, u8 fan_num)
+{
+       struct dell_smm_data *data = dev_get_drvdata(dev);
+       struct thermal_cooling_device *cdev;
+       struct dell_smm_cooling_data *cdata;
+       int ret = 0;
+       char *name;
+
+       name = kasprintf(GFP_KERNEL, "dell-smm-fan%u", fan_num + 1);
+       if (!name)
+               return -ENOMEM;
+
+       cdata = devm_kmalloc(dev, sizeof(*cdata), GFP_KERNEL);
+       if (cdata) {
+               cdata->fan_num = fan_num;
+               cdata->data = data;
+               cdev = devm_thermal_of_cooling_device_register(dev, NULL, name, cdata,
+                                                              &dell_smm_cooling_ops);
+               if (IS_ERR(cdev)) {
+                       devm_kfree(dev, cdata);
+                       ret = PTR_ERR(cdev);
+               }
+       } else {
+               ret = -ENOMEM;
+       }
+
+       kfree(name);
+
+       return ret;
+}
+
 static int __init dell_smm_init_hwmon(struct device *dev)
 {
        struct dell_smm_data *data = dev_get_drvdata(dev);
@@ -967,6 +1054,15 @@ static int __init dell_smm_init_hwmon(struct device *dev)
                        continue;
 
                data->fan[i] = true;
+
+               /* the cooling device is not critical, ignore failures */
+               if (IS_REACHABLE(CONFIG_THERMAL)) {
+                       err = dell_smm_init_cdev(dev, i);
+                       if (err < 0)
+                               dev_warn(dev, "Failed to register cooling device for fan %u\n",
+                                        i + 1);
+               }
+
                data->fan_nominal_speed[i] = devm_kmalloc_array(dev, data->i8k_fan_max + 1,
                                                                sizeof(*data->fan_nominal_speed[i]),
                                                                GFP_KERNEL);
@@ -982,6 +1078,13 @@ static int __init dell_smm_init_hwmon(struct device *dev)
                                break;
                        }
                        data->fan_nominal_speed[i][state] = err;
+                       /*
+                        * Autodetect the fan multiplier based on the nominal rpm if
+                        * the multiplier was not specified as a module param or in
+                        * DMI. If a fan reports an rpm value that is too high, set
+                        * the multiplier to 1.
+                        */
+                       if (!fan_mult && err > I8K_FAN_RPM_THRESHOLD)
+                               data->i8k_fan_mult = 1;
                }
        }
 
@@ -1270,15 +1373,12 @@ static int __init dell_smm_probe(struct platform_device *pdev)
        struct dell_smm_data *data;
        const struct dmi_system_id *id, *fan_control;
        int ret;
-       u8 fan;
 
        data = devm_kzalloc(&pdev->dev, sizeof(struct dell_smm_data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
 
        mutex_init(&data->i8k_mutex);
-       data->i8k_fan_mult = I8K_FAN_MULT;
-       data->i8k_fan_max = I8K_FAN_HIGH;
        platform_set_drvdata(pdev, data);
 
        if (dmi_check_system(i8k_blacklist_fan_support_dmi_table)) {
@@ -1313,7 +1413,9 @@ static int __init dell_smm_probe(struct platform_device *pdev)
                        fan_max = conf->fan_max;
        }
 
-       data->i8k_fan_max = fan_max ? : I8K_FAN_HIGH;   /* Must not be 0 */
+       /* Neither value may be 0 */
+       data->i8k_fan_mult = fan_mult ? : I8K_FAN_MULT;
+       data->i8k_fan_max = fan_max ? : I8K_FAN_HIGH;
        data->i8k_pwm_mult = DIV_ROUND_UP(255, data->i8k_fan_max);
 
        fan_control = dmi_first_match(i8k_whitelist_fan_control);
@@ -1325,25 +1427,6 @@ static int __init dell_smm_probe(struct platform_device *pdev)
                dev_info(&pdev->dev, "enabling support for setting automatic/manual fan control\n");
        }
 
-       if (!fan_mult) {
-               /*
-                * Autodetect fan multiplier based on nominal rpm
-                * If fan reports rpm value too high then set multiplier to 1
-                */
-               for (fan = 0; fan < DELL_SMM_NO_FANS; ++fan) {
-                       ret = i8k_get_fan_nominal_speed(data, fan, data->i8k_fan_max);
-                       if (ret < 0)
-                               continue;
-
-                       if (ret > I8K_FAN_MAX_RPM)
-                               data->i8k_fan_mult = 1;
-                       break;
-               }
-       } else {
-               /* Fan multiplier was specified in module param or in dmi */
-               data->i8k_fan_mult = fan_mult;
-       }
-
        ret = dell_smm_init_hwmon(&pdev->dev);
        if (ret)
                return ret;
index 989e2c8496dd2d8693bbf9d3c06d319975bcf158..2e2cd79d89ebc93607b9bdb7186a8799d3b63493 100644 (file)
@@ -764,7 +764,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
                         "hwmon: '%s' is not a valid name attribute, please fix\n",
                         name);
 
-       id = ida_simple_get(&hwmon_ida, 0, 0, GFP_KERNEL);
+       id = ida_alloc(&hwmon_ida, GFP_KERNEL);
        if (id < 0)
                return ERR_PTR(id);
 
@@ -856,7 +856,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
 free_hwmon:
        hwmon_dev_release(hdev);
 ida_remove:
-       ida_simple_remove(&hwmon_ida, id);
+       ida_free(&hwmon_ida, id);
        return ERR_PTR(err);
 }
 
@@ -886,11 +886,12 @@ EXPORT_SYMBOL_GPL(hwmon_device_register_with_groups);
 
 /**
  * hwmon_device_register_with_info - register w/ hwmon
- * @dev: the parent device
- * @name: hwmon name attribute
- * @drvdata: driver data to attach to created device
- * @chip: pointer to hwmon chip information
+ * @dev: the parent device (mandatory)
+ * @name: hwmon name attribute (mandatory)
+ * @drvdata: driver data to attach to created device (optional)
+ * @chip: pointer to hwmon chip information (mandatory)
  * @extra_groups: pointer to list of additional non-standard attribute groups
+ *     (optional)
  *
  * hwmon_device_unregister() must be called when the device is no
  * longer needed.
@@ -903,19 +904,41 @@ hwmon_device_register_with_info(struct device *dev, const char *name,
                                const struct hwmon_chip_info *chip,
                                const struct attribute_group **extra_groups)
 {
-       if (!name)
-               return ERR_PTR(-EINVAL);
-
-       if (chip && (!chip->ops || !chip->ops->is_visible || !chip->info))
+       if (!dev || !name || !chip)
                return ERR_PTR(-EINVAL);
 
-       if (chip && !dev)
+       if (!chip->ops || !chip->ops->is_visible || !chip->info)
                return ERR_PTR(-EINVAL);
 
        return __hwmon_device_register(dev, name, drvdata, chip, extra_groups);
 }
 EXPORT_SYMBOL_GPL(hwmon_device_register_with_info);
 
+/**
+ * hwmon_device_register_for_thermal - register hwmon device for thermal subsystem
+ * @dev: the parent device
+ * @name: hwmon name attribute
+ * @drvdata: driver data to attach to created device
+ *
+ * The use of this function is restricted. It is provided for legacy reasons
+ * and must only be called from the thermal subsystem.
+ *
+ * hwmon_device_unregister() must be called when the device is no
+ * longer needed.
+ *
+ * Returns the pointer to the new device.
+ */
+struct device *
+hwmon_device_register_for_thermal(struct device *dev, const char *name,
+                                 void *drvdata)
+{
+       if (!name || !dev)
+               return ERR_PTR(-EINVAL);
+
+       return __hwmon_device_register(dev, name, drvdata, NULL, NULL);
+}
+EXPORT_SYMBOL_NS_GPL(hwmon_device_register_for_thermal, HWMON_THERMAL);
+
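
Note: because this symbol is exported with EXPORT_SYMBOL_NS_GPL(), any
consumer must import the namespace explicitly, e.g. in the thermal core
module:

    MODULE_IMPORT_NS(HWMON_THERMAL);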
 /**
  * hwmon_device_register - register w/ hwmon
  * @dev: the device to register
@@ -945,7 +968,7 @@ void hwmon_device_unregister(struct device *dev)
 
        if (likely(sscanf(dev_name(dev), HWMON_ID_FORMAT, &id) == 1)) {
                device_unregister(dev);
-               ida_simple_remove(&hwmon_ida, id);
+               ida_free(&hwmon_ida, id);
        } else
                dev_dbg(dev->parent,
                        "hwmon_device_unregister() failed: bad class ID!\n");
@@ -1057,6 +1080,59 @@ void devm_hwmon_device_unregister(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(devm_hwmon_device_unregister);
 
+static char *__hwmon_sanitize_name(struct device *dev, const char *old_name)
+{
+       char *name, *p;
+
+       if (dev)
+               name = devm_kstrdup(dev, old_name, GFP_KERNEL);
+       else
+               name = kstrdup(old_name, GFP_KERNEL);
+       if (!name)
+               return ERR_PTR(-ENOMEM);
+
+       for (p = name; *p; p++)
+               if (hwmon_is_bad_char(*p))
+                       *p = '_';
+
+       return name;
+}
+
+/**
+ * hwmon_sanitize_name - Replaces invalid characters in a hwmon name
+ * @name: NUL-terminated name
+ *
+ * Allocates a new string where any invalid characters will be replaced
+ * by an underscore. It is the responsibility of the caller to release
+ * the memory.
+ *
+ * Returns newly allocated name, or ERR_PTR on error.
+ */
+char *hwmon_sanitize_name(const char *name)
+{
+       return __hwmon_sanitize_name(NULL, name);
+}
+EXPORT_SYMBOL_GPL(hwmon_sanitize_name);
+
+/**
+ * devm_hwmon_sanitize_name - resource managed hwmon_sanitize_name()
+ * @dev: device to allocate memory for
+ * @name: NUL-terminated name
+ *
+ * Allocates a new string where any invalid characters will be replaced
+ * by an underscore.
+ *
+ * Returns newly allocated name, or ERR_PTR on error.
+ */
+char *devm_hwmon_sanitize_name(struct device *dev, const char *name)
+{
+       if (!dev)
+               return ERR_PTR(-EINVAL);
+
+       return __hwmon_sanitize_name(dev, name);
+}
+EXPORT_SYMBOL_GPL(devm_hwmon_sanitize_name);
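
Note: a minimal usage sketch of the new helper (the intel-m10-bmc-hwmon
conversion further down does exactly this); hwmon_is_bad_char() treats
characters such as '-', '*' and whitespace as invalid, so a name like
"lan966x-hwmon" would come back as "lan966x_hwmon":

    name = devm_hwmon_sanitize_name(dev, id->name);
    if (IS_ERR(name))
            return PTR_ERR(name);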
+
 static void __init hwmon_pci_quirks(void)
 {
 #if defined CONFIG_X86 && defined CONFIG_PCI
index de6baf6ca3d1e007debad7768cc1df82885b5f45..5c4cf742f5ae7bf6c190b279d31f2001a91cb5d6 100644 (file)
@@ -482,7 +482,7 @@ static void aem_delete(struct aem_data *data)
        ipmi_destroy_user(data->ipmi.user);
        platform_set_drvdata(data->pdev, NULL);
        platform_device_unregister(data->pdev);
-       ida_simple_remove(&aem_ida, data->id);
+       ida_free(&aem_ida, data->id);
        kfree(data);
 }
 
@@ -539,7 +539,7 @@ static int aem_init_aem1_inst(struct aem_ipmi_data *probe, u8 module_handle)
                data->power_period[i] = AEM_DEFAULT_POWER_INTERVAL;
 
        /* Create sub-device for this fw instance */
-       data->id = ida_simple_get(&aem_ida, 0, 0, GFP_KERNEL);
+       data->id = ida_alloc(&aem_ida, GFP_KERNEL);
        if (data->id < 0)
                goto id_err;
 
@@ -600,7 +600,7 @@ ipmi_err:
        platform_set_drvdata(data->pdev, NULL);
        platform_device_unregister(data->pdev);
 dev_err:
-       ida_simple_remove(&aem_ida, data->id);
+       ida_free(&aem_ida, data->id);
 id_err:
        kfree(data);
 
@@ -679,7 +679,7 @@ static int aem_init_aem2_inst(struct aem_ipmi_data *probe,
                data->power_period[i] = AEM_DEFAULT_POWER_INTERVAL;
 
        /* Create sub-device for this fw instance */
-       data->id = ida_simple_get(&aem_ida, 0, 0, GFP_KERNEL);
+       data->id = ida_alloc(&aem_ida, GFP_KERNEL);
        if (data->id < 0)
                goto id_err;
 
@@ -740,7 +740,7 @@ ipmi_err:
        platform_set_drvdata(data->pdev, NULL);
        platform_device_unregister(data->pdev);
 dev_err:
-       ida_simple_remove(&aem_ida, data->id);
+       ida_free(&aem_ida, data->id);
 id_err:
        kfree(data);
 
index 7a08e4c44a4b4b580b1a6345f989530d1336a504..6e82f7200d1cc54be282a90248f1ef10706c2d74 100644 (file)
@@ -515,7 +515,6 @@ static int m10bmc_hwmon_probe(struct platform_device *pdev)
        struct intel_m10bmc *m10bmc = dev_get_drvdata(pdev->dev.parent);
        struct device *hwmon_dev, *dev = &pdev->dev;
        struct m10bmc_hwmon *hw;
-       int i;
 
        hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL);
        if (!hw)
@@ -528,13 +527,9 @@ static int m10bmc_hwmon_probe(struct platform_device *pdev)
        hw->chip.info = hw->bdata->hinfo;
        hw->chip.ops = &m10bmc_hwmon_ops;
 
-       hw->hw_name = devm_kstrdup(dev, id->name, GFP_KERNEL);
-       if (!hw->hw_name)
-               return -ENOMEM;
-
-       for (i = 0; hw->hw_name[i]; i++)
-               if (hwmon_is_bad_char(hw->hw_name[i]))
-                       hw->hw_name[i] = '_';
+       hw->hw_name = devm_hwmon_sanitize_name(dev, id->name);
+       if (IS_ERR(hw->hw_name))
+               return PTR_ERR(hw->hw_name);
 
        hwmon_dev = devm_hwmon_device_register_with_info(dev, hw->hw_name,
                                                         hw, &hw->chip, NULL);
index cb347a6bd8d93c7312e9b30b3e9d9fc7c4ed6b53..07f7f8b5b73d74ddf6d45d60d8ffbd24dea62bf0 100644 (file)
@@ -63,6 +63,7 @@ static const unsigned short normal_i2c[] = {
 #define STM_MANID              0x104a  /* ST Microelectronics */
 #define GT_MANID               0x1c68  /* Giantec */
 #define GT_MANID2              0x132d  /* Giantec, 2nd mfg ID */
+#define SI_MANID               0x1c85  /* Seiko Instruments */
 
 /* SMBUS register */
 #define SMBUS_STMOUT           BIT(7)  /* SMBus time-out, active low */
@@ -156,6 +157,10 @@ static const unsigned short normal_i2c[] = {
 #define STTS3000_DEVID         0x0200
 #define STTS3000_DEVID_MASK    0xffff
 
+/* Seiko Instruments */
+#define S34TS04A_DEVID         0x2221
+#define S34TS04A_DEVID_MASK    0xffff
+
 static u16 jc42_hysteresis[] = { 0, 1500, 3000, 6000 };
 
 struct jc42_chips {
@@ -186,6 +191,7 @@ static struct jc42_chips jc42_chips[] = {
        { ONS_MANID, CAT34TS04_DEVID, CAT34TS04_DEVID_MASK },
        { ONS_MANID, N34TS04_DEVID, N34TS04_DEVID_MASK },
        { NXP_MANID, SE98_DEVID, SE98_DEVID_MASK },
+       { SI_MANID,  S34TS04A_DEVID, S34TS04A_DEVID_MASK },
        { STM_MANID, STTS424_DEVID, STTS424_DEVID_MASK },
        { STM_MANID, STTS424E_DEVID, STTS424E_DEVID_MASK },
        { STM_MANID, STTS2002_DEVID, STTS2002_DEVID_MASK },
@@ -443,6 +449,8 @@ static int jc42_detect(struct i2c_client *client, struct i2c_board_info *info)
 }
 
 static const struct hwmon_channel_info *jc42_info[] = {
+       HWMON_CHANNEL_INFO(chip,
+                          HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL),
        HWMON_CHANNEL_INFO(temp,
                           HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX |
                           HWMON_T_CRIT | HWMON_T_MAX_HYST |
diff --git a/drivers/hwmon/lan966x-hwmon.c b/drivers/hwmon/lan966x-hwmon.c
new file mode 100644 (file)
index 0000000..f41df05
--- /dev/null
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/hwmon.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/polynomial.h>
+#include <linux/regmap.h>
+
+/*
+ * The original formula translating the raw reading N into a temperature
+ * (in degrees Celsius) is as follows:
+ *
+ *   T = -3.4627e-11*(N^4) + 1.1023e-7*(N^3) + -1.9165e-4*(N^2) +
+ *       3.0604e-1*(N^1) + -5.6197e1
+ *
+ * where T = [-56.197, 136.402] C and N = [0, 1023].
+ *
+ * It must be altered accordingly to make it suitable for integer
+ * arithmetic. The technique is called 'factor redistribution': the
+ * multiplications and divisions are arranged so that every intermediate
+ * result stays within the integer range. In addition, the formula is
+ * translated to produce millidegrees Celsius. Here is what it looks like
+ * after the alterations:
+ *
+ *   T = -34627e-12*(N^4) + 110230e-9*(N^3) + -191650e-6*(N^2) +
+ *       306040e-3*(N^1) + -56197
+ *
+ * where T = [-56197, 136402] mC and N = [0, 1023].
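+ * As a quick check, N = 0 reduces to the free term: T = -56197 mC.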
+ */
+
+static const struct polynomial poly_N_to_temp = {
+       .terms = {
+               {4,  -34627, 1000, 1},
+               {3,  110230, 1000, 1},
+               {2, -191650, 1000, 1},
+               {1,  306040, 1000, 1},
+               {0,  -56197,    1, 1}
+       }
+};
+
+#define PVT_SENSOR_CTRL                0x0 /* unused */
+#define PVT_SENSOR_CFG         0x4
+#define   SENSOR_CFG_CLK_CFG           GENMASK(27, 20)
+#define   SENSOR_CFG_TRIM_VAL          GENMASK(13, 9)
+#define   SENSOR_CFG_SAMPLE_ENA                BIT(8)
+#define   SENSOR_CFG_START_CAPTURE     BIT(7)
+#define   SENSOR_CFG_CONTINIOUS_MODE   BIT(6)
+#define   SENSOR_CFG_PSAMPLE_ENA       GENMASK(1, 0)
+#define PVT_SENSOR_STAT                0x8
+#define   SENSOR_STAT_DATA_VALID       BIT(10)
+#define   SENSOR_STAT_DATA             GENMASK(9, 0)
+
+#define FAN_CFG                        0x0
+#define   FAN_CFG_DUTY_CYCLE           GENMASK(23, 16)
+#define   INV_POL                      BIT(3)
+#define   GATE_ENA                     BIT(2)
+#define   PWM_OPEN_COL_ENA             BIT(1)
+#define   FAN_STAT_CFG                 BIT(0)
+#define FAN_PWM_FREQ           0x4
+#define   FAN_PWM_CYC_10US             GENMASK(25, 15)
+#define   FAN_PWM_FREQ_FREQ            GENMASK(14, 0)
+#define FAN_CNT                        0xc
+#define   FAN_CNT_DATA                 GENMASK(15, 0)
+
+#define LAN966X_PVT_CLK                1200000 /* 1.2 MHz */
+
+struct lan966x_hwmon {
+       struct regmap *regmap_pvt;
+       struct regmap *regmap_fan;
+       struct clk *clk;
+       unsigned long clk_rate;
+};
+
+static int lan966x_hwmon_read_temp(struct device *dev, long *val)
+{
+       struct lan966x_hwmon *hwmon = dev_get_drvdata(dev);
+       unsigned int data;
+       int ret;
+
+       ret = regmap_read(hwmon->regmap_pvt, PVT_SENSOR_STAT, &data);
+       if (ret < 0)
+               return ret;
+
+       if (!(data & SENSOR_STAT_DATA_VALID))
+               return -ENODATA;
+
+       *val = polynomial_calc(&poly_N_to_temp,
+                              FIELD_GET(SENSOR_STAT_DATA, data));
+
+       return 0;
+}
+
+static int lan966x_hwmon_read_fan(struct device *dev, long *val)
+{
+       struct lan966x_hwmon *hwmon = dev_get_drvdata(dev);
+       unsigned int data;
+       int ret;
+
+       ret = regmap_read(hwmon->regmap_fan, FAN_CNT, &data);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * Data is given in pulses per second. Assume two pulses
+        * per revolution.
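+        * For example, 50 pulses/s gives 50 * 60 / 2 = 1500 RPM.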
+        */
+       *val = FIELD_GET(FAN_CNT_DATA, data) * 60 / 2;
+
+       return 0;
+}
+
+static int lan966x_hwmon_read_pwm(struct device *dev, long *val)
+{
+       struct lan966x_hwmon *hwmon = dev_get_drvdata(dev);
+       unsigned int data;
+       int ret;
+
+       ret = regmap_read(hwmon->regmap_fan, FAN_CFG, &data);
+       if (ret < 0)
+               return ret;
+
+       *val = FIELD_GET(FAN_CFG_DUTY_CYCLE, data);
+
+       return 0;
+}
+
+static int lan966x_hwmon_read_pwm_freq(struct device *dev, long *val)
+{
+       struct lan966x_hwmon *hwmon = dev_get_drvdata(dev);
+       unsigned long tmp;
+       unsigned int data;
+       int ret;
+
+       ret = regmap_read(hwmon->regmap_fan, FAN_PWM_FREQ, &data);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * The datasheet says the frequency is sys_clk / 256 / pwm_freq, but
+        * in reality it is sys_clk / 256 / (pwm_freq + 1).
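+        * For example, a 25 MHz sys_clk (assumed here for illustration)
+        * with pwm_freq = 97 yields roughly 1 kHz.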
+        */
+       data = FIELD_GET(FAN_PWM_FREQ_FREQ, data) + 1;
+       tmp = DIV_ROUND_CLOSEST(hwmon->clk_rate, 256);
+       *val = DIV_ROUND_CLOSEST(tmp, data);
+
+       return 0;
+}
+
+static int lan966x_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+                             u32 attr, int channel, long *val)
+{
+       switch (type) {
+       case hwmon_temp:
+               return lan966x_hwmon_read_temp(dev, val);
+       case hwmon_fan:
+               return lan966x_hwmon_read_fan(dev, val);
+       case hwmon_pwm:
+               switch (attr) {
+               case hwmon_pwm_input:
+                       return lan966x_hwmon_read_pwm(dev, val);
+               case hwmon_pwm_freq:
+                       return lan966x_hwmon_read_pwm_freq(dev, val);
+               default:
+                       return -EOPNOTSUPP;
+               }
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int lan966x_hwmon_write_pwm(struct device *dev, long val)
+{
+       struct lan966x_hwmon *hwmon = dev_get_drvdata(dev);
+
+       if (val < 0 || val > 255)
+               return -EINVAL;
+
+       return regmap_update_bits(hwmon->regmap_fan, FAN_CFG,
+                                 FAN_CFG_DUTY_CYCLE,
+                                 FIELD_PREP(FAN_CFG_DUTY_CYCLE, val));
+}
+
+static int lan966x_hwmon_write_pwm_freq(struct device *dev, long val)
+{
+       struct lan966x_hwmon *hwmon = dev_get_drvdata(dev);
+
+       if (val <= 0)
+               return -EINVAL;
+
+       val = DIV_ROUND_CLOSEST(hwmon->clk_rate, val);
+       val = DIV_ROUND_CLOSEST(val, 256) - 1;
+       val = clamp_val(val, 0, FAN_PWM_FREQ_FREQ);
+
+       return regmap_update_bits(hwmon->regmap_fan, FAN_PWM_FREQ,
+                                 FAN_PWM_FREQ_FREQ,
+                                 FIELD_PREP(FAN_PWM_FREQ_FREQ, val));
+}
+
+static int lan966x_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
+                              u32 attr, int channel, long val)
+{
+       switch (type) {
+       case hwmon_pwm:
+               switch (attr) {
+               case hwmon_pwm_input:
+                       return lan966x_hwmon_write_pwm(dev, val);
+               case hwmon_pwm_freq:
+                       return lan966x_hwmon_write_pwm_freq(dev, val);
+               default:
+                       return -EOPNOTSUPP;
+               }
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static umode_t lan966x_hwmon_is_visible(const void *data,
+                                       enum hwmon_sensor_types type,
+                                       u32 attr, int channel)
+{
+       umode_t mode = 0;
+
+       switch (type) {
+       case hwmon_temp:
+               switch (attr) {
+               case hwmon_temp_input:
+                       mode = 0444;
+                       break;
+               default:
+                       break;
+               }
+               break;
+       case hwmon_fan:
+               switch (attr) {
+               case hwmon_fan_input:
+                       mode = 0444;
+                       break;
+               default:
+                       break;
+               }
+               break;
+       case hwmon_pwm:
+               switch (attr) {
+               case hwmon_pwm_input:
+               case hwmon_pwm_freq:
+                       mode = 0644;
+                       break;
+               default:
+                       break;
+               }
+               break;
+       default:
+               break;
+       }
+
+       return mode;
+}
+
+static const struct hwmon_channel_info *lan966x_hwmon_info[] = {
+       HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
+       HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
+       HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT),
+       HWMON_CHANNEL_INFO(pwm, HWMON_PWM_INPUT | HWMON_PWM_FREQ),
+       NULL
+};
+
+static const struct hwmon_ops lan966x_hwmon_ops = {
+       .is_visible = lan966x_hwmon_is_visible,
+       .read = lan966x_hwmon_read,
+       .write = lan966x_hwmon_write,
+};
+
+static const struct hwmon_chip_info lan966x_hwmon_chip_info = {
+       .ops = &lan966x_hwmon_ops,
+       .info = lan966x_hwmon_info,
+};
+
+static void lan966x_hwmon_disable(void *data)
+{
+       struct lan966x_hwmon *hwmon = data;
+
+       regmap_update_bits(hwmon->regmap_pvt, PVT_SENSOR_CFG,
+                          SENSOR_CFG_SAMPLE_ENA | SENSOR_CFG_CONTINIOUS_MODE,
+                          0);
+}
+
+static int lan966x_hwmon_enable(struct device *dev,
+                               struct lan966x_hwmon *hwmon)
+{
+       unsigned int mask = SENSOR_CFG_CLK_CFG |
+                           SENSOR_CFG_SAMPLE_ENA |
+                           SENSOR_CFG_START_CAPTURE |
+                           SENSOR_CFG_CONTINIOUS_MODE |
+                           SENSOR_CFG_PSAMPLE_ENA;
+       unsigned int val;
+       unsigned int div;
+       int ret;
+
+       /* enable continuous mode */
+       val = SENSOR_CFG_SAMPLE_ENA | SENSOR_CFG_CONTINIOUS_MODE;
+
+       /* set PVT clock to be between 1.15 and 1.25 MHz */
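+       /* e.g. a 600 MHz sys_clk (assumed) gives div = 500, i.e. 1.2 MHz */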
+       div = DIV_ROUND_CLOSEST(hwmon->clk_rate, LAN966X_PVT_CLK);
+       val |= FIELD_PREP(SENSOR_CFG_CLK_CFG, div);
+
+       ret = regmap_update_bits(hwmon->regmap_pvt, PVT_SENSOR_CFG,
+                                mask, val);
+       if (ret)
+               return ret;
+
+       return devm_add_action_or_reset(dev, lan966x_hwmon_disable, hwmon);
+}
+
+static struct regmap *lan966x_init_regmap(struct platform_device *pdev,
+                                         const char *name)
+{
+       struct regmap_config regmap_config = {
+               .reg_bits = 32,
+               .reg_stride = 4,
+               .val_bits = 32,
+       };
+       void __iomem *base;
+
+       base = devm_platform_ioremap_resource_byname(pdev, name);
+       if (IS_ERR(base))
+               return ERR_CAST(base);
+
+       regmap_config.name = name;
+
+       return devm_regmap_init_mmio(&pdev->dev, base, &regmap_config);
+}
+
+static void lan966x_clk_disable(void *data)
+{
+       struct lan966x_hwmon *hwmon = data;
+
+       clk_disable_unprepare(hwmon->clk);
+}
+
+static int lan966x_clk_enable(struct device *dev, struct lan966x_hwmon *hwmon)
+{
+       int ret;
+
+       ret = clk_prepare_enable(hwmon->clk);
+       if (ret)
+               return ret;
+
+       return devm_add_action_or_reset(dev, lan966x_clk_disable, hwmon);
+}
+
+static int lan966x_hwmon_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct lan966x_hwmon *hwmon;
+       struct device *hwmon_dev;
+       int ret;
+
+       hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+       if (!hwmon)
+               return -ENOMEM;
+
+       hwmon->clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(hwmon->clk))
+               return dev_err_probe(dev, PTR_ERR(hwmon->clk),
+                                    "failed to get clock\n");
+
+       ret = lan966x_clk_enable(dev, hwmon);
+       if (ret)
+               return dev_err_probe(dev, ret, "failed to enable clock\n");
+
+       hwmon->clk_rate = clk_get_rate(hwmon->clk);
+
+       hwmon->regmap_pvt = lan966x_init_regmap(pdev, "pvt");
+       if (IS_ERR(hwmon->regmap_pvt))
+               return dev_err_probe(dev, PTR_ERR(hwmon->regmap_pvt),
+                                    "failed to get regmap for PVT registers\n");
+
+       hwmon->regmap_fan = lan966x_init_regmap(pdev, "fan");
+       if (IS_ERR(hwmon->regmap_fan))
+               return dev_err_probe(dev, PTR_ERR(hwmon->regmap_fan),
+                                    "failed to get regmap for fan registers\n");
+
+       ret = lan966x_hwmon_enable(dev, hwmon);
+       if (ret)
+               return dev_err_probe(dev, ret, "failed to enable sensor\n");
+
+       hwmon_dev = devm_hwmon_device_register_with_info(&pdev->dev,
+                               "lan966x_hwmon", hwmon,
+                               &lan966x_hwmon_chip_info, NULL);
+       if (IS_ERR(hwmon_dev))
+               return dev_err_probe(dev, PTR_ERR(hwmon_dev),
+                                    "failed to register hwmon device\n");
+
+       return 0;
+}
+
+static const struct of_device_id lan966x_hwmon_of_match[] = {
+       { .compatible = "microchip,lan9668-hwmon" },
+       {}
+};
+MODULE_DEVICE_TABLE(of, lan966x_hwmon_of_match);
+
+static struct platform_driver lan966x_hwmon_driver = {
+       .probe = lan966x_hwmon_probe,
+       .driver = {
+               .name = "lan966x-hwmon",
+               .of_match_table = lan966x_hwmon_of_match,
+       },
+};
+module_platform_driver(lan966x_hwmon_driver);
+
+MODULE_DESCRIPTION("LAN966x Hardware Monitoring Driver");
+MODULE_AUTHOR("Michael Walle <michael@walle.cc>");
+MODULE_LICENSE("GPL");
index afdbb63237b9e4311d18b81194be35b42ae56eea..66dc826f7962c69ac912f66a0f5f1f18118cbf4d 100644 (file)
@@ -26,6 +26,7 @@
 
 enum lm75_type {               /* keep sorted in alphabetical order */
        adt75,
+       at30ts74,
        ds1775,
        ds75,
        ds7505,
@@ -128,6 +129,14 @@ static const struct lm75_params device_params[] = {
                .default_resolution = 12,
                .default_sample_time = MSEC_PER_SEC / 10,
        },
+       [at30ts74] = {
+               .set_mask = 3 << 5,     /* 12-bit mode */
+               .default_resolution = 12,
+               .default_sample_time = 200,
+               .num_sample_times = 4,
+               .sample_times = (unsigned int []){ 25, 50, 100, 200 },
+               .resolutions = (u8 []){ 9, 10, 11, 12 },
+       },
        [ds1775] = {
                .clr_mask = 3 << 5,
                .set_mask = 2 << 5,     /* 11-bit mode */
@@ -645,6 +654,7 @@ static int lm75_probe(struct i2c_client *client)
 
 static const struct i2c_device_id lm75_ids[] = {
        { "adt75", adt75, },
+       { "at30ts74", at30ts74, },
        { "ds1775", ds1775, },
        { "ds75", ds75, },
        { "ds7505", ds7505, },
@@ -680,6 +690,10 @@ static const struct of_device_id __maybe_unused lm75_of_match[] = {
                .compatible = "adi,adt75",
                .data = (void *)adt75
        },
+       {
+               .compatible = "atmel,at30ts74",
+               .data = (void *)at30ts74
+       },
        {
                .compatible = "dallas,ds1775",
                .data = (void *)ds1775
index 12370dcefa6aba3cbbe963bc274ff6334fbb46f1..905f5689f9074df702352e6507016b24893deaad 100644 (file)
 #include <linux/init.h>
 #include <linux/hwmon.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
-#include <linux/sysfs.h>
 
 /*
  * Addresses to scan
index 1c9493c7081324a6b7781e2ff398332ad15306b4..3820f0e615108122168380be35bb82089cce5a3e 100644 (file)
@@ -1707,6 +1707,7 @@ static void lm90_restore_conf(void *_data)
 
 static int lm90_init_client(struct i2c_client *client, struct lm90_data *data)
 {
+       struct device_node *np = client->dev.of_node;
        int config, convrate;
 
        convrate = lm90_read_reg(client, LM90_REG_R_CONVRATE);
@@ -1727,6 +1728,9 @@ static int lm90_init_client(struct i2c_client *client, struct lm90_data *data)
 
        /* Check Temperature Range Select */
        if (data->flags & LM90_HAVE_EXTENDED_TEMP) {
+               if (of_property_read_bool(np, "ti,extended-range-enable"))
+                       config |= 0x04;
+
                if (config & 0x04)
                        data->flags |= LM90_FLAG_ADT7461_EXT;
        }
index 7352d2b3c756d7c9820f972ac5444430c6824af7..72489d5d7eaf9734403009e1c1408f8630451ca9 100644 (file)
@@ -811,68 +811,32 @@ static const struct hwmon_ops ltc2992_hwmon_ops = {
        .write = ltc2992_write,
 };
 
-static const u32 ltc2992_chip_config[] = {
-       HWMON_C_IN_RESET_HISTORY,
-       0
-};
-
-static const struct hwmon_channel_info ltc2992_chip = {
-       .type = hwmon_chip,
-       .config = ltc2992_chip_config,
-};
-
-static const u32 ltc2992_in_config[] = {
-       HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN | HWMON_I_MAX |
-       HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
-       HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN | HWMON_I_MAX |
-       HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
-       HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN | HWMON_I_MAX |
-       HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
-       HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN | HWMON_I_MAX |
-       HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
-       HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN | HWMON_I_MAX |
-       HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
-       HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN | HWMON_I_MAX |
-       HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
-       0
-};
-
-static const struct hwmon_channel_info ltc2992_in = {
-       .type = hwmon_in,
-       .config = ltc2992_in_config,
-};
-
-static const u32 ltc2992_curr_config[] = {
-       HWMON_C_INPUT | HWMON_C_LOWEST | HWMON_C_HIGHEST | HWMON_C_MIN | HWMON_C_MAX |
-       HWMON_C_MIN_ALARM | HWMON_C_MAX_ALARM,
-       HWMON_C_INPUT | HWMON_C_LOWEST | HWMON_C_HIGHEST | HWMON_C_MIN | HWMON_C_MAX |
-       HWMON_C_MIN_ALARM | HWMON_C_MAX_ALARM,
-       0
-};
-
-static const struct hwmon_channel_info ltc2992_curr = {
-       .type = hwmon_curr,
-       .config = ltc2992_curr_config,
-};
-
-static const u32 ltc2992_power_config[] = {
-       HWMON_P_INPUT | HWMON_P_INPUT_LOWEST | HWMON_P_INPUT_HIGHEST | HWMON_P_MIN | HWMON_P_MAX |
-       HWMON_P_MIN_ALARM | HWMON_P_MAX_ALARM,
-       HWMON_P_INPUT | HWMON_P_INPUT_LOWEST | HWMON_P_INPUT_HIGHEST | HWMON_P_MIN | HWMON_P_MAX |
-       HWMON_P_MIN_ALARM | HWMON_P_MAX_ALARM,
-       0
-};
-
-static const struct hwmon_channel_info ltc2992_power = {
-       .type = hwmon_power,
-       .config = ltc2992_power_config,
-};
-
 static const struct hwmon_channel_info *ltc2992_info[] = {
-       &ltc2992_chip,
-       &ltc2992_in,
-       &ltc2992_curr,
-       &ltc2992_power,
+       HWMON_CHANNEL_INFO(chip,
+                          HWMON_C_IN_RESET_HISTORY),
+       HWMON_CHANNEL_INFO(in,
+                          HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN |
+                          HWMON_I_MAX | HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
+                          HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN |
+                          HWMON_I_MAX | HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
+                          HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN |
+                          HWMON_I_MAX | HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
+                          HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN |
+                          HWMON_I_MAX | HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
+                          HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN |
+                          HWMON_I_MAX | HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM,
+                          HWMON_I_INPUT | HWMON_I_LOWEST | HWMON_I_HIGHEST | HWMON_I_MIN |
+                          HWMON_I_MAX | HWMON_I_MIN_ALARM | HWMON_I_MAX_ALARM),
+       HWMON_CHANNEL_INFO(curr,
+                          HWMON_C_INPUT | HWMON_C_LOWEST | HWMON_C_HIGHEST | HWMON_C_MIN |
+                          HWMON_C_MAX | HWMON_C_MIN_ALARM | HWMON_C_MAX_ALARM,
+                          HWMON_C_INPUT | HWMON_C_LOWEST | HWMON_C_HIGHEST | HWMON_C_MIN |
+                          HWMON_C_MAX | HWMON_C_MIN_ALARM | HWMON_C_MAX_ALARM),
+       HWMON_CHANNEL_INFO(power,
+                          HWMON_P_INPUT | HWMON_P_INPUT_LOWEST | HWMON_P_INPUT_HIGHEST |
+                          HWMON_P_MIN | HWMON_P_MAX | HWMON_P_MIN_ALARM | HWMON_P_MAX_ALARM,
+                          HWMON_P_INPUT | HWMON_P_INPUT_LOWEST | HWMON_P_INPUT_HIGHEST |
+                          HWMON_P_MIN | HWMON_P_MAX | HWMON_P_MIN_ALARM | HWMON_P_MAX_ALARM),
        NULL
 };
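
Note: the consolidation above leans on the HWMON_CHANNEL_INFO() helper from
<linux/hwmon.h>, which packs the type plus a zero-terminated config array
into a compound literal, roughly:

    #define HWMON_CHANNEL_INFO(stype, ...)          \
            (&(struct hwmon_channel_info) {         \
                    .type = hwmon_##stype,          \
                    .config = (u32 []) {            \
                            __VA_ARGS__, 0          \
                    }                               \
            })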
 
index 1ba1e31459690aec45cb39584a1799bbb2bb138f..26278b0f17a989ac0a6a23bcd5dc8d5143bd46e6 100644 (file)
@@ -223,16 +223,6 @@ static int pvt_read(struct device *dev, enum hwmon_sensor_types type,
        }
 }
 
-static const u32 pvt_chip_config[] = {
-       HWMON_C_REGISTER_TZ,
-       0
-};
-
-static const struct hwmon_channel_info pvt_chip = {
-       .type = hwmon_chip,
-       .config = pvt_chip_config,
-};
-
 static struct hwmon_channel_info pvt_temp = {
        .type = hwmon_temp,
 };
@@ -555,7 +545,7 @@ static int mr75203_probe(struct platform_device *pdev)
        pvt_info = devm_kcalloc(dev, val + 2, sizeof(*pvt_info), GFP_KERNEL);
        if (!pvt_info)
                return -ENOMEM;
-       pvt_info[0] = &pvt_chip;
+       pvt_info[0] = HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ);
        index = 1;
 
        if (ts_num) {
similarity index 66%
rename from drivers/hwmon/nct6775.c
rename to drivers/hwmon/nct6775-core.c
index 2b91f7e05126ed770e331d522dcd5bba8aa6aa0d..446964cbae4c0667dd5172dcbd8d54b35f089f5c 100644 (file)
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
-#include <linux/platform_device.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
-#include <linux/hwmon-vid.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
-#include <linux/acpi.h>
 #include <linux/bitops.h>
-#include <linux/dmi.h>
-#include <linux/io.h>
 #include <linux/nospec.h>
-#include <linux/wmi.h>
+#include <linux/regmap.h>
 #include "lm75.h"
+#include "nct6775.h"
 
-#define USE_ALTERNATE
+#undef DEFAULT_SYMBOL_NAMESPACE
+#define DEFAULT_SYMBOL_NAMESPACE HWMON_NCT6775
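+/*
+ * With DEFAULT_SYMBOL_NAMESPACE in effect, every EXPORT_SYMBOL*() in this
+ * file lands in the HWMON_NCT6775 namespace; front-end drivers must use
+ * MODULE_IMPORT_NS(HWMON_NCT6775) to reach these symbols.
+ */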
 
-enum kinds { nct6106, nct6116, nct6775, nct6776, nct6779, nct6791, nct6792,
-            nct6793, nct6795, nct6796, nct6797, nct6798 };
+#define USE_ALTERNATE
 
 /* used to set data->name = nct6775_device_names[data->sio_kind] */
 static const char * const nct6775_device_names[] = {
@@ -79,242 +75,6 @@ static const char * const nct6775_device_names[] = {
        "nct6798",
 };
 
-static const char * const nct6775_sio_names[] __initconst = {
-       "NCT6106D",
-       "NCT6116D",
-       "NCT6775F",
-       "NCT6776D/F",
-       "NCT6779D",
-       "NCT6791D",
-       "NCT6792D",
-       "NCT6793D",
-       "NCT6795D",
-       "NCT6796D",
-       "NCT6797D",
-       "NCT6798D",
-};
-
-static unsigned short force_id;
-module_param(force_id, ushort, 0);
-MODULE_PARM_DESC(force_id, "Override the detected device ID");
-
-static unsigned short fan_debounce;
-module_param(fan_debounce, ushort, 0);
-MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal");
-
-#define DRVNAME "nct6775"
-
-/*
- * Super-I/O constants and functions
- */
-
-#define NCT6775_LD_ACPI                0x0a
-#define NCT6775_LD_HWM         0x0b
-#define NCT6775_LD_VID         0x0d
-#define NCT6775_LD_12          0x12
-
-#define SIO_REG_LDSEL          0x07    /* Logical device select */
-#define SIO_REG_DEVID          0x20    /* Device ID (2 bytes) */
-#define SIO_REG_ENABLE         0x30    /* Logical device enable */
-#define SIO_REG_ADDR           0x60    /* Logical device address (2 bytes) */
-
-#define SIO_NCT6106_ID         0xc450
-#define SIO_NCT6116_ID         0xd280
-#define SIO_NCT6775_ID         0xb470
-#define SIO_NCT6776_ID         0xc330
-#define SIO_NCT6779_ID         0xc560
-#define SIO_NCT6791_ID         0xc800
-#define SIO_NCT6792_ID         0xc910
-#define SIO_NCT6793_ID         0xd120
-#define SIO_NCT6795_ID         0xd350
-#define SIO_NCT6796_ID         0xd420
-#define SIO_NCT6797_ID         0xd450
-#define SIO_NCT6798_ID         0xd428
-#define SIO_ID_MASK            0xFFF8
-
-enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 };
-enum sensor_access { access_direct, access_asuswmi };
-
-struct nct6775_sio_data {
-       int sioreg;
-       int ld;
-       enum kinds kind;
-       enum sensor_access access;
-
-       /* superio_() callbacks  */
-       void (*sio_outb)(struct nct6775_sio_data *sio_data, int reg, int val);
-       int (*sio_inb)(struct nct6775_sio_data *sio_data, int reg);
-       void (*sio_select)(struct nct6775_sio_data *sio_data, int ld);
-       int (*sio_enter)(struct nct6775_sio_data *sio_data);
-       void (*sio_exit)(struct nct6775_sio_data *sio_data);
-};
-
-#define ASUSWMI_MONITORING_GUID                "466747A0-70EC-11DE-8A39-0800200C9A66"
-#define ASUSWMI_METHODID_RSIO          0x5253494F
-#define ASUSWMI_METHODID_WSIO          0x5753494F
-#define ASUSWMI_METHODID_RHWM          0x5248574D
-#define ASUSWMI_METHODID_WHWM          0x5748574D
-#define ASUSWMI_UNSUPPORTED_METHOD     0xFFFFFFFE
-
-static int nct6775_asuswmi_evaluate_method(u32 method_id, u8 bank, u8 reg, u8 val, u32 *retval)
-{
-#if IS_ENABLED(CONFIG_ACPI_WMI)
-       u32 args = bank | (reg << 8) | (val << 16);
-       struct acpi_buffer input = { (acpi_size) sizeof(args), &args };
-       struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-       acpi_status status;
-       union acpi_object *obj;
-       u32 tmp = ASUSWMI_UNSUPPORTED_METHOD;
-
-       status = wmi_evaluate_method(ASUSWMI_MONITORING_GUID, 0,
-                                    method_id, &input, &output);
-
-       if (ACPI_FAILURE(status))
-               return -EIO;
-
-       obj = output.pointer;
-       if (obj && obj->type == ACPI_TYPE_INTEGER)
-               tmp = obj->integer.value;
-
-       if (retval)
-               *retval = tmp;
-
-       kfree(obj);
-
-       if (tmp == ASUSWMI_UNSUPPORTED_METHOD)
-               return -ENODEV;
-       return 0;
-#else
-       return -EOPNOTSUPP;
-#endif
-}
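
The helper above packs its three byte-wide operands into a single u32 before handing it to wmi_evaluate_method(). A worked decode of that layout, with hypothetical values:

	/* args = bank | (reg << 8) | (val << 16); hypothetical contents */
	u32 args = 0x02 | (0x4c << 8) | (0x7f << 16);	/* 0x007f4c02 */
	u8 bank = args & 0xff;				/* 0x02 */
	u8 reg  = (args >> 8) & 0xff;			/* 0x4c */
	u8 val  = (args >> 16) & 0xff;			/* 0x7f */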
-
-static inline int nct6775_asuswmi_write(u8 bank, u8 reg, u8 val)
-{
-       return nct6775_asuswmi_evaluate_method(ASUSWMI_METHODID_WHWM, bank,
-                                             reg, val, NULL);
-}
-
-static inline int nct6775_asuswmi_read(u8 bank, u8 reg, u8 *val)
-{
-       u32 ret, tmp = 0;
-
-       ret = nct6775_asuswmi_evaluate_method(ASUSWMI_METHODID_RHWM, bank,
-                                             reg, 0, &tmp);
-       *val = tmp;
-       return ret;
-}
-
-static int superio_wmi_inb(struct nct6775_sio_data *sio_data, int reg)
-{
-       int tmp = 0;
-
-       nct6775_asuswmi_evaluate_method(ASUSWMI_METHODID_RSIO, sio_data->ld,
-                                       reg, 0, &tmp);
-       return tmp;
-}
-
-static void superio_wmi_outb(struct nct6775_sio_data *sio_data, int reg, int val)
-{
-       nct6775_asuswmi_evaluate_method(ASUSWMI_METHODID_WSIO, sio_data->ld,
-                                       reg, val, NULL);
-}
-
-static void superio_wmi_select(struct nct6775_sio_data *sio_data, int ld)
-{
-       sio_data->ld = ld;
-}
-
-static int superio_wmi_enter(struct nct6775_sio_data *sio_data)
-{
-       return 0;
-}
-
-static void superio_wmi_exit(struct nct6775_sio_data *sio_data)
-{
-}
-
-static void superio_outb(struct nct6775_sio_data *sio_data, int reg, int val)
-{
-       int ioreg = sio_data->sioreg;
-
-       outb(reg, ioreg);
-       outb(val, ioreg + 1);
-}
-
-static int superio_inb(struct nct6775_sio_data *sio_data, int reg)
-{
-       int ioreg = sio_data->sioreg;
-
-       outb(reg, ioreg);
-       return inb(ioreg + 1);
-}
-
-static void superio_select(struct nct6775_sio_data *sio_data, int ld)
-{
-       int ioreg = sio_data->sioreg;
-
-       outb(SIO_REG_LDSEL, ioreg);
-       outb(ld, ioreg + 1);
-}
-
-static int superio_enter(struct nct6775_sio_data *sio_data)
-{
-       int ioreg = sio_data->sioreg;
-
-       /*
-        * Try to reserve <ioreg> and <ioreg + 1> for exclusive access.
-        */
-       if (!request_muxed_region(ioreg, 2, DRVNAME))
-               return -EBUSY;
-
-       outb(0x87, ioreg);
-       outb(0x87, ioreg);
-
-       return 0;
-}
-
-static void superio_exit(struct nct6775_sio_data *sio_data)
-{
-       int ioreg = sio_data->sioreg;
-
-       outb(0xaa, ioreg);
-       outb(0x02, ioreg);
-       outb(0x02, ioreg + 1);
-       release_region(ioreg, 2);
-}
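
Together these helpers implement the usual Super-I/O handshake: writing the magic byte 0x87 twice to the index port unlocks extended function mode, registers are then reached through the index/data pair, and 0xaa locks the chip again on exit. A minimal sketch of reading the 16-bit device ID this way, assuming the helpers keep the signatures above (the function name is hypothetical, for illustration only):

	/* Hypothetical helper, illustration only. */
	static int example_read_devid(struct nct6775_sio_data *sio_data, u16 *id)
	{
		int err = superio_enter(sio_data);

		if (err)
			return err;
		*id = (superio_inb(sio_data, SIO_REG_DEVID) << 8) |
		      superio_inb(sio_data, SIO_REG_DEVID + 1);
		superio_exit(sio_data);
		return 0;
	}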
-
-/*
- * ISA constants
- */
-
-#define IOREGION_ALIGNMENT     (~7)
-#define IOREGION_OFFSET                5
-#define IOREGION_LENGTH                2
-#define ADDR_REG_OFFSET                0
-#define DATA_REG_OFFSET                1
-
-#define NCT6775_REG_BANK       0x4E
-#define NCT6775_REG_CONFIG     0x40
-#define NCT6775_PORT_CHIPID    0x58
-
-/*
- * Not currently used:
- * REG_MAN_ID has the value 0x5ca3 for all supported chips.
- * REG_CHIP_ID == 0x88/0xa1/0xc1 depending on chip model.
- * REG_MAN_ID is at port 0x4f
- * REG_CHIP_ID is at port 0x58
- */
-
-#define NUM_TEMP       10      /* Max number of temp attribute sets w/ limits */
-#define NUM_TEMP_FIXED 6       /* Max number of fixed temp attribute sets */
-#define NUM_TSI_TEMP   8       /* Max number of TSI temp register pairs */
-
-#define NUM_REG_ALARM  7       /* Max number of alarm registers */
-#define NUM_REG_BEEP   5       /* Max number of beep registers */
-
-#define NUM_FAN                7
-
 /* Common and NCT6775 specific data */
 
 /* Voltage min/max registers for nr=7..14 are in bank 5 */
@@ -333,11 +93,6 @@ static const u16 NCT6775_REG_IN[] = {
 #define NCT6775_REG_DIODE              0x5E
 #define NCT6775_DIODE_MASK             0x02
 
-#define NCT6775_REG_FANDIV1            0x506
-#define NCT6775_REG_FANDIV2            0x507
-
-#define NCT6775_REG_CR_FAN_DEBOUNCE    0xf0
-
 static const u16 NCT6775_REG_ALARM[NUM_REG_ALARM] = { 0x459, 0x45A, 0x45B };
 
 /* 0..15 voltages, 16..23 fans, 24..29 temperatures, 30..31 intrusion */
@@ -351,10 +106,6 @@ static const s8 NCT6775_ALARM_BITS[] = {
        4, 5, 13, -1, -1, -1,           /* temp1..temp6 */
        12, -1 };                       /* intrusion0, intrusion1 */
 
-#define FAN_ALARM_BASE         16
-#define TEMP_ALARM_BASE                24
-#define INTRUSION_ALARM_BASE   30
-
 static const u16 NCT6775_REG_BEEP[NUM_REG_BEEP] = { 0x56, 0x57, 0x453, 0x4e };
 
 /*
@@ -370,11 +121,6 @@ static const s8 NCT6775_BEEP_BITS[] = {
        4, 5, 13, -1, -1, -1,           /* temp1..temp6 */
        12, -1 };                       /* intrusion0, intrusion1 */
 
-#define BEEP_ENABLE_BASE               15
-
-static const u8 NCT6775_REG_CR_CASEOPEN_CLR[] = { 0xe6, 0xee };
-static const u8 NCT6775_CR_CASEOPEN_CLR_MASK[] = { 0x20, 0x01 };
-
 /* DC or PWM output fan configuration */
 static const u8 NCT6775_REG_PWM_MODE[] = { 0x04, 0x04, 0x12 };
 static const u8 NCT6775_PWM_MODE_MASK[] = { 0x01, 0x02, 0x01 };
@@ -690,8 +436,6 @@ static const u16 NCT6779_REG_TEMP_CRIT[32] = {
 
 /* NCT6791 specific data */
 
-#define NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE    0x28
-
 static const u16 NCT6791_REG_WEIGHT_TEMP_SEL[NUM_FAN] = { 0, 0x239 };
 static const u16 NCT6791_REG_WEIGHT_TEMP_STEP[NUM_FAN] = { 0, 0x23a };
 static const u16 NCT6791_REG_WEIGHT_TEMP_STEP_TOL[NUM_FAN] = { 0, 0x23b };
@@ -1191,165 +935,6 @@ static inline unsigned int tsi_temp_from_reg(unsigned int reg)
  * Data structures and manipulation thereof
  */
 
-struct nct6775_data {
-       int addr;       /* IO base of hw monitor block */
-       struct nct6775_sio_data *sio_data;
-       enum kinds kind;
-       const char *name;
-
-       const struct attribute_group *groups[7];
-
-       u16 reg_temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst,
-                                   * 3=temp_crit, 4=temp_lcrit
-                                   */
-       u8 temp_src[NUM_TEMP];
-       u16 reg_temp_config[NUM_TEMP];
-       const char * const *temp_label;
-       u32 temp_mask;
-       u32 virt_temp_mask;
-
-       u16 REG_CONFIG;
-       u16 REG_VBAT;
-       u16 REG_DIODE;
-       u8 DIODE_MASK;
-
-       const s8 *ALARM_BITS;
-       const s8 *BEEP_BITS;
-
-       const u16 *REG_VIN;
-       const u16 *REG_IN_MINMAX[2];
-
-       const u16 *REG_TARGET;
-       const u16 *REG_FAN;
-       const u16 *REG_FAN_MODE;
-       const u16 *REG_FAN_MIN;
-       const u16 *REG_FAN_PULSES;
-       const u16 *FAN_PULSE_SHIFT;
-       const u16 *REG_FAN_TIME[3];
-
-       const u16 *REG_TOLERANCE_H;
-
-       const u8 *REG_PWM_MODE;
-       const u8 *PWM_MODE_MASK;
-
-       const u16 *REG_PWM[7];  /* [0]=pwm, [1]=pwm_start, [2]=pwm_floor,
-                                * [3]=pwm_max, [4]=pwm_step,
-                                * [5]=weight_duty_step, [6]=weight_duty_base
-                                */
-       const u16 *REG_PWM_READ;
-
-       const u16 *REG_CRITICAL_PWM_ENABLE;
-       u8 CRITICAL_PWM_ENABLE_MASK;
-       const u16 *REG_CRITICAL_PWM;
-
-       const u16 *REG_AUTO_TEMP;
-       const u16 *REG_AUTO_PWM;
-
-       const u16 *REG_CRITICAL_TEMP;
-       const u16 *REG_CRITICAL_TEMP_TOLERANCE;
-
-       const u16 *REG_TEMP_SOURCE;     /* temp register sources */
-       const u16 *REG_TEMP_SEL;
-       const u16 *REG_WEIGHT_TEMP_SEL;
-       const u16 *REG_WEIGHT_TEMP[3];  /* 0=base, 1=tolerance, 2=step */
-
-       const u16 *REG_TEMP_OFFSET;
-
-       const u16 *REG_ALARM;
-       const u16 *REG_BEEP;
-
-       const u16 *REG_TSI_TEMP;
-
-       unsigned int (*fan_from_reg)(u16 reg, unsigned int divreg);
-       unsigned int (*fan_from_reg_min)(u16 reg, unsigned int divreg);
-
-       struct mutex update_lock;
-       bool valid;             /* true if following fields are valid */
-       unsigned long last_updated;     /* In jiffies */
-
-       /* Register values */
-       u8 bank;                /* current register bank */
-       u8 in_num;              /* number of in inputs we have */
-       u8 in[15][3];           /* [0]=in, [1]=in_max, [2]=in_min */
-       unsigned int rpm[NUM_FAN];
-       u16 fan_min[NUM_FAN];
-       u8 fan_pulses[NUM_FAN];
-       u8 fan_div[NUM_FAN];
-       u8 has_pwm;
-       u8 has_fan;             /* some fan inputs can be disabled */
-       u8 has_fan_min;         /* some fans don't have min register */
-       bool has_fan_div;
-
-       u8 num_temp_alarms;     /* 2, 3, or 6 */
-       u8 num_temp_beeps;      /* 2, 3, or 6 */
-       u8 temp_fixed_num;      /* 3 or 6 */
-       u8 temp_type[NUM_TEMP_FIXED];
-       s8 temp_offset[NUM_TEMP_FIXED];
-       s16 temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst,
-                               * 3=temp_crit, 4=temp_lcrit */
-       s16 tsi_temp[NUM_TSI_TEMP];
-       u64 alarms;
-       u64 beeps;
-
-       u8 pwm_num;     /* number of pwm */
-       u8 pwm_mode[NUM_FAN];   /* 0->DC variable voltage,
-                                * 1->PWM variable duty cycle
-                                */
-       enum pwm_enable pwm_enable[NUM_FAN];
-                       /* 0->off
-                        * 1->manual
-                        * 2->thermal cruise mode (also called SmartFan I)
-                        * 3->fan speed cruise mode
-                        * 4->SmartFan III
-                        * 5->enhanced variable thermal cruise (SmartFan IV)
-                        */
-       u8 pwm[7][NUM_FAN];     /* [0]=pwm, [1]=pwm_start, [2]=pwm_floor,
-                                * [3]=pwm_max, [4]=pwm_step,
-                                * [5]=weight_duty_step, [6]=weight_duty_base
-                                */
-
-       u8 target_temp[NUM_FAN];
-       u8 target_temp_mask;
-       u32 target_speed[NUM_FAN];
-       u32 target_speed_tolerance[NUM_FAN];
-       u8 speed_tolerance_limit;
-
-       u8 temp_tolerance[2][NUM_FAN];
-       u8 tolerance_mask;
-
-       u8 fan_time[3][NUM_FAN]; /* 0 = stop_time, 1 = step_up, 2 = step_down */
-
-       /* Automatic fan speed control registers */
-       int auto_pwm_num;
-       u8 auto_pwm[NUM_FAN][7];
-       u8 auto_temp[NUM_FAN][7];
-       u8 pwm_temp_sel[NUM_FAN];
-       u8 pwm_weight_temp_sel[NUM_FAN];
-       u8 weight_temp[3][NUM_FAN];     /* 0->temp_step, 1->temp_step_tol,
-                                        * 2->temp_base
-                                        */
-
-       u8 vid;
-       u8 vrm;
-
-       bool have_vid;
-
-       u16 have_temp;
-       u16 have_temp_fixed;
-       u16 have_tsi_temp;
-       u16 have_in;
-
-       /* Remember extra register values over suspend/resume */
-       u8 vbat;
-       u8 fandiv1;
-       u8 fandiv2;
-       u8 sio_reg_enable;
-
-       /* nct6775_*() callbacks  */
-       u16 (*read_value)(struct nct6775_data *data, u16 reg);
-       int (*write_value)(struct nct6775_data *data, u16 reg, u16 value);
-};
-
 struct sensor_device_template {
        struct device_attribute dev_attr;
        union {
@@ -1405,10 +990,8 @@ struct sensor_template_group {
        int base;
 };
 
-static struct attribute_group *
-nct6775_create_attr_group(struct device *dev,
-                         const struct sensor_template_group *tg,
-                         int repeat)
+static int nct6775_add_template_attr_group(struct device *dev, struct nct6775_data *data,
+                                          const struct sensor_template_group *tg, int repeat)
 {
        struct attribute_group *group;
        struct sensor_device_attr_u *su;
@@ -1419,28 +1002,28 @@ nct6775_create_attr_group(struct device *dev,
        int i, count;
 
        if (repeat <= 0)
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
 
        t = tg->templates;
        for (count = 0; *t; t++, count++)
                ;
 
        if (count == 0)
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
 
        group = devm_kzalloc(dev, sizeof(*group), GFP_KERNEL);
        if (group == NULL)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
 
        attrs = devm_kcalloc(dev, repeat * count + 1, sizeof(*attrs),
                             GFP_KERNEL);
        if (attrs == NULL)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
 
        su = devm_kzalloc(dev, array3_size(repeat, count, sizeof(*su)),
                               GFP_KERNEL);
        if (su == NULL)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
 
        group->attrs = attrs;
        group->is_visible = tg->is_visible;
@@ -1478,10 +1061,10 @@ nct6775_create_attr_group(struct device *dev,
                }
        }
 
-       return group;
+       return nct6775_add_attr_group(data, group);
 }
 
-static bool is_word_sized(struct nct6775_data *data, u16 reg)
+bool nct6775_reg_is_word_sized(struct nct6775_data *data, u16 reg)
 {
        switch (data->kind) {
        case nct6106:
@@ -1538,180 +1121,81 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
        }
        return false;
 }
+EXPORT_SYMBOL_GPL(nct6775_reg_is_word_sized);
 
-static inline void nct6775_wmi_set_bank(struct nct6775_data *data, u16 reg)
-{
-       u8 bank = reg >> 8;
-
-       data->bank = bank;
-}
-
-static u16 nct6775_wmi_read_value(struct nct6775_data *data, u16 reg)
+/* We left-align 8-bit temperature values to make the code simpler */
+static int nct6775_read_temp(struct nct6775_data *data, u16 reg, u16 *val)
 {
-       int res, err, word_sized = is_word_sized(data, reg);
-       u8 tmp = 0;
-
-       nct6775_wmi_set_bank(data, reg);
+       int err;
 
-       err = nct6775_asuswmi_read(data->bank, reg & 0xff, &tmp);
+       err = nct6775_read_value(data, reg, val);
        if (err)
-               return 0;
-
-       res = tmp;
-       if (word_sized) {
-               err = nct6775_asuswmi_read(data->bank, (reg & 0xff) + 1, &tmp);
-               if (err)
-                       return 0;
-
-               res = (res << 8) + tmp;
-       }
-       return res;
-}
-
-static int nct6775_wmi_write_value(struct nct6775_data *data, u16 reg, u16 value)
-{
-       int res, word_sized = is_word_sized(data, reg);
-
-       nct6775_wmi_set_bank(data, reg);
-
-       if (word_sized) {
-               res = nct6775_asuswmi_write(data->bank, reg & 0xff, value >> 8);
-               if (res)
-                       return res;
-
-               res = nct6775_asuswmi_write(data->bank, (reg & 0xff) + 1, value);
-       } else {
-               res = nct6775_asuswmi_write(data->bank, reg & 0xff, value);
-       }
-
-       return res;
-}
-
-/*
- * On older chips, only registers 0x50-0x5f are banked.
- * On more recent chips, all registers are banked.
- * Assume that is the case and set the bank number for each access.
- * Cache the bank number so it only needs to be set if it changes.
- */
-static inline void nct6775_set_bank(struct nct6775_data *data, u16 reg)
-{
-       u8 bank = reg >> 8;
-
-       if (data->bank != bank) {
-               outb_p(NCT6775_REG_BANK, data->addr + ADDR_REG_OFFSET);
-               outb_p(bank, data->addr + DATA_REG_OFFSET);
-               data->bank = bank;
-       }
-}
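
The banking scheme the comment above describes splits a 16-bit register address into a bank byte and an in-bank offset. A worked example with a hypothetical register:

	/* reg = 0x459 (hypothetical) */
	u8 bank   = 0x459 >> 8;		/* 0x04 */
	u8 offset = 0x459 & 0xff;	/* 0x59 */
	/* the bank byte is rewritten through NCT6775_REG_BANK only
	 * when it differs from the cached data->bank */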
+               return err;
 
-static u16 nct6775_read_value(struct nct6775_data *data, u16 reg)
-{
-       int res, word_sized = is_word_sized(data, reg);
-
-       nct6775_set_bank(data, reg);
-       outb_p(reg & 0xff, data->addr + ADDR_REG_OFFSET);
-       res = inb_p(data->addr + DATA_REG_OFFSET);
-       if (word_sized) {
-               outb_p((reg & 0xff) + 1,
-                      data->addr + ADDR_REG_OFFSET);
-               res = (res << 8) + inb_p(data->addr + DATA_REG_OFFSET);
-       }
-       return res;
-}
+       if (!nct6775_reg_is_word_sized(data, reg))
+               *val <<= 8;
 
-static int nct6775_write_value(struct nct6775_data *data, u16 reg, u16 value)
-{
-       int word_sized = is_word_sized(data, reg);
-
-       nct6775_set_bank(data, reg);
-       outb_p(reg & 0xff, data->addr + ADDR_REG_OFFSET);
-       if (word_sized) {
-               outb_p(value >> 8, data->addr + DATA_REG_OFFSET);
-               outb_p((reg & 0xff) + 1,
-                      data->addr + ADDR_REG_OFFSET);
-       }
-       outb_p(value & 0xff, data->addr + DATA_REG_OFFSET);
        return 0;
 }
 
-/* We left-align 8-bit temperature values to make the code simpler */
-static u16 nct6775_read_temp(struct nct6775_data *data, u16 reg)
-{
-       u16 res;
-
-       res = data->read_value(data, reg);
-       if (!is_word_sized(data, reg))
-               res <<= 8;
-
-       return res;
-}
-
-static int nct6775_write_temp(struct nct6775_data *data, u16 reg, u16 value)
-{
-       if (!is_word_sized(data, reg))
-               value >>= 8;
-       return data->write_value(data, reg, value);
-}
-
 /* This function assumes that the caller holds data->update_lock */
-static void nct6775_write_fan_div(struct nct6775_data *data, int nr)
+static int nct6775_write_fan_div(struct nct6775_data *data, int nr)
 {
-       u8 reg;
+       u16 reg;
+       int err;
+       u16 fandiv_reg = nr < 2 ? NCT6775_REG_FANDIV1 : NCT6775_REG_FANDIV2;
+       unsigned int oddshift = (nr & 1) * 4; /* masks shift by four if nr is odd */
 
-       switch (nr) {
-       case 0:
-               reg = (data->read_value(data, NCT6775_REG_FANDIV1) & 0x70)
-                   | (data->fan_div[0] & 0x7);
-               data->write_value(data, NCT6775_REG_FANDIV1, reg);
-               break;
-       case 1:
-               reg = (data->read_value(data, NCT6775_REG_FANDIV1) & 0x7)
-                   | ((data->fan_div[1] << 4) & 0x70);
-               data->write_value(data, NCT6775_REG_FANDIV1, reg);
-               break;
-       case 2:
-               reg = (data->read_value(data, NCT6775_REG_FANDIV2) & 0x70)
-                   | (data->fan_div[2] & 0x7);
-               data->write_value(data, NCT6775_REG_FANDIV2, reg);
-               break;
-       case 3:
-               reg = (data->read_value(data, NCT6775_REG_FANDIV2) & 0x7)
-                   | ((data->fan_div[3] << 4) & 0x70);
-               data->write_value(data, NCT6775_REG_FANDIV2, reg);
-               break;
-       }
+       err = nct6775_read_value(data, fandiv_reg, &reg);
+       if (err)
+               return err;
+       reg &= 0x70 >> oddshift;
+       reg |= (data->fan_div[nr] & 0x7) << oddshift;
+       return nct6775_write_value(data, fandiv_reg, reg);
 }
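
The mask arithmetic in this helper collapses the four-way switch it replaces into two lines: the keep-mask preserves the other fan's nibble, then the 3-bit divider is shifted into its own nibble. A worked example for nr == 1, with hypothetical contents:

	/* oddshift = (1 & 1) * 4 = 4, reg reads back as 0x53, fan_div[1] = 6 */
	reg &= 0x70 >> 4;			/* keep fan0's bits: reg = 0x03 */
	reg |= (6 & 0x7) << 4;			/* set fan1's divider: reg = 0x63 */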
 
-static void nct6775_write_fan_div_common(struct nct6775_data *data, int nr)
+static int nct6775_write_fan_div_common(struct nct6775_data *data, int nr)
 {
        if (data->kind == nct6775)
-               nct6775_write_fan_div(data, nr);
+               return nct6775_write_fan_div(data, nr);
+       return 0;
 }
 
-static void nct6775_update_fan_div(struct nct6775_data *data)
+static int nct6775_update_fan_div(struct nct6775_data *data)
 {
-       u8 i;
+       int err;
+       u16 i;
 
-       i = data->read_value(data, NCT6775_REG_FANDIV1);
+       err = nct6775_read_value(data, NCT6775_REG_FANDIV1, &i);
+       if (err)
+               return err;
        data->fan_div[0] = i & 0x7;
        data->fan_div[1] = (i & 0x70) >> 4;
-       i = data->read_value(data, NCT6775_REG_FANDIV2);
+       err = nct6775_read_value(data, NCT6775_REG_FANDIV2, &i);
+       if (err)
+               return err;
        data->fan_div[2] = i & 0x7;
        if (data->has_fan & BIT(3))
                data->fan_div[3] = (i & 0x70) >> 4;
+
+       return 0;
 }
 
-static void nct6775_update_fan_div_common(struct nct6775_data *data)
+static int nct6775_update_fan_div_common(struct nct6775_data *data)
 {
        if (data->kind == nct6775)
-               nct6775_update_fan_div(data);
+               return nct6775_update_fan_div(data);
+       return 0;
 }
 
-static void nct6775_init_fan_div(struct nct6775_data *data)
+static int nct6775_init_fan_div(struct nct6775_data *data)
 {
-       int i;
+       int i, err;
+
+       err = nct6775_update_fan_div_common(data);
+       if (err)
+               return err;
 
-       nct6775_update_fan_div_common(data);
        /*
         * For all fans, start with highest divider value if the divider
         * register is not initialized. This ensures that we get a
@@ -1723,19 +1207,26 @@ static void nct6775_init_fan_div(struct nct6775_data *data)
                        continue;
                if (data->fan_div[i] == 0) {
                        data->fan_div[i] = 7;
-                       nct6775_write_fan_div_common(data, i);
+                       err = nct6775_write_fan_div_common(data, i);
+                       if (err)
+                               return err;
                }
        }
+
+       return 0;
 }
 
-static void nct6775_init_fan_common(struct device *dev,
-                                   struct nct6775_data *data)
+static int nct6775_init_fan_common(struct device *dev,
+                                  struct nct6775_data *data)
 {
-       int i;
-       u8 reg;
+       int i, err;
+       u16 reg;
 
-       if (data->has_fan_div)
-               nct6775_init_fan_div(data);
+       if (data->has_fan_div) {
+               err = nct6775_init_fan_div(data);
+               if (err)
+                       return err;
+       }
 
        /*
         * If fan_min is not set (0), set it to 0xff to disable it. This
@@ -1743,23 +1234,30 @@ static void nct6775_init_fan_common(struct device *dev,
         */
        for (i = 0; i < ARRAY_SIZE(data->fan_min); i++) {
                if (data->has_fan_min & BIT(i)) {
-                       reg = data->read_value(data, data->REG_FAN_MIN[i]);
-                       if (!reg)
-                               data->write_value(data, data->REG_FAN_MIN[i],
-                                                 data->has_fan_div ? 0xff
-                                                                   : 0xff1f);
+                       err = nct6775_read_value(data, data->REG_FAN_MIN[i], &reg);
+                       if (err)
+                               return err;
+                       if (!reg) {
+                               err = nct6775_write_value(data, data->REG_FAN_MIN[i],
+                                                         data->has_fan_div ? 0xff : 0xff1f);
+                               if (err)
+                                       return err;
+                       }
                }
        }
+
+       return 0;
 }
 
-static void nct6775_select_fan_div(struct device *dev,
-                                  struct nct6775_data *data, int nr, u16 reg)
+static int nct6775_select_fan_div(struct device *dev,
+                                 struct nct6775_data *data, int nr, u16 reg)
 {
+       int err;
        u8 fan_div = data->fan_div[nr];
        u16 fan_min;
 
        if (!data->has_fan_div)
-               return;
+               return 0;
 
        /*
         * If we failed to measure the fan speed, or the reported value is not
@@ -1791,36 +1289,46 @@ static void nct6775_select_fan_div(struct device *dev,
                        }
                        if (fan_min != data->fan_min[nr]) {
                                data->fan_min[nr] = fan_min;
-                               data->write_value(data, data->REG_FAN_MIN[nr],
-                                                 fan_min);
+                               err = nct6775_write_value(data, data->REG_FAN_MIN[nr], fan_min);
+                               if (err)
+                                       return err;
                        }
                }
                data->fan_div[nr] = fan_div;
-               nct6775_write_fan_div_common(data, nr);
+               err = nct6775_write_fan_div_common(data, nr);
+               if (err)
+                       return err;
        }
+
+       return 0;
 }
 
-static void nct6775_update_pwm(struct device *dev)
+static int nct6775_update_pwm(struct device *dev)
 {
        struct nct6775_data *data = dev_get_drvdata(dev);
-       int i, j;
-       int fanmodecfg, reg;
+       int i, j, err;
+       u16 fanmodecfg, reg;
        bool duty_is_dc;
 
        for (i = 0; i < data->pwm_num; i++) {
                if (!(data->has_pwm & BIT(i)))
                        continue;
 
-               duty_is_dc = data->REG_PWM_MODE[i] &&
-                 (data->read_value(data, data->REG_PWM_MODE[i])
-                  & data->PWM_MODE_MASK[i]);
+               err = nct6775_read_value(data, data->REG_PWM_MODE[i], &reg);
+               if (err)
+                       return err;
+               duty_is_dc = data->REG_PWM_MODE[i] && (reg & data->PWM_MODE_MASK[i]);
                data->pwm_mode[i] = !duty_is_dc;
 
-               fanmodecfg = data->read_value(data, data->REG_FAN_MODE[i]);
+               err = nct6775_read_value(data, data->REG_FAN_MODE[i], &fanmodecfg);
+               if (err)
+                       return err;
                for (j = 0; j < ARRAY_SIZE(data->REG_PWM); j++) {
                        if (data->REG_PWM[j] && data->REG_PWM[j][i]) {
-                               data->pwm[j][i] = data->read_value(data,
-                                                                  data->REG_PWM[j][i]);
+                               err = nct6775_read_value(data, data->REG_PWM[j][i], &reg);
+                               if (err)
+                                       return err;
+                               data->pwm[j][i] = reg;
                        }
                }
 
@@ -1835,17 +1343,22 @@ static void nct6775_update_pwm(struct device *dev)
                        u8 t = fanmodecfg & 0x0f;
 
                        if (data->REG_TOLERANCE_H) {
-                               t |= (data->read_value(data,
-                                     data->REG_TOLERANCE_H[i]) & 0x70) >> 1;
+                               err = nct6775_read_value(data, data->REG_TOLERANCE_H[i], &reg);
+                               if (err)
+                                       return err;
+                               t |= (reg & 0x70) >> 1;
                        }
                        data->target_speed_tolerance[i] = t;
                }
 
-               data->temp_tolerance[1][i] =
-                       data->read_value(data,
-                                        data->REG_CRITICAL_TEMP_TOLERANCE[i]);
+               err = nct6775_read_value(data, data->REG_CRITICAL_TEMP_TOLERANCE[i], &reg);
+               if (err)
+                       return err;
+               data->temp_tolerance[1][i] = reg;
 
-               reg = data->read_value(data, data->REG_TEMP_SEL[i]);
+               err = nct6775_read_value(data, data->REG_TEMP_SEL[i], &reg);
+               if (err)
+                       return err;
                data->pwm_temp_sel[i] = reg & 0x1f;
                /* If fan can stop, report floor as 0 */
                if (reg & 0x80)
@@ -1854,7 +1367,9 @@ static void nct6775_update_pwm(struct device *dev)
                if (!data->REG_WEIGHT_TEMP_SEL[i])
                        continue;
 
-               reg = data->read_value(data, data->REG_WEIGHT_TEMP_SEL[i]);
+               err = nct6775_read_value(data, data->REG_WEIGHT_TEMP_SEL[i], &reg);
+               if (err)
+                       return err;
                data->pwm_weight_temp_sel[i] = reg & 0x1f;
                /* If weight is disabled, report weight source as 0 */
                if (!(reg & 0x80))
@@ -1862,29 +1377,37 @@ static void nct6775_update_pwm(struct device *dev)
 
                /* Weight temp data */
                for (j = 0; j < ARRAY_SIZE(data->weight_temp); j++) {
-                       data->weight_temp[j][i] = data->read_value(data,
-                                                                  data->REG_WEIGHT_TEMP[j][i]);
+                       err = nct6775_read_value(data, data->REG_WEIGHT_TEMP[j][i], &reg);
+                       if (err)
+                               return err;
+                       data->weight_temp[j][i] = reg;
                }
        }
+
+       return 0;
 }
 
-static void nct6775_update_pwm_limits(struct device *dev)
+static int nct6775_update_pwm_limits(struct device *dev)
 {
        struct nct6775_data *data = dev_get_drvdata(dev);
-       int i, j;
-       u8 reg;
-       u16 reg_t;
+       int i, j, err;
+       u16 reg, reg_t;
 
        for (i = 0; i < data->pwm_num; i++) {
                if (!(data->has_pwm & BIT(i)))
                        continue;
 
                for (j = 0; j < ARRAY_SIZE(data->fan_time); j++) {
-                       data->fan_time[j][i] =
-                         data->read_value(data, data->REG_FAN_TIME[j][i]);
+                       err = nct6775_read_value(data, data->REG_FAN_TIME[j][i], &reg);
+                       if (err)
+                               return err;
+                       data->fan_time[j][i] = reg;
                }
 
-               reg_t = data->read_value(data, data->REG_TARGET[i]);
+               err = nct6775_read_value(data, data->REG_TARGET[i], &reg_t);
+               if (err)
+                       return err;
+
                /* Update only in matching mode or if never updated */
                if (!data->target_temp[i] ||
                    data->pwm_enable[i] == thermal_cruise)
@@ -1892,29 +1415,37 @@ static void nct6775_update_pwm_limits(struct device *dev)
                if (!data->target_speed[i] ||
                    data->pwm_enable[i] == speed_cruise) {
                        if (data->REG_TOLERANCE_H) {
-                               reg_t |= (data->read_value(data,
-                                       data->REG_TOLERANCE_H[i]) & 0x0f) << 8;
+                               err = nct6775_read_value(data, data->REG_TOLERANCE_H[i], &reg);
+                               if (err)
+                                       return err;
+                               reg_t |= (reg & 0x0f) << 8;
                        }
                        data->target_speed[i] = reg_t;
                }
 
                for (j = 0; j < data->auto_pwm_num; j++) {
-                       data->auto_pwm[i][j] =
-                         data->read_value(data,
-                                          NCT6775_AUTO_PWM(data, i, j));
-                       data->auto_temp[i][j] =
-                         data->read_value(data,
-                                          NCT6775_AUTO_TEMP(data, i, j));
+                       err = nct6775_read_value(data, NCT6775_AUTO_PWM(data, i, j), &reg);
+                       if (err)
+                               return err;
+                       data->auto_pwm[i][j] = reg;
+
+                       err = nct6775_read_value(data, NCT6775_AUTO_TEMP(data, i, j), &reg);
+                       if (err)
+                               return err;
+                       data->auto_temp[i][j] = reg;
                }
 
                /* critical auto_pwm temperature data */
-               data->auto_temp[i][data->auto_pwm_num] =
-                       data->read_value(data, data->REG_CRITICAL_TEMP[i]);
+               err = nct6775_read_value(data, data->REG_CRITICAL_TEMP[i], &reg);
+               if (err)
+                       return err;
+               data->auto_temp[i][data->auto_pwm_num] = reg;
 
                switch (data->kind) {
                case nct6775:
-                       reg = data->read_value(data,
-                                              NCT6775_REG_CRITICAL_ENAB[i]);
+                       err = nct6775_read_value(data, NCT6775_REG_CRITICAL_ENAB[i], &reg);
+                       if (err)
+                               return err;
                        data->auto_pwm[i][data->auto_pwm_num] =
                                                (reg & 0x02) ? 0xff : 0x00;
                        break;
@@ -1931,120 +1462,158 @@ static void nct6775_update_pwm_limits(struct device *dev)
                case nct6796:
                case nct6797:
                case nct6798:
-                       reg = data->read_value(data,
-                                       data->REG_CRITICAL_PWM_ENABLE[i]);
-                       if (reg & data->CRITICAL_PWM_ENABLE_MASK)
-                               reg = data->read_value(data,
-                                       data->REG_CRITICAL_PWM[i]);
-                       else
+                       err = nct6775_read_value(data, data->REG_CRITICAL_PWM_ENABLE[i], &reg);
+                       if (err)
+                               return err;
+                       if (reg & data->CRITICAL_PWM_ENABLE_MASK) {
+                               err = nct6775_read_value(data, data->REG_CRITICAL_PWM[i], &reg);
+                               if (err)
+                                       return err;
+                       } else {
                                reg = 0xff;
+                       }
                        data->auto_pwm[i][data->auto_pwm_num] = reg;
                        break;
                }
        }
+
+       return 0;
 }
 
 static struct nct6775_data *nct6775_update_device(struct device *dev)
 {
        struct nct6775_data *data = dev_get_drvdata(dev);
-       int i, j;
+       int i, j, err = 0;
+       u16 reg;
 
        mutex_lock(&data->update_lock);
 
        if (time_after(jiffies, data->last_updated + HZ + HZ / 2)
            || !data->valid) {
                /* Fan clock dividers */
-               nct6775_update_fan_div_common(data);
+               err = nct6775_update_fan_div_common(data);
+               if (err)
+                       goto out;
 
                /* Measured voltages and limits */
                for (i = 0; i < data->in_num; i++) {
                        if (!(data->have_in & BIT(i)))
                                continue;
 
-                       data->in[i][0] = data->read_value(data,
-                                                         data->REG_VIN[i]);
-                       data->in[i][1] = data->read_value(data,
-                                         data->REG_IN_MINMAX[0][i]);
-                       data->in[i][2] = data->read_value(data,
-                                         data->REG_IN_MINMAX[1][i]);
+                       err = nct6775_read_value(data, data->REG_VIN[i], &reg);
+                       if (err)
+                               goto out;
+                       data->in[i][0] = reg;
+
+                       err = nct6775_read_value(data, data->REG_IN_MINMAX[0][i], &reg);
+                       if (err)
+                               goto out;
+                       data->in[i][1] = reg;
+
+                       err = nct6775_read_value(data, data->REG_IN_MINMAX[1][i], &reg);
+                       if (err)
+                               goto out;
+                       data->in[i][2] = reg;
                }
 
                /* Measured fan speeds and limits */
                for (i = 0; i < ARRAY_SIZE(data->rpm); i++) {
-                       u16 reg;
-
                        if (!(data->has_fan & BIT(i)))
                                continue;
 
-                       reg = data->read_value(data, data->REG_FAN[i]);
+                       err = nct6775_read_value(data, data->REG_FAN[i], &reg);
+                       if (err)
+                               goto out;
                        data->rpm[i] = data->fan_from_reg(reg,
                                                          data->fan_div[i]);
 
-                       if (data->has_fan_min & BIT(i))
-                               data->fan_min[i] = data->read_value(data,
-                                          data->REG_FAN_MIN[i]);
+                       if (data->has_fan_min & BIT(i)) {
+                               err = nct6775_read_value(data, data->REG_FAN_MIN[i], &reg);
+                               if (err)
+                                       goto out;
+                               data->fan_min[i] = reg;
+                       }
 
                        if (data->REG_FAN_PULSES[i]) {
-                               data->fan_pulses[i] =
-                                 (data->read_value(data,
-                                                   data->REG_FAN_PULSES[i])
-                                  >> data->FAN_PULSE_SHIFT[i]) & 0x03;
+                               err = nct6775_read_value(data, data->REG_FAN_PULSES[i], &reg);
+                               if (err)
+                                       goto out;
+                               data->fan_pulses[i] = (reg >> data->FAN_PULSE_SHIFT[i]) & 0x03;
                        }
 
-                       nct6775_select_fan_div(dev, data, i, reg);
+                       err = nct6775_select_fan_div(dev, data, i, reg);
+                       if (err)
+                               goto out;
                }
 
-               nct6775_update_pwm(dev);
-               nct6775_update_pwm_limits(dev);
+               err = nct6775_update_pwm(dev);
+               if (err)
+                       goto out;
+
+               err = nct6775_update_pwm_limits(dev);
+               if (err)
+                       goto out;
 
                /* Measured temperatures and limits */
                for (i = 0; i < NUM_TEMP; i++) {
                        if (!(data->have_temp & BIT(i)))
                                continue;
                        for (j = 0; j < ARRAY_SIZE(data->reg_temp); j++) {
-                               if (data->reg_temp[j][i])
-                                       data->temp[j][i] = nct6775_read_temp(data,
-                                                                            data->reg_temp[j][i]);
+                               if (data->reg_temp[j][i]) {
+                                       err = nct6775_read_temp(data, data->reg_temp[j][i], &reg);
+                                       if (err)
+                                               goto out;
+                                       data->temp[j][i] = reg;
+                               }
                        }
                        if (i >= NUM_TEMP_FIXED ||
                            !(data->have_temp_fixed & BIT(i)))
                                continue;
-                       data->temp_offset[i] = data->read_value(data,
-                                                                  data->REG_TEMP_OFFSET[i]);
+                       err = nct6775_read_value(data, data->REG_TEMP_OFFSET[i], &reg);
+                       if (err)
+                               goto out;
+                       data->temp_offset[i] = reg;
                }
 
                for (i = 0; i < NUM_TSI_TEMP; i++) {
                        if (!(data->have_tsi_temp & BIT(i)))
                                continue;
-                       data->tsi_temp[i] = data->read_value(data, data->REG_TSI_TEMP[i]);
+                       err = nct6775_read_value(data, data->REG_TSI_TEMP[i], &reg);
+                       if (err)
+                               goto out;
+                       data->tsi_temp[i] = reg;
                }
 
                data->alarms = 0;
                for (i = 0; i < NUM_REG_ALARM; i++) {
-                       u8 alarm;
+                       u16 alarm;
 
                        if (!data->REG_ALARM[i])
                                continue;
-                       alarm = data->read_value(data, data->REG_ALARM[i]);
+                       err = nct6775_read_value(data, data->REG_ALARM[i], &alarm);
+                       if (err)
+                               goto out;
                        data->alarms |= ((u64)alarm) << (i << 3);
                }
 
                data->beeps = 0;
                for (i = 0; i < NUM_REG_BEEP; i++) {
-                       u8 beep;
+                       u16 beep;
 
                        if (!data->REG_BEEP[i])
                                continue;
-                       beep = data->read_value(data, data->REG_BEEP[i]);
+                       err = nct6775_read_value(data, data->REG_BEEP[i], &beep);
+                       if (err)
+                               goto out;
                        data->beeps |= ((u64)beep) << (i << 3);
                }
 
                data->last_updated = jiffies;
                data->valid = true;
        }
-
+out:
        mutex_unlock(&data->update_lock);
-       return data;
+       return err ? ERR_PTR(err) : data;
 }
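
The alarm and beep loops above pack each 8-bit status register into a u64 at byte position i, shifting by i << 3 (that is, i * 8), so a single bit number indexes the whole set. A worked example with hypothetical register contents:

	/* REG_ALARM[0] reads 0x10, REG_ALARM[2] reads 0x01 */
	alarms |= (u64)0x10 << (0 << 3);	/* sets bit 4  */
	alarms |= (u64)0x01 << (2 << 3);	/* sets bit 16 */
	/* nct6775_show_alarm() then tests (alarms >> nr) & 0x01, with nr
	 * taken from the chip's ALARM_BITS table */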
 
 /*
@@ -2058,6 +1627,9 @@ show_in_reg(struct device *dev, struct device_attribute *attr, char *buf)
        int index = sattr->index;
        int nr = sattr->nr;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%ld\n", in_from_reg(data->in[nr][index], nr));
 }
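
Because nct6775_update_device() can now fail partway through a refresh, it encodes the errno into the returned pointer, and every sysfs show handler opens with the guard added above. The pattern in isolation:

	struct nct6775_data *data = nct6775_update_device(dev);

	if (IS_ERR(data))		/* e.g. -EIO from a register read */
		return PTR_ERR(data);	/* reported to userspace by read(2) */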
 
@@ -2077,34 +1649,39 @@ store_in_reg(struct device *dev, struct device_attribute *attr, const char *buf,
                return err;
        mutex_lock(&data->update_lock);
        data->in[nr][index] = in_to_reg(val, nr);
-       data->write_value(data, data->REG_IN_MINMAX[index - 1][nr],
-                         data->in[nr][index]);
+       err = nct6775_write_value(data, data->REG_IN_MINMAX[index - 1][nr], data->in[nr][index]);
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
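
The "return err ? : count;" spelling here, and in the other store handlers below, is the GNU conditional-with-omitted-operand extension: the tested expression is evaluated once and reused as the result when nonzero, so the handler returns the negative errno on failure and the consumed byte count on success. Spelled out:

	return err ? err : count;	/* equivalent, portable form */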
 
-static ssize_t
-show_alarm(struct device *dev, struct device_attribute *attr, char *buf)
+ssize_t
+nct6775_show_alarm(struct device *dev, struct device_attribute *attr, char *buf)
 {
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
-       int nr = data->ALARM_BITS[sattr->index];
+       int nr;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       nr = data->ALARM_BITS[sattr->index];
        return sprintf(buf, "%u\n",
                       (unsigned int)((data->alarms >> nr) & 0x01));
 }
+EXPORT_SYMBOL_GPL(nct6775_show_alarm);
 
 static int find_temp_source(struct nct6775_data *data, int index, int count)
 {
        int source = data->temp_src[index];
-       int nr;
+       int nr, err;
 
        for (nr = 0; nr < count; nr++) {
-               int src;
+               u16 src;
 
-               src = data->read_value(data,
-                                      data->REG_TEMP_SOURCE[nr]) & 0x1f;
-               if (src == source)
+               err = nct6775_read_value(data, data->REG_TEMP_SOURCE[nr], &src);
+               if (err)
+                       return err;
+               if ((src & 0x1f) == source)
                        return nr;
        }
        return -ENODEV;
@@ -2118,6 +1695,9 @@ show_temp_alarm(struct device *dev, struct device_attribute *attr, char *buf)
        unsigned int alarm = 0;
        int nr;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        /*
         * For temperatures, there is no fixed mapping from registers to alarm
         * bits. Alarm bits are determined by the temperature source mapping.
@@ -2131,20 +1711,25 @@ show_temp_alarm(struct device *dev, struct device_attribute *attr, char *buf)
        return sprintf(buf, "%u\n", alarm);
 }
 
-static ssize_t
-show_beep(struct device *dev, struct device_attribute *attr, char *buf)
+ssize_t
+nct6775_show_beep(struct device *dev, struct device_attribute *attr, char *buf)
 {
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        struct nct6775_data *data = nct6775_update_device(dev);
-       int nr = data->BEEP_BITS[sattr->index];
+       int nr;
+
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       nr = data->BEEP_BITS[sattr->index];
 
        return sprintf(buf, "%u\n",
                       (unsigned int)((data->beeps >> nr) & 0x01));
 }
+EXPORT_SYMBOL_GPL(nct6775_show_beep);
 
-static ssize_t
-store_beep(struct device *dev, struct device_attribute *attr, const char *buf,
-          size_t count)
+ssize_t
+nct6775_store_beep(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
        struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
        struct nct6775_data *data = dev_get_drvdata(dev);
@@ -2164,11 +1749,12 @@ store_beep(struct device *dev, struct device_attribute *attr, const char *buf,
                data->beeps |= (1ULL << nr);
        else
                data->beeps &= ~(1ULL << nr);
-       data->write_value(data, data->REG_BEEP[regindex],
-                         (data->beeps >> (regindex << 3)) & 0xff);
+       err = nct6775_write_value(data, data->REG_BEEP[regindex],
+                                 (data->beeps >> (regindex << 3)) & 0xff);
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
+EXPORT_SYMBOL_GPL(nct6775_store_beep);
 
 static ssize_t
 show_temp_beep(struct device *dev, struct device_attribute *attr, char *buf)
@@ -2178,6 +1764,9 @@ show_temp_beep(struct device *dev, struct device_attribute *attr, char *buf)
        unsigned int beep = 0;
        int nr;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        /*
         * For temperatures, there is no fixed mapping from registers to beep
         * enable bits. Beep enable bits are determined by the temperature
@@ -2220,11 +1809,11 @@ store_temp_beep(struct device *dev, struct device_attribute *attr,
                data->beeps |= (1ULL << bit);
        else
                data->beeps &= ~(1ULL << bit);
-       data->write_value(data, data->REG_BEEP[regindex],
-                         (data->beeps >> (regindex << 3)) & 0xff);
+       err = nct6775_write_value(data, data->REG_BEEP[regindex],
+                                 (data->beeps >> (regindex << 3)) & 0xff);
        mutex_unlock(&data->update_lock);
 
-       return count;
+       return err ? : count;
 }
 
 static umode_t nct6775_in_is_visible(struct kobject *kobj,
@@ -2237,17 +1826,14 @@ static umode_t nct6775_in_is_visible(struct kobject *kobj,
        if (!(data->have_in & BIT(in)))
                return 0;
 
-       return attr->mode;
+       return nct6775_attr_mode(data, attr);
 }
 
-SENSOR_TEMPLATE_2(in_input, "in%d_input", S_IRUGO, show_in_reg, NULL, 0, 0);
-SENSOR_TEMPLATE(in_alarm, "in%d_alarm", S_IRUGO, show_alarm, NULL, 0);
-SENSOR_TEMPLATE(in_beep, "in%d_beep", S_IWUSR | S_IRUGO, show_beep, store_beep,
-               0);
-SENSOR_TEMPLATE_2(in_min, "in%d_min", S_IWUSR | S_IRUGO, show_in_reg,
-                 store_in_reg, 0, 1);
-SENSOR_TEMPLATE_2(in_max, "in%d_max", S_IWUSR | S_IRUGO, show_in_reg,
-                 store_in_reg, 0, 2);
+SENSOR_TEMPLATE_2(in_input, "in%d_input", 0444, show_in_reg, NULL, 0, 0);
+SENSOR_TEMPLATE(in_alarm, "in%d_alarm", 0444, nct6775_show_alarm, NULL, 0);
+SENSOR_TEMPLATE(in_beep, "in%d_beep", 0644, nct6775_show_beep, nct6775_store_beep, 0);
+SENSOR_TEMPLATE_2(in_min, "in%d_min", 0644, show_in_reg, store_in_reg, 0, 1);
+SENSOR_TEMPLATE_2(in_max, "in%d_max", 0644, show_in_reg, store_in_reg, 0, 2);
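
The permission changes in this and the later SENSOR_TEMPLATE hunks swap symbolic mode macros for the octal literals checkpatch prefers; the values themselves are unchanged:

	/* S_IRUGO           == 0444   read-only attribute      */
	/* S_IWUSR | S_IRUGO == 0644   owner-writable attribute */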
 
 /*
  * nct6775_in_is_visible uses the index into the following array
@@ -2275,6 +1861,9 @@ show_fan(struct device *dev, struct device_attribute *attr, char *buf)
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int nr = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", data->rpm[nr]);
 }
 
@@ -2285,6 +1874,9 @@ show_fan_min(struct device *dev, struct device_attribute *attr, char *buf)
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int nr = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n",
                       data->fan_from_reg_min(data->fan_min[nr],
                                              data->fan_div[nr]));
@@ -2297,6 +1889,9 @@ show_fan_div(struct device *dev, struct device_attribute *attr, char *buf)
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int nr = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%u\n", div_from_reg(data->fan_div[nr]));
 }
 
@@ -2382,16 +1977,18 @@ write_div:
                        nr + 1, div_from_reg(data->fan_div[nr]),
                        div_from_reg(new_div));
                data->fan_div[nr] = new_div;
-               nct6775_write_fan_div_common(data, nr);
+               err = nct6775_write_fan_div_common(data, nr);
+               if (err)
+                       goto write_min;
                /* Give the chip time to sample a new speed value */
                data->last_updated = jiffies;
        }
 
 write_min:
-       data->write_value(data, data->REG_FAN_MIN[nr], data->fan_min[nr]);
+       err = nct6775_write_value(data, data->REG_FAN_MIN[nr], data->fan_min[nr]);
        mutex_unlock(&data->update_lock);
 
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -2399,8 +1996,12 @@ show_fan_pulses(struct device *dev, struct device_attribute *attr, char *buf)
 {
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
-       int p = data->fan_pulses[sattr->index];
+       int p;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       p = data->fan_pulses[sattr->index];
        return sprintf(buf, "%d\n", p ? : 4);
 }
 
@@ -2413,7 +2014,7 @@ store_fan_pulses(struct device *dev, struct device_attribute *attr,
        int nr = sattr->index;
        unsigned long val;
        int err;
-       u8 reg;
+       u16 reg;
 
        err = kstrtoul(buf, 10, &val);
        if (err < 0)
@@ -2424,13 +2025,16 @@ store_fan_pulses(struct device *dev, struct device_attribute *attr,
 
        mutex_lock(&data->update_lock);
        data->fan_pulses[nr] = val & 3;
-       reg = data->read_value(data, data->REG_FAN_PULSES[nr]);
+       err = nct6775_read_value(data, data->REG_FAN_PULSES[nr], &reg);
+       if (err)
+               goto out;
        reg &= ~(0x03 << data->FAN_PULSE_SHIFT[nr]);
        reg |= (val & 3) << data->FAN_PULSE_SHIFT[nr];
-       data->write_value(data, data->REG_FAN_PULSES[nr], reg);
+       err = nct6775_write_value(data, data->REG_FAN_PULSES[nr], reg);
+out:
        mutex_unlock(&data->update_lock);
 
-       return count;
+       return err ? : count;
 }
 
 static umode_t nct6775_fan_is_visible(struct kobject *kobj,
@@ -2455,19 +2059,16 @@ static umode_t nct6775_fan_is_visible(struct kobject *kobj,
        if (nr == 5 && data->kind != nct6775)
                return 0;
 
-       return attr->mode;
+       return nct6775_attr_mode(data, attr);
 }
 
-SENSOR_TEMPLATE(fan_input, "fan%d_input", S_IRUGO, show_fan, NULL, 0);
-SENSOR_TEMPLATE(fan_alarm, "fan%d_alarm", S_IRUGO, show_alarm, NULL,
-               FAN_ALARM_BASE);
-SENSOR_TEMPLATE(fan_beep, "fan%d_beep", S_IWUSR | S_IRUGO, show_beep,
-               store_beep, FAN_ALARM_BASE);
-SENSOR_TEMPLATE(fan_pulses, "fan%d_pulses", S_IWUSR | S_IRUGO, show_fan_pulses,
-               store_fan_pulses, 0);
-SENSOR_TEMPLATE(fan_min, "fan%d_min", S_IWUSR | S_IRUGO, show_fan_min,
-               store_fan_min, 0);
-SENSOR_TEMPLATE(fan_div, "fan%d_div", S_IRUGO, show_fan_div, NULL, 0);
+SENSOR_TEMPLATE(fan_input, "fan%d_input", 0444, show_fan, NULL, 0);
+SENSOR_TEMPLATE(fan_alarm, "fan%d_alarm", 0444, nct6775_show_alarm, NULL, FAN_ALARM_BASE);
+SENSOR_TEMPLATE(fan_beep, "fan%d_beep", 0644, nct6775_show_beep,
+               nct6775_store_beep, FAN_ALARM_BASE);
+SENSOR_TEMPLATE(fan_pulses, "fan%d_pulses", 0644, show_fan_pulses, store_fan_pulses, 0);
+SENSOR_TEMPLATE(fan_min, "fan%d_min", 0644, show_fan_min, store_fan_min, 0);
+SENSOR_TEMPLATE(fan_div, "fan%d_div", 0444, show_fan_div, NULL, 0);
 
 /*
  * nct6775_fan_is_visible uses the index into the following array
@@ -2497,6 +2098,9 @@ show_temp_label(struct device *dev, struct device_attribute *attr, char *buf)
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int nr = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%s\n", data->temp_label[data->temp_src[nr]]);
 }
 
@@ -2508,6 +2112,9 @@ show_temp(struct device *dev, struct device_attribute *attr, char *buf)
        int nr = sattr->nr;
        int index = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", LM75_TEMP_FROM_REG(data->temp[index][nr]));
 }
 
@@ -2528,10 +2135,9 @@ store_temp(struct device *dev, struct device_attribute *attr, const char *buf,
 
        mutex_lock(&data->update_lock);
        data->temp[index][nr] = LM75_TEMP_TO_REG(val);
-       nct6775_write_temp(data, data->reg_temp[index][nr],
-                          data->temp[index][nr]);
+       err = nct6775_write_temp(data, data->reg_temp[index][nr], data->temp[index][nr]);
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -2540,6 +2146,9 @@ show_temp_offset(struct device *dev, struct device_attribute *attr, char *buf)
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", data->temp_offset[sattr->index] * 1000);
 }
 
@@ -2561,10 +2170,10 @@ store_temp_offset(struct device *dev, struct device_attribute *attr,
 
        mutex_lock(&data->update_lock);
        data->temp_offset[nr] = val;
-       data->write_value(data, data->REG_TEMP_OFFSET[nr], val);
+       err = nct6775_write_value(data, data->REG_TEMP_OFFSET[nr], val);
        mutex_unlock(&data->update_lock);
 
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -2574,6 +2183,9 @@ show_temp_type(struct device *dev, struct device_attribute *attr, char *buf)
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int nr = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", (int)data->temp_type[nr]);
 }
 
@@ -2586,7 +2198,11 @@ store_temp_type(struct device *dev, struct device_attribute *attr,
        int nr = sattr->index;
        unsigned long val;
        int err;
-       u8 vbat, diode, vbit, dbit;
+       u8 vbit, dbit;
+       u16 vbat, diode;
+
+       if (IS_ERR(data))
+               return PTR_ERR(data);
 
        err = kstrtoul(buf, 10, &val);
        if (err < 0)
@@ -2600,12 +2216,21 @@ store_temp_type(struct device *dev, struct device_attribute *attr,
        data->temp_type[nr] = val;
        vbit = 0x02 << nr;
        dbit = data->DIODE_MASK << nr;
-       vbat = data->read_value(data, data->REG_VBAT) & ~vbit;
-       diode = data->read_value(data, data->REG_DIODE) & ~dbit;
-       switch (val) {
-       case 1: /* CPU diode (diode, current mode) */
-               vbat |= vbit;
-               diode |= dbit;
+
+       err = nct6775_read_value(data, data->REG_VBAT, &vbat);
+       if (err)
+               goto out;
+       vbat &= ~vbit;
+
+       err = nct6775_read_value(data, data->REG_DIODE, &diode);
+       if (err)
+               goto out;
+       diode &= ~dbit;
+
+       switch (val) {
+       case 1: /* CPU diode (diode, current mode) */
+               vbat |= vbit;
+               diode |= dbit;
                break;
        case 3: /* diode, voltage mode */
                vbat |= dbit;
@@ -2613,11 +2238,13 @@ store_temp_type(struct device *dev, struct device_attribute *attr,
        case 4: /* thermistor */
                break;
        }
-       data->write_value(data, data->REG_VBAT, vbat);
-       data->write_value(data, data->REG_DIODE, diode);
-
+       err = nct6775_write_value(data, data->REG_VBAT, vbat);
+       if (err)
+               goto out;
+       err = nct6775_write_value(data, data->REG_DIODE, diode);
+out:
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 static umode_t nct6775_temp_is_visible(struct kobject *kobj,
@@ -2656,26 +2283,19 @@ static umode_t nct6775_temp_is_visible(struct kobject *kobj,
        if (nr > 7 && !(data->have_temp_fixed & BIT(temp)))
                return 0;
 
-       return attr->mode;
+       return nct6775_attr_mode(data, attr);
 }
 
-SENSOR_TEMPLATE_2(temp_input, "temp%d_input", S_IRUGO, show_temp, NULL, 0, 0);
-SENSOR_TEMPLATE(temp_label, "temp%d_label", S_IRUGO, show_temp_label, NULL, 0);
-SENSOR_TEMPLATE_2(temp_max, "temp%d_max", S_IRUGO | S_IWUSR, show_temp,
-                 store_temp, 0, 1);
-SENSOR_TEMPLATE_2(temp_max_hyst, "temp%d_max_hyst", S_IRUGO | S_IWUSR,
-                 show_temp, store_temp, 0, 2);
-SENSOR_TEMPLATE_2(temp_crit, "temp%d_crit", S_IRUGO | S_IWUSR, show_temp,
-                 store_temp, 0, 3);
-SENSOR_TEMPLATE_2(temp_lcrit, "temp%d_lcrit", S_IRUGO | S_IWUSR, show_temp,
-                 store_temp, 0, 4);
-SENSOR_TEMPLATE(temp_offset, "temp%d_offset", S_IRUGO | S_IWUSR,
-               show_temp_offset, store_temp_offset, 0);
-SENSOR_TEMPLATE(temp_type, "temp%d_type", S_IRUGO | S_IWUSR, show_temp_type,
-               store_temp_type, 0);
-SENSOR_TEMPLATE(temp_alarm, "temp%d_alarm", S_IRUGO, show_temp_alarm, NULL, 0);
-SENSOR_TEMPLATE(temp_beep, "temp%d_beep", S_IRUGO | S_IWUSR, show_temp_beep,
-               store_temp_beep, 0);
+SENSOR_TEMPLATE_2(temp_input, "temp%d_input", 0444, show_temp, NULL, 0, 0);
+SENSOR_TEMPLATE(temp_label, "temp%d_label", 0444, show_temp_label, NULL, 0);
+SENSOR_TEMPLATE_2(temp_max, "temp%d_max", 0644, show_temp, store_temp, 0, 1);
+SENSOR_TEMPLATE_2(temp_max_hyst, "temp%d_max_hyst", 0644, show_temp, store_temp, 0, 2);
+SENSOR_TEMPLATE_2(temp_crit, "temp%d_crit", 0644, show_temp, store_temp, 0, 3);
+SENSOR_TEMPLATE_2(temp_lcrit, "temp%d_lcrit", 0644, show_temp, store_temp, 0, 4);
+SENSOR_TEMPLATE(temp_offset, "temp%d_offset", 0644, show_temp_offset, store_temp_offset, 0);
+SENSOR_TEMPLATE(temp_type, "temp%d_type", 0644, show_temp_type, store_temp_type, 0);
+SENSOR_TEMPLATE(temp_alarm, "temp%d_alarm", 0444, show_temp_alarm, NULL, 0);
+SENSOR_TEMPLATE(temp_beep, "temp%d_beep", 0644, show_temp_beep, store_temp_beep, 0);
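
The templates above also drop the symbolic permission macros in favour of plain octal modes, the form checkpatch prefers; the two spellings are bit-for-bit identical, which a build-time assertion could confirm:

    /* Sketch only: the macro and octal spellings are the same mode bits */
    BUILD_BUG_ON(S_IRUGO != 0444);                  /* world-readable */
    BUILD_BUG_ON((S_IRUGO | S_IWUSR) != 0644);      /* plus owner-writable */
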
 
 /*
  * nct6775_temp_is_visible uses the index into the following array
@@ -2707,6 +2327,9 @@ static ssize_t show_tsi_temp(struct device *dev, struct device_attribute *attr,
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sysfs_emit(buf, "%u\n", tsi_temp_from_reg(data->tsi_temp[sattr->index]));
 }
 
@@ -2727,7 +2350,7 @@ static umode_t nct6775_tsi_temp_is_visible(struct kobject *kobj, struct attribut
        struct nct6775_data *data = dev_get_drvdata(dev);
        int temp = index / 2;
 
-       return (data->have_tsi_temp & BIT(temp)) ? attr->mode : 0;
+       return (data->have_tsi_temp & BIT(temp)) ? nct6775_attr_mode(data, attr) : 0;
 }
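
Every is_visible callback now returns nct6775_attr_mode(data, attr) instead of attr->mode, giving the shared core one place to adjust attribute modes per instance. A plausible shape for the helper, assuming a read_only flag on backends whose transport cannot write registers, would be:

    /* Plausible sketch of the helper; the real definition lives in the core header */
    static inline umode_t
    nct6775_attr_mode(struct nct6775_data *data, struct attribute *attr)
    {
            /* strip all write bits when the backing transport is read-only */
            return attr->mode & ~(data->read_only ? 0222 : 0);
    }
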
 
 /*
@@ -2746,6 +2369,9 @@ show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf)
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", data->pwm_mode[sattr->index]);
 }
 
@@ -2758,7 +2384,7 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
        int nr = sattr->index;
        unsigned long val;
        int err;
-       u8 reg;
+       u16 reg;
 
        err = kstrtoul(buf, 10, &val);
        if (err < 0)
@@ -2776,13 +2402,16 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
 
        mutex_lock(&data->update_lock);
        data->pwm_mode[nr] = val;
-       reg = data->read_value(data, data->REG_PWM_MODE[nr]);
+       err = nct6775_read_value(data, data->REG_PWM_MODE[nr], &reg);
+       if (err)
+               goto out;
        reg &= ~data->PWM_MODE_MASK[nr];
        if (!val)
                reg |= data->PWM_MODE_MASK[nr];
-       data->write_value(data, data->REG_PWM_MODE[nr], reg);
+       err = nct6775_write_value(data, data->REG_PWM_MODE[nr], reg);
+out:
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
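
The recurring `u8 reg` to `u16 reg` change follows from the new accessor signature: a read no longer returns the value, it returns an error code and stores the register contents, now up to 16 bits wide, through an out parameter:

            /* before: value returned, failures invisible */
            u8 reg = data->read_value(data, data->REG_PWM_MODE[nr]);

            /* after: error returned, value comes back via a u16 out parameter */
            u16 reg;
            int err = nct6775_read_value(data, data->REG_PWM_MODE[nr], &reg);
            if (err)
                    return err;
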
 
 static ssize_t
@@ -2792,16 +2421,23 @@ show_pwm(struct device *dev, struct device_attribute *attr, char *buf)
        struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
        int nr = sattr->nr;
        int index = sattr->index;
-       int pwm;
+       int err;
+       u16 pwm;
+
+       if (IS_ERR(data))
+               return PTR_ERR(data);
 
        /*
         * For automatic fan control modes, show current pwm readings.
         * Otherwise, show the configured value.
         */
-       if (index == 0 && data->pwm_enable[nr] > manual)
-               pwm = data->read_value(data, data->REG_PWM_READ[nr]);
-       else
+       if (index == 0 && data->pwm_enable[nr] > manual) {
+               err = nct6775_read_value(data, data->REG_PWM_READ[nr], &pwm);
+               if (err)
+                       return err;
+       } else {
                pwm = data->pwm[index][nr];
+       }
 
        return sprintf(buf, "%d\n", pwm);
 }
@@ -2819,7 +2455,7 @@ store_pwm(struct device *dev, struct device_attribute *attr, const char *buf,
        int maxval[7]
          = { 255, 255, data->pwm[3][nr] ? : 255, 255, 255, 255, 255 };
        int err;
-       u8 reg;
+       u16 reg;
 
        err = kstrtoul(buf, 10, &val);
        if (err < 0)
@@ -2828,16 +2464,21 @@ store_pwm(struct device *dev, struct device_attribute *attr, const char *buf,
 
        mutex_lock(&data->update_lock);
        data->pwm[index][nr] = val;
-       data->write_value(data, data->REG_PWM[index][nr], val);
+       err = nct6775_write_value(data, data->REG_PWM[index][nr], val);
+       if (err)
+               goto out;
        if (index == 2) { /* floor: disable if val == 0 */
-               reg = data->read_value(data, data->REG_TEMP_SEL[nr]);
+               err = nct6775_read_value(data, data->REG_TEMP_SEL[nr], &reg);
+               if (err)
+                       goto out;
                reg &= 0x7f;
                if (val)
                        reg |= 0x80;
-               data->write_value(data, data->REG_TEMP_SEL[nr], reg);
+               err = nct6775_write_value(data, data->REG_TEMP_SEL[nr], reg);
        }
+out:
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 /* Returns 0 if OK, -EINVAL otherwise */
@@ -2864,40 +2505,54 @@ static int check_trip_points(struct nct6775_data *data, int nr)
        return 0;
 }
 
-static void pwm_update_registers(struct nct6775_data *data, int nr)
+static int pwm_update_registers(struct nct6775_data *data, int nr)
 {
-       u8 reg;
+       u16 reg;
+       int err;
 
        switch (data->pwm_enable[nr]) {
        case off:
        case manual:
                break;
        case speed_cruise:
-               reg = data->read_value(data, data->REG_FAN_MODE[nr]);
+               err = nct6775_read_value(data, data->REG_FAN_MODE[nr], &reg);
+               if (err)
+                       return err;
                reg = (reg & ~data->tolerance_mask) |
                  (data->target_speed_tolerance[nr] & data->tolerance_mask);
-               data->write_value(data, data->REG_FAN_MODE[nr], reg);
-               data->write_value(data, data->REG_TARGET[nr],
-                                   data->target_speed[nr] & 0xff);
+               err = nct6775_write_value(data, data->REG_FAN_MODE[nr], reg);
+               if (err)
+                       return err;
+               err = nct6775_write_value(data, data->REG_TARGET[nr],
+                                         data->target_speed[nr] & 0xff);
+               if (err)
+                       return err;
                if (data->REG_TOLERANCE_H) {
                        reg = (data->target_speed[nr] >> 8) & 0x0f;
                        reg |= (data->target_speed_tolerance[nr] & 0x38) << 1;
-                       data->write_value(data,
-                                         data->REG_TOLERANCE_H[nr],
-                                         reg);
+                       err = nct6775_write_value(data, data->REG_TOLERANCE_H[nr], reg);
+                       if (err)
+                               return err;
                }
                break;
        case thermal_cruise:
-               data->write_value(data, data->REG_TARGET[nr],
-                                 data->target_temp[nr]);
+               err = nct6775_write_value(data, data->REG_TARGET[nr], data->target_temp[nr]);
+               if (err)
+                       return err;
                fallthrough;
        default:
-               reg = data->read_value(data, data->REG_FAN_MODE[nr]);
+               err = nct6775_read_value(data, data->REG_FAN_MODE[nr], &reg);
+               if (err)
+                       return err;
                reg = (reg & ~data->tolerance_mask) |
                  data->temp_tolerance[0][nr];
-               data->write_value(data, data->REG_FAN_MODE[nr], reg);
+               err = nct6775_write_value(data, data->REG_FAN_MODE[nr], reg);
+               if (err)
+                       return err;
                break;
        }
+
+       return 0;
 }
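
pwm_update_registers() runs with update_lock already held, so the void-to-int conversion lets it stop at the first failing access and lets each caller forward that error straight out of the store, as the later store_target_temp() hunk shows:

            mutex_lock(&data->update_lock);
            data->target_temp[nr] = val;
            err = pwm_update_registers(data, nr);   /* first failing access wins */
            mutex_unlock(&data->update_lock);
            return err ? : count;
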
 
 static ssize_t
@@ -2906,6 +2561,9 @@ show_pwm_enable(struct device *dev, struct device_attribute *attr, char *buf)
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", data->pwm_enable[sattr->index]);
 }
 
@@ -2943,15 +2601,22 @@ store_pwm_enable(struct device *dev, struct device_attribute *attr,
                 * turn off pwm control: select manual mode, set pwm to maximum
                 */
                data->pwm[0][nr] = 255;
-               data->write_value(data, data->REG_PWM[0][nr], 255);
+               err = nct6775_write_value(data, data->REG_PWM[0][nr], 255);
+               if (err)
+                       goto out;
        }
-       pwm_update_registers(data, nr);
-       reg = data->read_value(data, data->REG_FAN_MODE[nr]);
+       err = pwm_update_registers(data, nr);
+       if (err)
+               goto out;
+       err = nct6775_read_value(data, data->REG_FAN_MODE[nr], &reg);
+       if (err)
+               goto out;
        reg &= 0x0f;
        reg |= pwm_enable_to_reg(val) << 4;
-       data->write_value(data, data->REG_FAN_MODE[nr], reg);
+       err = nct6775_write_value(data, data->REG_FAN_MODE[nr], reg);
+out:
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -2978,6 +2643,9 @@ show_pwm_temp_sel(struct device *dev, struct device_attribute *attr, char *buf)
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int index = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return show_pwm_temp_sel_common(data, buf, data->pwm_temp_sel[index]);
 }
 
@@ -2989,7 +2657,11 @@ store_pwm_temp_sel(struct device *dev, struct device_attribute *attr,
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int nr = sattr->index;
        unsigned long val;
-       int err, reg, src;
+       int err, src;
+       u16 reg;
+
+       if (IS_ERR(data))
+               return PTR_ERR(data);
 
        err = kstrtoul(buf, 10, &val);
        if (err < 0)
@@ -3002,13 +2674,16 @@ store_pwm_temp_sel(struct device *dev, struct device_attribute *attr,
        mutex_lock(&data->update_lock);
        src = data->temp_src[val - 1];
        data->pwm_temp_sel[nr] = src;
-       reg = data->read_value(data, data->REG_TEMP_SEL[nr]);
+       err = nct6775_read_value(data, data->REG_TEMP_SEL[nr], &reg);
+       if (err)
+               goto out;
        reg &= 0xe0;
        reg |= src;
-       data->write_value(data, data->REG_TEMP_SEL[nr], reg);
+       err = nct6775_write_value(data, data->REG_TEMP_SEL[nr], reg);
+out:
        mutex_unlock(&data->update_lock);
 
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -3019,6 +2694,9 @@ show_pwm_weight_temp_sel(struct device *dev, struct device_attribute *attr,
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int index = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return show_pwm_temp_sel_common(data, buf,
                                        data->pwm_weight_temp_sel[index]);
 }
@@ -3031,7 +2709,11 @@ store_pwm_weight_temp_sel(struct device *dev, struct device_attribute *attr,
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int nr = sattr->index;
        unsigned long val;
-       int err, reg, src;
+       int err, src;
+       u16 reg;
+
+       if (IS_ERR(data))
+               return PTR_ERR(data);
 
        err = kstrtoul(buf, 10, &val);
        if (err < 0)
@@ -3047,19 +2729,24 @@ store_pwm_weight_temp_sel(struct device *dev, struct device_attribute *attr,
        if (val) {
                src = data->temp_src[val - 1];
                data->pwm_weight_temp_sel[nr] = src;
-               reg = data->read_value(data, data->REG_WEIGHT_TEMP_SEL[nr]);
+               err = nct6775_read_value(data, data->REG_WEIGHT_TEMP_SEL[nr], &reg);
+               if (err)
+                       goto out;
                reg &= 0xe0;
                reg |= (src | 0x80);
-               data->write_value(data, data->REG_WEIGHT_TEMP_SEL[nr], reg);
+               err = nct6775_write_value(data, data->REG_WEIGHT_TEMP_SEL[nr], reg);
        } else {
                data->pwm_weight_temp_sel[nr] = 0;
-               reg = data->read_value(data, data->REG_WEIGHT_TEMP_SEL[nr]);
+               err = nct6775_read_value(data, data->REG_WEIGHT_TEMP_SEL[nr], &reg);
+               if (err)
+                       goto out;
                reg &= 0x7f;
-               data->write_value(data, data->REG_WEIGHT_TEMP_SEL[nr], reg);
+               err = nct6775_write_value(data, data->REG_WEIGHT_TEMP_SEL[nr], reg);
        }
+out:
        mutex_unlock(&data->update_lock);
 
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -3068,6 +2755,9 @@ show_target_temp(struct device *dev, struct device_attribute *attr, char *buf)
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", data->target_temp[sattr->index] * 1000);
 }
 
@@ -3090,9 +2780,9 @@ store_target_temp(struct device *dev, struct device_attribute *attr,
 
        mutex_lock(&data->update_lock);
        data->target_temp[nr] = val;
-       pwm_update_registers(data, nr);
+       err = pwm_update_registers(data, nr);
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -3102,6 +2792,9 @@ show_target_speed(struct device *dev, struct device_attribute *attr, char *buf)
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int nr = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n",
                       fan_from_reg16(data->target_speed[nr],
                                      data->fan_div[nr]));
@@ -3127,9 +2820,9 @@ store_target_speed(struct device *dev, struct device_attribute *attr,
 
        mutex_lock(&data->update_lock);
        data->target_speed[nr] = speed;
-       pwm_update_registers(data, nr);
+       err = pwm_update_registers(data, nr);
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -3141,6 +2834,9 @@ show_temp_tolerance(struct device *dev, struct device_attribute *attr,
        int nr = sattr->nr;
        int index = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", data->temp_tolerance[index][nr] * 1000);
 }
 
@@ -3165,13 +2861,11 @@ store_temp_tolerance(struct device *dev, struct device_attribute *attr,
        mutex_lock(&data->update_lock);
        data->temp_tolerance[index][nr] = val;
        if (index)
-               pwm_update_registers(data, nr);
+               err = pwm_update_registers(data, nr);
        else
-               data->write_value(data,
-                                 data->REG_CRITICAL_TEMP_TOLERANCE[nr],
-                                 val);
+               err = nct6775_write_value(data, data->REG_CRITICAL_TEMP_TOLERANCE[nr], val);
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 /*
@@ -3188,8 +2882,12 @@ show_speed_tolerance(struct device *dev, struct device_attribute *attr,
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
        int nr = sattr->index;
-       int target = data->target_speed[nr];
-       int tolerance = 0;
+       int target, tolerance = 0;
+
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       target = data->target_speed[nr];
 
        if (target) {
                int low = target - data->target_speed_tolerance[nr];
@@ -3239,24 +2937,19 @@ store_speed_tolerance(struct device *dev, struct device_attribute *attr,
 
        mutex_lock(&data->update_lock);
        data->target_speed_tolerance[nr] = val;
-       pwm_update_registers(data, nr);
+       err = pwm_update_registers(data, nr);
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
-SENSOR_TEMPLATE_2(pwm, "pwm%d", S_IWUSR | S_IRUGO, show_pwm, store_pwm, 0, 0);
-SENSOR_TEMPLATE(pwm_mode, "pwm%d_mode", S_IWUSR | S_IRUGO, show_pwm_mode,
-               store_pwm_mode, 0);
-SENSOR_TEMPLATE(pwm_enable, "pwm%d_enable", S_IWUSR | S_IRUGO, show_pwm_enable,
-               store_pwm_enable, 0);
-SENSOR_TEMPLATE(pwm_temp_sel, "pwm%d_temp_sel", S_IWUSR | S_IRUGO,
-               show_pwm_temp_sel, store_pwm_temp_sel, 0);
-SENSOR_TEMPLATE(pwm_target_temp, "pwm%d_target_temp", S_IWUSR | S_IRUGO,
-               show_target_temp, store_target_temp, 0);
-SENSOR_TEMPLATE(fan_target, "fan%d_target", S_IWUSR | S_IRUGO,
-               show_target_speed, store_target_speed, 0);
-SENSOR_TEMPLATE(fan_tolerance, "fan%d_tolerance", S_IWUSR | S_IRUGO,
-               show_speed_tolerance, store_speed_tolerance, 0);
+SENSOR_TEMPLATE_2(pwm, "pwm%d", 0644, show_pwm, store_pwm, 0, 0);
+SENSOR_TEMPLATE(pwm_mode, "pwm%d_mode", 0644, show_pwm_mode, store_pwm_mode, 0);
+SENSOR_TEMPLATE(pwm_enable, "pwm%d_enable", 0644, show_pwm_enable, store_pwm_enable, 0);
+SENSOR_TEMPLATE(pwm_temp_sel, "pwm%d_temp_sel", 0644, show_pwm_temp_sel, store_pwm_temp_sel, 0);
+SENSOR_TEMPLATE(pwm_target_temp, "pwm%d_target_temp", 0644, show_target_temp, store_target_temp, 0);
+SENSOR_TEMPLATE(fan_target, "fan%d_target", 0644, show_target_speed, store_target_speed, 0);
+SENSOR_TEMPLATE(fan_tolerance, "fan%d_tolerance", 0644, show_speed_tolerance,
+               store_speed_tolerance, 0);
 
 /* Smart Fan registers */
 
@@ -3268,6 +2961,9 @@ show_weight_temp(struct device *dev, struct device_attribute *attr, char *buf)
        int nr = sattr->nr;
        int index = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", data->weight_temp[index][nr] * 1000);
 }
 
@@ -3290,23 +2986,21 @@ store_weight_temp(struct device *dev, struct device_attribute *attr,
 
        mutex_lock(&data->update_lock);
        data->weight_temp[index][nr] = val;
-       data->write_value(data, data->REG_WEIGHT_TEMP[index][nr], val);
+       err = nct6775_write_value(data, data->REG_WEIGHT_TEMP[index][nr], val);
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
-SENSOR_TEMPLATE(pwm_weight_temp_sel, "pwm%d_weight_temp_sel", S_IWUSR | S_IRUGO,
-                 show_pwm_weight_temp_sel, store_pwm_weight_temp_sel, 0);
+SENSOR_TEMPLATE(pwm_weight_temp_sel, "pwm%d_weight_temp_sel", 0644,
+               show_pwm_weight_temp_sel, store_pwm_weight_temp_sel, 0);
 SENSOR_TEMPLATE_2(pwm_weight_temp_step, "pwm%d_weight_temp_step",
-                 S_IWUSR | S_IRUGO, show_weight_temp, store_weight_temp, 0, 0);
+                 0644, show_weight_temp, store_weight_temp, 0, 0);
 SENSOR_TEMPLATE_2(pwm_weight_temp_step_tol, "pwm%d_weight_temp_step_tol",
-                 S_IWUSR | S_IRUGO, show_weight_temp, store_weight_temp, 0, 1);
+                 0644, show_weight_temp, store_weight_temp, 0, 1);
 SENSOR_TEMPLATE_2(pwm_weight_temp_step_base, "pwm%d_weight_temp_step_base",
-                 S_IWUSR | S_IRUGO, show_weight_temp, store_weight_temp, 0, 2);
-SENSOR_TEMPLATE_2(pwm_weight_duty_step, "pwm%d_weight_duty_step",
-                 S_IWUSR | S_IRUGO, show_pwm, store_pwm, 0, 5);
-SENSOR_TEMPLATE_2(pwm_weight_duty_base, "pwm%d_weight_duty_base",
-                 S_IWUSR | S_IRUGO, show_pwm, store_pwm, 0, 6);
+                 0644, show_weight_temp, store_weight_temp, 0, 2);
+SENSOR_TEMPLATE_2(pwm_weight_duty_step, "pwm%d_weight_duty_step", 0644, show_pwm, store_pwm, 0, 5);
+SENSOR_TEMPLATE_2(pwm_weight_duty_base, "pwm%d_weight_duty_base", 0644, show_pwm, store_pwm, 0, 6);
 
 static ssize_t
 show_fan_time(struct device *dev, struct device_attribute *attr, char *buf)
@@ -3316,6 +3010,9 @@ show_fan_time(struct device *dev, struct device_attribute *attr, char *buf)
        int nr = sattr->nr;
        int index = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n",
                       step_time_from_reg(data->fan_time[index][nr],
                                          data->pwm_mode[nr]));
@@ -3339,9 +3036,9 @@ store_fan_time(struct device *dev, struct device_attribute *attr,
        val = step_time_to_reg(val, data->pwm_mode[nr]);
        mutex_lock(&data->update_lock);
        data->fan_time[index][nr] = val;
-       data->write_value(data, data->REG_FAN_TIME[index][nr], val);
+       err = nct6775_write_value(data, data->REG_FAN_TIME[index][nr], val);
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -3350,6 +3047,9 @@ show_auto_pwm(struct device *dev, struct device_attribute *attr, char *buf)
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        return sprintf(buf, "%d\n", data->auto_pwm[sattr->nr][sattr->index]);
 }
 
@@ -3363,7 +3063,7 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr,
        int point = sattr->index;
        unsigned long val;
        int err;
-       u8 reg;
+       u16 reg;
 
        err = kstrtoul(buf, 10, &val);
        if (err < 0)
@@ -3381,21 +3081,20 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr,
        mutex_lock(&data->update_lock);
        data->auto_pwm[nr][point] = val;
        if (point < data->auto_pwm_num) {
-               data->write_value(data,
-                                   NCT6775_AUTO_PWM(data, nr, point),
-                                   data->auto_pwm[nr][point]);
+               err = nct6775_write_value(data, NCT6775_AUTO_PWM(data, nr, point),
+                                         data->auto_pwm[nr][point]);
        } else {
                switch (data->kind) {
                case nct6775:
                        /* disable if needed (pwm == 0) */
-                       reg = data->read_value(data,
-                                              NCT6775_REG_CRITICAL_ENAB[nr]);
+                       err = nct6775_read_value(data, NCT6775_REG_CRITICAL_ENAB[nr], &reg);
+                       if (err)
+                               break;
                        if (val)
                                reg |= 0x02;
                        else
                                reg &= ~0x02;
-                       data->write_value(data, NCT6775_REG_CRITICAL_ENAB[nr],
-                                         reg);
+                       err = nct6775_write_value(data, NCT6775_REG_CRITICAL_ENAB[nr], reg);
                        break;
                case nct6776:
                        break; /* always enabled, nothing to do */
@@ -3409,22 +3108,22 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr,
                case nct6796:
                case nct6797:
                case nct6798:
-                       data->write_value(data, data->REG_CRITICAL_PWM[nr],
-                                           val);
-                       reg = data->read_value(data,
-                                       data->REG_CRITICAL_PWM_ENABLE[nr]);
+                       err = nct6775_write_value(data, data->REG_CRITICAL_PWM[nr], val);
+                       if (err)
+                               break;
+                       err = nct6775_read_value(data, data->REG_CRITICAL_PWM_ENABLE[nr], &reg);
+                       if (err)
+                               break;
                        if (val == 255)
                                reg &= ~data->CRITICAL_PWM_ENABLE_MASK;
                        else
                                reg |= data->CRITICAL_PWM_ENABLE_MASK;
-                       data->write_value(data,
-                                         data->REG_CRITICAL_PWM_ENABLE[nr],
-                                         reg);
+                       err = nct6775_write_value(data, data->REG_CRITICAL_PWM_ENABLE[nr], reg);
                        break;
                }
        }
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 static ssize_t
@@ -3435,6 +3134,9 @@ show_auto_temp(struct device *dev, struct device_attribute *attr, char *buf)
        int nr = sattr->nr;
        int point = sattr->index;
 
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
        /*
         * We don't know for sure if the temperature is signed or unsigned.
         * Assume it is unsigned.
@@ -3462,15 +3164,14 @@ store_auto_temp(struct device *dev, struct device_attribute *attr,
        mutex_lock(&data->update_lock);
        data->auto_temp[nr][point] = DIV_ROUND_CLOSEST(val, 1000);
        if (point < data->auto_pwm_num) {
-               data->write_value(data,
-                                   NCT6775_AUTO_TEMP(data, nr, point),
-                                   data->auto_temp[nr][point]);
+               err = nct6775_write_value(data, NCT6775_AUTO_TEMP(data, nr, point),
+                                         data->auto_temp[nr][point]);
        } else {
-               data->write_value(data, data->REG_CRITICAL_TEMP[nr],
-                                   data->auto_temp[nr][point]);
+               err = nct6775_write_value(data, data->REG_CRITICAL_TEMP[nr],
+                                         data->auto_temp[nr][point]);
        }
        mutex_unlock(&data->update_lock);
-       return count;
+       return err ? : count;
 }
 
 static umode_t nct6775_pwm_is_visible(struct kobject *kobj,
@@ -3500,65 +3201,59 @@ static umode_t nct6775_pwm_is_visible(struct kobject *kobj,
                if (api > data->auto_pwm_num)
                        return 0;
        }
-       return attr->mode;
+       return nct6775_attr_mode(data, attr);
 }
 
-SENSOR_TEMPLATE_2(pwm_stop_time, "pwm%d_stop_time", S_IWUSR | S_IRUGO,
-                 show_fan_time, store_fan_time, 0, 0);
-SENSOR_TEMPLATE_2(pwm_step_up_time, "pwm%d_step_up_time", S_IWUSR | S_IRUGO,
+SENSOR_TEMPLATE_2(pwm_stop_time, "pwm%d_stop_time", 0644, show_fan_time, store_fan_time, 0, 0);
+SENSOR_TEMPLATE_2(pwm_step_up_time, "pwm%d_step_up_time", 0644,
                  show_fan_time, store_fan_time, 0, 1);
-SENSOR_TEMPLATE_2(pwm_step_down_time, "pwm%d_step_down_time", S_IWUSR | S_IRUGO,
+SENSOR_TEMPLATE_2(pwm_step_down_time, "pwm%d_step_down_time", 0644,
                  show_fan_time, store_fan_time, 0, 2);
-SENSOR_TEMPLATE_2(pwm_start, "pwm%d_start", S_IWUSR | S_IRUGO, show_pwm,
-                 store_pwm, 0, 1);
-SENSOR_TEMPLATE_2(pwm_floor, "pwm%d_floor", S_IWUSR | S_IRUGO, show_pwm,
-                 store_pwm, 0, 2);
-SENSOR_TEMPLATE_2(pwm_temp_tolerance, "pwm%d_temp_tolerance", S_IWUSR | S_IRUGO,
+SENSOR_TEMPLATE_2(pwm_start, "pwm%d_start", 0644, show_pwm, store_pwm, 0, 1);
+SENSOR_TEMPLATE_2(pwm_floor, "pwm%d_floor", 0644, show_pwm, store_pwm, 0, 2);
+SENSOR_TEMPLATE_2(pwm_temp_tolerance, "pwm%d_temp_tolerance", 0644,
                  show_temp_tolerance, store_temp_tolerance, 0, 0);
 SENSOR_TEMPLATE_2(pwm_crit_temp_tolerance, "pwm%d_crit_temp_tolerance",
-                 S_IWUSR | S_IRUGO, show_temp_tolerance, store_temp_tolerance,
-                 0, 1);
+                 0644, show_temp_tolerance, store_temp_tolerance, 0, 1);
 
-SENSOR_TEMPLATE_2(pwm_max, "pwm%d_max", S_IWUSR | S_IRUGO, show_pwm, store_pwm,
-                 0, 3);
+SENSOR_TEMPLATE_2(pwm_max, "pwm%d_max", 0644, show_pwm, store_pwm, 0, 3);
 
-SENSOR_TEMPLATE_2(pwm_step, "pwm%d_step", S_IWUSR | S_IRUGO, show_pwm,
-                 store_pwm, 0, 4);
+SENSOR_TEMPLATE_2(pwm_step, "pwm%d_step", 0644, show_pwm, store_pwm, 0, 4);
 
 SENSOR_TEMPLATE_2(pwm_auto_point1_pwm, "pwm%d_auto_point1_pwm",
-                 S_IWUSR | S_IRUGO, show_auto_pwm, store_auto_pwm, 0, 0);
+                 0644, show_auto_pwm, store_auto_pwm, 0, 0);
 SENSOR_TEMPLATE_2(pwm_auto_point1_temp, "pwm%d_auto_point1_temp",
-                 S_IWUSR | S_IRUGO, show_auto_temp, store_auto_temp, 0, 0);
+                 0644, show_auto_temp, store_auto_temp, 0, 0);
 
 SENSOR_TEMPLATE_2(pwm_auto_point2_pwm, "pwm%d_auto_point2_pwm",
-                 S_IWUSR | S_IRUGO, show_auto_pwm, store_auto_pwm, 0, 1);
+                 0644, show_auto_pwm, store_auto_pwm, 0, 1);
 SENSOR_TEMPLATE_2(pwm_auto_point2_temp, "pwm%d_auto_point2_temp",
-                 S_IWUSR | S_IRUGO, show_auto_temp, store_auto_temp, 0, 1);
+                 0644, show_auto_temp, store_auto_temp, 0, 1);
 
 SENSOR_TEMPLATE_2(pwm_auto_point3_pwm, "pwm%d_auto_point3_pwm",
-                 S_IWUSR | S_IRUGO, show_auto_pwm, store_auto_pwm, 0, 2);
+                 0644, show_auto_pwm, store_auto_pwm, 0, 2);
 SENSOR_TEMPLATE_2(pwm_auto_point3_temp, "pwm%d_auto_point3_temp",
-                 S_IWUSR | S_IRUGO, show_auto_temp, store_auto_temp, 0, 2);
+                 0644, show_auto_temp, store_auto_temp, 0, 2);
 
 SENSOR_TEMPLATE_2(pwm_auto_point4_pwm, "pwm%d_auto_point4_pwm",
-                 S_IWUSR | S_IRUGO, show_auto_pwm, store_auto_pwm, 0, 3);
+                 0644, show_auto_pwm, store_auto_pwm, 0, 3);
 SENSOR_TEMPLATE_2(pwm_auto_point4_temp, "pwm%d_auto_point4_temp",
-                 S_IWUSR | S_IRUGO, show_auto_temp, store_auto_temp, 0, 3);
+                 0644, show_auto_temp, store_auto_temp, 0, 3);
 
 SENSOR_TEMPLATE_2(pwm_auto_point5_pwm, "pwm%d_auto_point5_pwm",
-                 S_IWUSR | S_IRUGO, show_auto_pwm, store_auto_pwm, 0, 4);
+                 0644, show_auto_pwm, store_auto_pwm, 0, 4);
 SENSOR_TEMPLATE_2(pwm_auto_point5_temp, "pwm%d_auto_point5_temp",
-                 S_IWUSR | S_IRUGO, show_auto_temp, store_auto_temp, 0, 4);
+                 0644, show_auto_temp, store_auto_temp, 0, 4);
 
 SENSOR_TEMPLATE_2(pwm_auto_point6_pwm, "pwm%d_auto_point6_pwm",
-                 S_IWUSR | S_IRUGO, show_auto_pwm, store_auto_pwm, 0, 5);
+                 0644, show_auto_pwm, store_auto_pwm, 0, 5);
 SENSOR_TEMPLATE_2(pwm_auto_point6_temp, "pwm%d_auto_point6_temp",
-                 S_IWUSR | S_IRUGO, show_auto_temp, store_auto_temp, 0, 5);
+                 0644, show_auto_temp, store_auto_temp, 0, 5);
 
 SENSOR_TEMPLATE_2(pwm_auto_point7_pwm, "pwm%d_auto_point7_pwm",
-                 S_IWUSR | S_IRUGO, show_auto_pwm, store_auto_pwm, 0, 6);
+                 0644, show_auto_pwm, store_auto_pwm, 0, 6);
 SENSOR_TEMPLATE_2(pwm_auto_point7_temp, "pwm%d_auto_point7_temp",
-                 S_IWUSR | S_IRUGO, show_auto_temp, store_auto_temp, 0, 6);
+                 0644, show_auto_temp, store_auto_temp, 0, 6);
 
 /*
  * nct6775_pwm_is_visible uses the index into the following array
@@ -3612,123 +3307,21 @@ static const struct sensor_template_group nct6775_pwm_template_group = {
        .base = 1,
 };
 
-static ssize_t
-cpu0_vid_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       struct nct6775_data *data = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%d\n", vid_from_reg(data->vid, data->vrm));
-}
-
-static DEVICE_ATTR_RO(cpu0_vid);
-
-/* Case open detection */
-
-static ssize_t
-clear_caseopen(struct device *dev, struct device_attribute *attr,
-              const char *buf, size_t count)
-{
-       struct nct6775_data *data = dev_get_drvdata(dev);
-       struct nct6775_sio_data *sio_data = data->sio_data;
-       int nr = to_sensor_dev_attr(attr)->index - INTRUSION_ALARM_BASE;
-       unsigned long val;
-       u8 reg;
-       int ret;
-
-       if (kstrtoul(buf, 10, &val) || val != 0)
-               return -EINVAL;
-
-       mutex_lock(&data->update_lock);
-
-       /*
-        * Use CR registers to clear caseopen status.
-        * The CR registers are the same for all chips, and not all chips
-        * support clearing the caseopen status through "regular" registers.
-        */
-       ret = sio_data->sio_enter(sio_data);
-       if (ret) {
-               count = ret;
-               goto error;
-       }
-
-       sio_data->sio_select(sio_data, NCT6775_LD_ACPI);
-       reg = sio_data->sio_inb(sio_data, NCT6775_REG_CR_CASEOPEN_CLR[nr]);
-       reg |= NCT6775_CR_CASEOPEN_CLR_MASK[nr];
-       sio_data->sio_outb(sio_data, NCT6775_REG_CR_CASEOPEN_CLR[nr], reg);
-       reg &= ~NCT6775_CR_CASEOPEN_CLR_MASK[nr];
-       sio_data->sio_outb(sio_data, NCT6775_REG_CR_CASEOPEN_CLR[nr], reg);
-       sio_data->sio_exit(sio_data);
-
-       data->valid = false;    /* Force cache refresh */
-error:
-       mutex_unlock(&data->update_lock);
-       return count;
-}
-
-static SENSOR_DEVICE_ATTR(intrusion0_alarm, S_IWUSR | S_IRUGO, show_alarm,
-                         clear_caseopen, INTRUSION_ALARM_BASE);
-static SENSOR_DEVICE_ATTR(intrusion1_alarm, S_IWUSR | S_IRUGO, show_alarm,
-                         clear_caseopen, INTRUSION_ALARM_BASE + 1);
-static SENSOR_DEVICE_ATTR(intrusion0_beep, S_IWUSR | S_IRUGO, show_beep,
-                         store_beep, INTRUSION_ALARM_BASE);
-static SENSOR_DEVICE_ATTR(intrusion1_beep, S_IWUSR | S_IRUGO, show_beep,
-                         store_beep, INTRUSION_ALARM_BASE + 1);
-static SENSOR_DEVICE_ATTR(beep_enable, S_IWUSR | S_IRUGO, show_beep,
-                         store_beep, BEEP_ENABLE_BASE);
-
-static umode_t nct6775_other_is_visible(struct kobject *kobj,
-                                       struct attribute *attr, int index)
-{
-       struct device *dev = kobj_to_dev(kobj);
-       struct nct6775_data *data = dev_get_drvdata(dev);
-
-       if (index == 0 && !data->have_vid)
-               return 0;
-
-       if (index == 1 || index == 2) {
-               if (data->ALARM_BITS[INTRUSION_ALARM_BASE + index - 1] < 0)
-                       return 0;
-       }
-
-       if (index == 3 || index == 4) {
-               if (data->BEEP_BITS[INTRUSION_ALARM_BASE + index - 3] < 0)
-                       return 0;
-       }
-
-       return attr->mode;
-}
-
-/*
- * nct6775_other_is_visible uses the index into the following array
- * to determine if attributes should be created or not.
- * Any change in order or content must be matched.
- */
-static struct attribute *nct6775_attributes_other[] = {
-       &dev_attr_cpu0_vid.attr,                                /* 0 */
-       &sensor_dev_attr_intrusion0_alarm.dev_attr.attr,        /* 1 */
-       &sensor_dev_attr_intrusion1_alarm.dev_attr.attr,        /* 2 */
-       &sensor_dev_attr_intrusion0_beep.dev_attr.attr,         /* 3 */
-       &sensor_dev_attr_intrusion1_beep.dev_attr.attr,         /* 4 */
-       &sensor_dev_attr_beep_enable.dev_attr.attr,             /* 5 */
-
-       NULL
-};
-
-static const struct attribute_group nct6775_group_other = {
-       .attrs = nct6775_attributes_other,
-       .is_visible = nct6775_other_is_visible,
-};
-
-static inline void nct6775_init_device(struct nct6775_data *data)
+static inline int nct6775_init_device(struct nct6775_data *data)
 {
-       int i;
-       u8 tmp, diode;
+       int i, err;
+       u16 tmp, diode;
 
        /* Start monitoring if needed */
        if (data->REG_CONFIG) {
-               tmp = data->read_value(data, data->REG_CONFIG);
-               if (!(tmp & 0x01))
-                       data->write_value(data, data->REG_CONFIG, tmp | 0x01);
+               err = nct6775_read_value(data, data->REG_CONFIG, &tmp);
+               if (err)
+                       return err;
+               if (!(tmp & 0x01)) {
+                       err = nct6775_write_value(data, data->REG_CONFIG, tmp | 0x01);
+                       if (err)
+                               return err;
+               }
        }
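
Each of these init sequences is a checked read-modify-write: read the register, test the bit, write back only when a change is needed, and bail out on the first error. If the pattern keeps repeating, it could be condensed into a small helper along these lines (illustrative only, not part of this series):

    /* Illustrative helper: set bits in a register, skipping no-op writes */
    static int nct6775_set_bits(struct nct6775_data *data, u16 reg, u16 bits)
    {
            u16 tmp;
            int err;

            err = nct6775_read_value(data, reg, &tmp);
            if (err)
                    return err;
            if ((tmp & bits) == bits)       /* already set: nothing to write */
                    return 0;
            return nct6775_write_value(data, reg, tmp | bits);
    }
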
 
        /* Enable temperature sensors if needed */
@@ -3737,18 +3330,29 @@ static inline void nct6775_init_device(struct nct6775_data *data)
                        continue;
                if (!data->reg_temp_config[i])
                        continue;
-               tmp = data->read_value(data, data->reg_temp_config[i]);
-               if (tmp & 0x01)
-                       data->write_value(data, data->reg_temp_config[i],
-                                           tmp & 0xfe);
+               err = nct6775_read_value(data, data->reg_temp_config[i], &tmp);
+               if (err)
+                       return err;
+               if (tmp & 0x01) {
+                       err = nct6775_write_value(data, data->reg_temp_config[i], tmp & 0xfe);
+                       if (err)
+                               return err;
+               }
        }
 
        /* Enable VBAT monitoring if needed */
-       tmp = data->read_value(data, data->REG_VBAT);
-       if (!(tmp & 0x01))
-               data->write_value(data, data->REG_VBAT, tmp | 0x01);
+       err = nct6775_read_value(data, data->REG_VBAT, &tmp);
+       if (err)
+               return err;
+       if (!(tmp & 0x01)) {
+               err = nct6775_write_value(data, data->REG_VBAT, tmp | 0x01);
+               if (err)
+                       return err;
+       }
 
-       diode = data->read_value(data, data->REG_DIODE);
+       err = nct6775_read_value(data, data->REG_DIODE, &diode);
+       if (err)
+               return err;
 
        for (i = 0; i < data->temp_fixed_num; i++) {
                if (!(data->have_temp_fixed & BIT(i)))
@@ -3759,241 +3363,24 @@ static inline void nct6775_init_device(struct nct6775_data *data)
                else                            /* thermistor */
                        data->temp_type[i] = 4;
        }
-}
-
-static void
-nct6775_check_fan_inputs(struct nct6775_data *data, struct nct6775_sio_data *sio_data)
-{
-       bool fan3pin = false, fan4pin = false, fan4min = false;
-       bool fan5pin = false, fan6pin = false, fan7pin = false;
-       bool pwm3pin = false, pwm4pin = false, pwm5pin = false;
-       bool pwm6pin = false, pwm7pin = false;
-
-       /* Store SIO_REG_ENABLE for use during resume */
-       sio_data->sio_select(sio_data, NCT6775_LD_HWM);
-       data->sio_reg_enable = sio_data->sio_inb(sio_data, SIO_REG_ENABLE);
-
-       /* fan4 and fan5 share some pins with the GPIO and serial flash */
-       if (data->kind == nct6775) {
-               int cr2c = sio_data->sio_inb(sio_data, 0x2c);
-
-               fan3pin = cr2c & BIT(6);
-               pwm3pin = cr2c & BIT(7);
-
-               /* On NCT6775, fan4 shares pins with the fdc interface */
-               fan4pin = !(sio_data->sio_inb(sio_data, 0x2A) & 0x80);
-       } else if (data->kind == nct6776) {
-               bool gpok = sio_data->sio_inb(sio_data, 0x27) & 0x80;
-               const char *board_vendor, *board_name;
-
-               board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
-               board_name = dmi_get_system_info(DMI_BOARD_NAME);
-
-               if (board_name && board_vendor &&
-                   !strcmp(board_vendor, "ASRock")) {
-                       /*
-                        * Auxiliary fan monitoring is not enabled on ASRock
-                        * Z77 Pro4-M if booted in UEFI Ultra-FastBoot mode.
-                        * Observed with BIOS version 2.00.
-                        */
-                       if (!strcmp(board_name, "Z77 Pro4-M")) {
-                               if ((data->sio_reg_enable & 0xe0) != 0xe0) {
-                                       data->sio_reg_enable |= 0xe0;
-                                       sio_data->sio_outb(sio_data, SIO_REG_ENABLE,
-                                                    data->sio_reg_enable);
-                               }
-                       }
-               }
-
-               if (data->sio_reg_enable & 0x80)
-                       fan3pin = gpok;
-               else
-                       fan3pin = !(sio_data->sio_inb(sio_data, 0x24) & 0x40);
-
-               if (data->sio_reg_enable & 0x40)
-                       fan4pin = gpok;
-               else
-                       fan4pin = sio_data->sio_inb(sio_data, 0x1C) & 0x01;
-
-               if (data->sio_reg_enable & 0x20)
-                       fan5pin = gpok;
-               else
-                       fan5pin = sio_data->sio_inb(sio_data, 0x1C) & 0x02;
-
-               fan4min = fan4pin;
-               pwm3pin = fan3pin;
-       } else if (data->kind == nct6106) {
-               int cr24 = sio_data->sio_inb(sio_data, 0x24);
-
-               fan3pin = !(cr24 & 0x80);
-               pwm3pin = cr24 & 0x08;
-       } else if (data->kind == nct6116) {
-               int cr1a = sio_data->sio_inb(sio_data, 0x1a);
-               int cr1b = sio_data->sio_inb(sio_data, 0x1b);
-               int cr24 = sio_data->sio_inb(sio_data, 0x24);
-               int cr2a = sio_data->sio_inb(sio_data, 0x2a);
-               int cr2b = sio_data->sio_inb(sio_data, 0x2b);
-               int cr2f = sio_data->sio_inb(sio_data, 0x2f);
-
-               fan3pin = !(cr2b & 0x10);
-               fan4pin = (cr2b & 0x80) ||                      // pin 1(2)
-                       (!(cr2f & 0x10) && (cr1a & 0x04));      // pin 65(66)
-               fan5pin = (cr2b & 0x80) ||                      // pin 126(127)
-                       (!(cr1b & 0x03) && (cr2a & 0x02));      // pin 94(96)
-
-               pwm3pin = fan3pin && (cr24 & 0x08);
-               pwm4pin = fan4pin;
-               pwm5pin = fan5pin;
-       } else {
-               /*
-                * NCT6779D, NCT6791D, NCT6792D, NCT6793D, NCT6795D, NCT6796D,
-                * NCT6797D, NCT6798D
-                */
-               int cr1a = sio_data->sio_inb(sio_data, 0x1a);
-               int cr1b = sio_data->sio_inb(sio_data, 0x1b);
-               int cr1c = sio_data->sio_inb(sio_data, 0x1c);
-               int cr1d = sio_data->sio_inb(sio_data, 0x1d);
-               int cr2a = sio_data->sio_inb(sio_data, 0x2a);
-               int cr2b = sio_data->sio_inb(sio_data, 0x2b);
-               int cr2d = sio_data->sio_inb(sio_data, 0x2d);
-               int cr2f = sio_data->sio_inb(sio_data, 0x2f);
-               bool dsw_en = cr2f & BIT(3);
-               bool ddr4_en = cr2f & BIT(4);
-               int cre0;
-               int creb;
-               int cred;
-
-               sio_data->sio_select(sio_data, NCT6775_LD_12);
-               cre0 = sio_data->sio_inb(sio_data, 0xe0);
-               creb = sio_data->sio_inb(sio_data, 0xeb);
-               cred = sio_data->sio_inb(sio_data, 0xed);
-
-               fan3pin = !(cr1c & BIT(5));
-               fan4pin = !(cr1c & BIT(6));
-               fan5pin = !(cr1c & BIT(7));
-
-               pwm3pin = !(cr1c & BIT(0));
-               pwm4pin = !(cr1c & BIT(1));
-               pwm5pin = !(cr1c & BIT(2));
-
-               switch (data->kind) {
-               case nct6791:
-                       fan6pin = cr2d & BIT(1);
-                       pwm6pin = cr2d & BIT(0);
-                       break;
-               case nct6792:
-                       fan6pin = !dsw_en && (cr2d & BIT(1));
-                       pwm6pin = !dsw_en && (cr2d & BIT(0));
-                       break;
-               case nct6793:
-                       fan5pin |= cr1b & BIT(5);
-                       fan5pin |= creb & BIT(5);
-
-                       fan6pin = !dsw_en && (cr2d & BIT(1));
-                       fan6pin |= creb & BIT(3);
-
-                       pwm5pin |= cr2d & BIT(7);
-                       pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
-
-                       pwm6pin = !dsw_en && (cr2d & BIT(0));
-                       pwm6pin |= creb & BIT(2);
-                       break;
-               case nct6795:
-                       fan5pin |= cr1b & BIT(5);
-                       fan5pin |= creb & BIT(5);
-
-                       fan6pin = (cr2a & BIT(4)) &&
-                                       (!dsw_en || (cred & BIT(4)));
-                       fan6pin |= creb & BIT(3);
-
-                       pwm5pin |= cr2d & BIT(7);
-                       pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
-
-                       pwm6pin = (cr2a & BIT(3)) && (cred & BIT(2));
-                       pwm6pin |= creb & BIT(2);
-                       break;
-               case nct6796:
-                       fan5pin |= cr1b & BIT(5);
-                       fan5pin |= (cre0 & BIT(3)) && !(cr1b & BIT(0));
-                       fan5pin |= creb & BIT(5);
-
-                       fan6pin = (cr2a & BIT(4)) &&
-                                       (!dsw_en || (cred & BIT(4)));
-                       fan6pin |= creb & BIT(3);
-
-                       fan7pin = !(cr2b & BIT(2));
-
-                       pwm5pin |= cr2d & BIT(7);
-                       pwm5pin |= (cre0 & BIT(4)) && !(cr1b & BIT(0));
-                       pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
-
-                       pwm6pin = (cr2a & BIT(3)) && (cred & BIT(2));
-                       pwm6pin |= creb & BIT(2);
-
-                       pwm7pin = !(cr1d & (BIT(2) | BIT(3)));
-                       break;
-               case nct6797:
-                       fan5pin |= !ddr4_en && (cr1b & BIT(5));
-                       fan5pin |= creb & BIT(5);
-
-                       fan6pin = cr2a & BIT(4);
-                       fan6pin |= creb & BIT(3);
-
-                       fan7pin = cr1a & BIT(1);
-
-                       pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
-                       pwm5pin |= !ddr4_en && (cr2d & BIT(7));
-
-                       pwm6pin = creb & BIT(2);
-                       pwm6pin |= cred & BIT(2);
-
-                       pwm7pin = cr1d & BIT(4);
-                       break;
-               case nct6798:
-                       fan6pin = !(cr1b & BIT(0)) && (cre0 & BIT(3));
-                       fan6pin |= cr2a & BIT(4);
-                       fan6pin |= creb & BIT(5);
-
-                       fan7pin = cr1b & BIT(5);
-                       fan7pin |= !(cr2b & BIT(2));
-                       fan7pin |= creb & BIT(3);
-
-                       pwm6pin = !(cr1b & BIT(0)) && (cre0 & BIT(4));
-                       pwm6pin |= !(cred & BIT(2)) && (cr2a & BIT(3));
-                       pwm6pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
-
-                       pwm7pin = !(cr1d & (BIT(2) | BIT(3)));
-                       pwm7pin |= cr2d & BIT(7);
-                       pwm7pin |= creb & BIT(2);
-                       break;
-               default:        /* NCT6779D */
-                       break;
-               }
-
-               fan4min = fan4pin;
-       }
 
-       /* fan 1 and 2 (0x03) are always present */
-       data->has_fan = 0x03 | (fan3pin << 2) | (fan4pin << 3) |
-               (fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
-       data->has_fan_min = 0x03 | (fan3pin << 2) | (fan4min << 3) |
-               (fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
-       data->has_pwm = 0x03 | (pwm3pin << 2) | (pwm4pin << 3) |
-               (pwm5pin << 4) | (pwm6pin << 5) | (pwm7pin << 6);
+       return 0;
 }
 
-static void add_temp_sensors(struct nct6775_data *data, const u16 *regp,
-                            int *available, int *mask)
+static int add_temp_sensors(struct nct6775_data *data, const u16 *regp,
+                           int *available, int *mask)
 {
-       int i;
-       u8 src;
+       int i, err;
+       u16 src;
 
        for (i = 0; i < data->pwm_num && *available; i++) {
                int index;
 
                if (!regp[i])
                        continue;
-               src = data->read_value(data, regp[i]);
+               err = nct6775_read_value(data, regp[i], &src);
+               if (err)
+                       return err;
                src &= 0x1f;
                if (!src || (*mask & BIT(src)))
                        continue;
@@ -4001,58 +3388,36 @@ static void add_temp_sensors(struct nct6775_data *data, const u16 *regp,
                        continue;
 
                index = __ffs(*available);
-               data->write_value(data, data->REG_TEMP_SOURCE[index], src);
+               err = nct6775_write_value(data, data->REG_TEMP_SOURCE[index], src);
+               if (err)
+                       return err;
                *available &= ~BIT(index);
                *mask |= BIT(src);
        }
+
+       return 0;
 }
 
-static int nct6775_probe(struct platform_device *pdev)
+int nct6775_probe(struct device *dev, struct nct6775_data *data,
+                 const struct regmap_config *regmapcfg)
 {
-       struct device *dev = &pdev->dev;
-       struct nct6775_sio_data *sio_data = dev_get_platdata(dev);
-       struct nct6775_data *data;
-       struct resource *res;
        int i, s, err = 0;
-       int src, mask, available;
+       int mask, available;
+       u16 src;
        const u16 *reg_temp, *reg_temp_over, *reg_temp_hyst, *reg_temp_config;
        const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit;
        const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL;
        int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp;
-       u8 cr2a;
-       struct attribute_group *group;
        struct device *hwmon_dev;
        struct sensor_template_group tsi_temp_tg;
-       int num_attr_groups = 0;
 
-       if (sio_data->access == access_direct) {
-               res = platform_get_resource(pdev, IORESOURCE_IO, 0);
-               if (!devm_request_region(&pdev->dev, res->start, IOREGION_LENGTH,
-                                        DRVNAME))
-                       return -EBUSY;
-       }
-
-       data = devm_kzalloc(&pdev->dev, sizeof(struct nct6775_data),
-                           GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       data->kind = sio_data->kind;
-       data->sio_data = sio_data;
-
-       if (sio_data->access == access_direct) {
-               data->addr = res->start;
-               data->read_value = nct6775_read_value;
-               data->write_value = nct6775_write_value;
-       } else {
-               data->read_value = nct6775_wmi_read_value;
-               data->write_value = nct6775_wmi_write_value;
-       }
+       data->regmap = devm_regmap_init(dev, NULL, data, regmapcfg);
+       if (IS_ERR(data->regmap))
+               return PTR_ERR(data->regmap);
 
        mutex_init(&data->update_lock);
        data->name = nct6775_device_names[data->kind];
        data->bank = 0xff;              /* Force initial bank selection */
-       platform_set_drvdata(pdev, data);
 
        switch (data->kind) {
        case nct6106:
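
The probe no longer requests an I/O region or installs read/write function pointers; instead each backend passes a regmap_config whose reg_read/reg_write callbacks wrap its own transport, and the core builds a bus-less regmap from it. A hedged sketch of what a backend might hand in (names and widths are illustrative):

    /* Illustrative backend config; devm_regmap_init(dev, NULL, data, &cfg)
     * uses these callbacks since no regmap_bus is given. */
    static int example_reg_read(void *ctx, unsigned int reg, unsigned int *val)
    {
            struct nct6775_data *data = ctx;        /* ctx from devm_regmap_init() */

            return example_transport_read(data, reg, val);  /* hypothetical I/O */
    }

    static int example_reg_write(void *ctx, unsigned int reg, unsigned int val)
    {
            struct nct6775_data *data = ctx;

            return example_transport_write(data, reg, val); /* hypothetical I/O */
    }

    static const struct regmap_config example_regmap_config = {
            .reg_bits = 16,
            .val_bits = 16,
            .reg_read = example_reg_read,
            .reg_write = example_reg_write,
    };
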
@@ -4596,7 +3961,10 @@ static int nct6775_probe(struct platform_device *pdev)
                if (reg_temp[i] == 0)
                        continue;
 
-               src = data->read_value(data, data->REG_TEMP_SOURCE[i]) & 0x1f;
+               err = nct6775_read_value(data, data->REG_TEMP_SOURCE[i], &src);
+               if (err)
+                       return err;
+               src &= 0x1f;
                if (!src || (mask & BIT(src)))
                        available |= BIT(i);
 
@@ -4607,8 +3975,12 @@ static int nct6775_probe(struct platform_device *pdev)
         * Now find unmonitored temperature registers and enable monitoring
         * if additional monitoring registers are available.
         */
-       add_temp_sensors(data, data->REG_TEMP_SEL, &available, &mask);
-       add_temp_sensors(data, data->REG_WEIGHT_TEMP_SEL, &available, &mask);
+       err = add_temp_sensors(data, data->REG_TEMP_SEL, &available, &mask);
+       if (err)
+               return err;
+       err = add_temp_sensors(data, data->REG_WEIGHT_TEMP_SEL, &available, &mask);
+       if (err)
+               return err;
 
        mask = 0;
        s = NUM_TEMP_FIXED;     /* First dynamic temperature attribute */
@@ -4616,7 +3988,10 @@ static int nct6775_probe(struct platform_device *pdev)
                if (reg_temp[i] == 0)
                        continue;
 
-               src = data->read_value(data, data->REG_TEMP_SOURCE[i]) & 0x1f;
+               err = nct6775_read_value(data, data->REG_TEMP_SOURCE[i], &src);
+               if (err)
+                       return err;
+               src &= 0x1f;
                if (!src || (mask & BIT(src)))
                        continue;
 
@@ -4676,7 +4051,10 @@ static int nct6775_probe(struct platform_device *pdev)
                if (reg_temp_mon[i] == 0)
                        continue;
 
-               src = data->read_value(data, data->REG_TEMP_SEL[i]) & 0x1f;
+               err = nct6775_read_value(data, data->REG_TEMP_SEL[i], &src);
+               if (err)
+                       return err;
+               src &= 0x1f;
                if (!src)
                        continue;
 
@@ -4760,525 +4138,68 @@ static int nct6775_probe(struct platform_device *pdev)
 
        /* Check which TSIx_TEMP registers are active */
        for (i = 0; i < num_reg_tsi_temp; i++) {
-               if (data->read_value(data, data->REG_TSI_TEMP[i]))
+               u16 tmp;
+
+               err = nct6775_read_value(data, data->REG_TSI_TEMP[i], &tmp);
+               if (err)
+                       return err;
+               if (tmp)
                        data->have_tsi_temp |= BIT(i);
        }
 
        /* Initialize the chip */
-       nct6775_init_device(data);
-
-       err = sio_data->sio_enter(sio_data);
+       err = nct6775_init_device(data);
        if (err)
                return err;
 
-       cr2a = sio_data->sio_inb(sio_data, 0x2a);
-       switch (data->kind) {
-       case nct6775:
-               data->have_vid = (cr2a & 0x40);
-               break;
-       case nct6776:
-               data->have_vid = (cr2a & 0x60) == 0x40;
-               break;
-       case nct6106:
-       case nct6116:
-       case nct6779:
-       case nct6791:
-       case nct6792:
-       case nct6793:
-       case nct6795:
-       case nct6796:
-       case nct6797:
-       case nct6798:
-               break;
-       }
-
-       /*
-        * Read VID value
-        * We can get the VID input values directly at logical device D 0xe3.
-        */
-       if (data->have_vid) {
-               sio_data->sio_select(sio_data, NCT6775_LD_VID);
-               data->vid = sio_data->sio_inb(sio_data, 0xe3);
-               data->vrm = vid_which_vrm();
-       }
-
-       if (fan_debounce) {
-               u8 tmp;
-
-               sio_data->sio_select(sio_data, NCT6775_LD_HWM);
-               tmp = sio_data->sio_inb(sio_data,
-                                   NCT6775_REG_CR_FAN_DEBOUNCE);
-               switch (data->kind) {
-               case nct6106:
-               case nct6116:
-                       tmp |= 0xe0;
-                       break;
-               case nct6775:
-                       tmp |= 0x1e;
-                       break;
-               case nct6776:
-               case nct6779:
-                       tmp |= 0x3e;
-                       break;
-               case nct6791:
-               case nct6792:
-               case nct6793:
-               case nct6795:
-               case nct6796:
-               case nct6797:
-               case nct6798:
-                       tmp |= 0x7e;
-                       break;
-               }
-               sio_data->sio_outb(sio_data, NCT6775_REG_CR_FAN_DEBOUNCE,
-                            tmp);
-               dev_info(&pdev->dev, "Enabled fan debounce for chip %s\n",
-                        data->name);
+       if (data->driver_init) {
+               err = data->driver_init(data);
+               if (err)
+                       return err;
        }
 
-       nct6775_check_fan_inputs(data, sio_data);
-
-       sio_data->sio_exit(sio_data);
-
        /* Read fan clock dividers immediately */
-       nct6775_init_fan_common(dev, data);
+       err = nct6775_init_fan_common(dev, data);
+       if (err)
+               return err;
 
        /* Register sysfs hooks */
-       group = nct6775_create_attr_group(dev, &nct6775_pwm_template_group,
-                                         data->pwm_num);
-       if (IS_ERR(group))
-               return PTR_ERR(group);
-
-       data->groups[num_attr_groups++] = group;
-
-       group = nct6775_create_attr_group(dev, &nct6775_in_template_group,
-                                         fls(data->have_in));
-       if (IS_ERR(group))
-               return PTR_ERR(group);
-
-       data->groups[num_attr_groups++] = group;
-
-       group = nct6775_create_attr_group(dev, &nct6775_fan_template_group,
-                                         fls(data->has_fan));
-       if (IS_ERR(group))
-               return PTR_ERR(group);
+       err = nct6775_add_template_attr_group(dev, data, &nct6775_pwm_template_group,
+                                             data->pwm_num);
+       if (err)
+               return err;
 
-       data->groups[num_attr_groups++] = group;
+       err = nct6775_add_template_attr_group(dev, data, &nct6775_in_template_group,
+                                             fls(data->have_in));
+       if (err)
+               return err;
 
-       group = nct6775_create_attr_group(dev, &nct6775_temp_template_group,
-                                         fls(data->have_temp));
-       if (IS_ERR(group))
-               return PTR_ERR(group);
+       err = nct6775_add_template_attr_group(dev, data, &nct6775_fan_template_group,
+                                             fls(data->has_fan));
+       if (err)
+               return err;
 
-       data->groups[num_attr_groups++] = group;
+       err = nct6775_add_template_attr_group(dev, data, &nct6775_temp_template_group,
+                                             fls(data->have_temp));
+       if (err)
+               return err;
 
        if (data->have_tsi_temp) {
                tsi_temp_tg.templates = nct6775_tsi_temp_template;
                tsi_temp_tg.is_visible = nct6775_tsi_temp_is_visible;
                tsi_temp_tg.base = fls(data->have_temp) + 1;
-               group = nct6775_create_attr_group(dev, &tsi_temp_tg, fls(data->have_tsi_temp));
-               if (IS_ERR(group))
-                       return PTR_ERR(group);
-
-               data->groups[num_attr_groups++] = group;
+               err = nct6775_add_template_attr_group(dev, data, &tsi_temp_tg,
+                                                     fls(data->have_tsi_temp));
+               if (err)
+                       return err;
        }
 
-       data->groups[num_attr_groups++] = &nct6775_group_other;
-
        hwmon_dev = devm_hwmon_device_register_with_groups(dev, data->name,
                                                           data, data->groups);
        return PTR_ERR_OR_ZERO(hwmon_dev);
 }
-
-static void nct6791_enable_io_mapping(struct nct6775_sio_data *sio_data)
-{
-       int val;
-
-       val = sio_data->sio_inb(sio_data, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE);
-       if (val & 0x10) {
-               pr_info("Enabling hardware monitor logical device mappings.\n");
-               sio_data->sio_outb(sio_data, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE,
-                              val & ~0x10);
-       }
-}
-
-static int __maybe_unused nct6775_suspend(struct device *dev)
-{
-       struct nct6775_data *data = nct6775_update_device(dev);
-
-       mutex_lock(&data->update_lock);
-       data->vbat = data->read_value(data, data->REG_VBAT);
-       if (data->kind == nct6775) {
-               data->fandiv1 = data->read_value(data, NCT6775_REG_FANDIV1);
-               data->fandiv2 = data->read_value(data, NCT6775_REG_FANDIV2);
-       }
-       mutex_unlock(&data->update_lock);
-
-       return 0;
-}
-
-static int __maybe_unused nct6775_resume(struct device *dev)
-{
-       struct nct6775_data *data = dev_get_drvdata(dev);
-       struct nct6775_sio_data *sio_data = dev_get_platdata(dev);
-       int i, j, err = 0;
-       u8 reg;
-
-       mutex_lock(&data->update_lock);
-       data->bank = 0xff;              /* Force initial bank selection */
-
-       err = sio_data->sio_enter(sio_data);
-       if (err)
-               goto abort;
-
-       sio_data->sio_select(sio_data, NCT6775_LD_HWM);
-       reg = sio_data->sio_inb(sio_data, SIO_REG_ENABLE);
-       if (reg != data->sio_reg_enable)
-               sio_data->sio_outb(sio_data, SIO_REG_ENABLE, data->sio_reg_enable);
-
-       if (data->kind == nct6791 || data->kind == nct6792 ||
-           data->kind == nct6793 || data->kind == nct6795 ||
-           data->kind == nct6796 || data->kind == nct6797 ||
-           data->kind == nct6798)
-               nct6791_enable_io_mapping(sio_data);
-
-       sio_data->sio_exit(sio_data);
-
-       /* Restore limits */
-       for (i = 0; i < data->in_num; i++) {
-               if (!(data->have_in & BIT(i)))
-                       continue;
-
-               data->write_value(data, data->REG_IN_MINMAX[0][i],
-                                 data->in[i][1]);
-               data->write_value(data, data->REG_IN_MINMAX[1][i],
-                                 data->in[i][2]);
-       }
-
-       for (i = 0; i < ARRAY_SIZE(data->fan_min); i++) {
-               if (!(data->has_fan_min & BIT(i)))
-                       continue;
-
-               data->write_value(data, data->REG_FAN_MIN[i],
-                                 data->fan_min[i]);
-       }
-
-       for (i = 0; i < NUM_TEMP; i++) {
-               if (!(data->have_temp & BIT(i)))
-                       continue;
-
-               for (j = 1; j < ARRAY_SIZE(data->reg_temp); j++)
-                       if (data->reg_temp[j][i])
-                               nct6775_write_temp(data, data->reg_temp[j][i],
-                                                  data->temp[j][i]);
-       }
-
-       /* Restore other settings */
-       data->write_value(data, data->REG_VBAT, data->vbat);
-       if (data->kind == nct6775) {
-               data->write_value(data, NCT6775_REG_FANDIV1, data->fandiv1);
-               data->write_value(data, NCT6775_REG_FANDIV2, data->fandiv2);
-       }
-
-abort:
-       /* Force re-reading all values */
-       data->valid = false;
-       mutex_unlock(&data->update_lock);
-
-       return err;
-}
-
-static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume);
-
-static struct platform_driver nct6775_driver = {
-       .driver = {
-               .name   = DRVNAME,
-               .pm     = &nct6775_dev_pm_ops,
-       },
-       .probe          = nct6775_probe,
-};
-
-/* nct6775_find() looks for a '627 in the Super-I/O config space */
-static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
-{
-       u16 val;
-       int err;
-       int addr;
-
-       sio_data->access = access_direct;
-       sio_data->sioreg = sioaddr;
-
-       err = sio_data->sio_enter(sio_data);
-       if (err)
-               return err;
-
-       val = (sio_data->sio_inb(sio_data, SIO_REG_DEVID) << 8) |
-               sio_data->sio_inb(sio_data, SIO_REG_DEVID + 1);
-       if (force_id && val != 0xffff)
-               val = force_id;
-
-       switch (val & SIO_ID_MASK) {
-       case SIO_NCT6106_ID:
-               sio_data->kind = nct6106;
-               break;
-       case SIO_NCT6116_ID:
-               sio_data->kind = nct6116;
-               break;
-       case SIO_NCT6775_ID:
-               sio_data->kind = nct6775;
-               break;
-       case SIO_NCT6776_ID:
-               sio_data->kind = nct6776;
-               break;
-       case SIO_NCT6779_ID:
-               sio_data->kind = nct6779;
-               break;
-       case SIO_NCT6791_ID:
-               sio_data->kind = nct6791;
-               break;
-       case SIO_NCT6792_ID:
-               sio_data->kind = nct6792;
-               break;
-       case SIO_NCT6793_ID:
-               sio_data->kind = nct6793;
-               break;
-       case SIO_NCT6795_ID:
-               sio_data->kind = nct6795;
-               break;
-       case SIO_NCT6796_ID:
-               sio_data->kind = nct6796;
-               break;
-       case SIO_NCT6797_ID:
-               sio_data->kind = nct6797;
-               break;
-       case SIO_NCT6798_ID:
-               sio_data->kind = nct6798;
-               break;
-       default:
-               if (val != 0xffff)
-                       pr_debug("unsupported chip ID: 0x%04x\n", val);
-               sio_data->sio_exit(sio_data);
-               return -ENODEV;
-       }
-
-       /* We have a known chip, find the HWM I/O address */
-       sio_data->sio_select(sio_data, NCT6775_LD_HWM);
-       val = (sio_data->sio_inb(sio_data, SIO_REG_ADDR) << 8)
-           | sio_data->sio_inb(sio_data, SIO_REG_ADDR + 1);
-       addr = val & IOREGION_ALIGNMENT;
-       if (addr == 0) {
-               pr_err("Refusing to enable a Super-I/O device with a base I/O port 0\n");
-               sio_data->sio_exit(sio_data);
-               return -ENODEV;
-       }
-
-       /* Activate logical device if needed */
-       val = sio_data->sio_inb(sio_data, SIO_REG_ENABLE);
-       if (!(val & 0x01)) {
-               pr_warn("Forcibly enabling Super-I/O. Sensor is probably unusable.\n");
-               sio_data->sio_outb(sio_data, SIO_REG_ENABLE, val | 0x01);
-       }
-
-       if (sio_data->kind == nct6791 || sio_data->kind == nct6792 ||
-           sio_data->kind == nct6793 || sio_data->kind == nct6795 ||
-           sio_data->kind == nct6796 || sio_data->kind == nct6797 ||
-           sio_data->kind == nct6798)
-               nct6791_enable_io_mapping(sio_data);
-
-       sio_data->sio_exit(sio_data);
-       pr_info("Found %s or compatible chip at %#x:%#x\n",
-               nct6775_sio_names[sio_data->kind], sioaddr, addr);
-
-       return addr;
-}
-
-/*
- * when Super-I/O functions move to a separate file, the Super-I/O
- * bus will manage the lifetime of the device and this module will only keep
- * track of the nct6775 driver. But since we use platform_device_alloc(), we
- * must keep track of the device
- */
-static struct platform_device *pdev[2];
-
-static const char * const asus_wmi_boards[] = {
-       "ProArt X570-CREATOR WIFI",
-       "Pro B550M-C",
-       "Pro WS X570-ACE",
-       "PRIME B360-PLUS",
-       "PRIME B460-PLUS",
-       "PRIME B550-PLUS",
-       "PRIME B550M-A",
-       "PRIME B550M-A (WI-FI)",
-       "PRIME X570-P",
-       "PRIME X570-PRO",
-       "ROG CROSSHAIR VIII DARK HERO",
-       "ROG CROSSHAIR VIII FORMULA",
-       "ROG CROSSHAIR VIII HERO",
-       "ROG CROSSHAIR VIII IMPACT",
-       "ROG STRIX B550-A GAMING",
-       "ROG STRIX B550-E GAMING",
-       "ROG STRIX B550-F GAMING",
-       "ROG STRIX B550-F GAMING (WI-FI)",
-       "ROG STRIX B550-F GAMING WIFI II",
-       "ROG STRIX B550-I GAMING",
-       "ROG STRIX B550-XE GAMING (WI-FI)",
-       "ROG STRIX X570-E GAMING",
-       "ROG STRIX X570-F GAMING",
-       "ROG STRIX X570-I GAMING",
-       "ROG STRIX Z390-E GAMING",
-       "ROG STRIX Z390-F GAMING",
-       "ROG STRIX Z390-H GAMING",
-       "ROG STRIX Z390-I GAMING",
-       "ROG STRIX Z490-A GAMING",
-       "ROG STRIX Z490-E GAMING",
-       "ROG STRIX Z490-F GAMING",
-       "ROG STRIX Z490-G GAMING",
-       "ROG STRIX Z490-G GAMING (WI-FI)",
-       "ROG STRIX Z490-H GAMING",
-       "ROG STRIX Z490-I GAMING",
-       "TUF GAMING B550M-PLUS",
-       "TUF GAMING B550M-PLUS (WI-FI)",
-       "TUF GAMING B550-PLUS",
-       "TUF GAMING B550-PRO",
-       "TUF GAMING X570-PLUS",
-       "TUF GAMING X570-PLUS (WI-FI)",
-       "TUF GAMING X570-PRO (WI-FI)",
-       "TUF GAMING Z490-PLUS",
-       "TUF GAMING Z490-PLUS (WI-FI)",
-};
-
-static int __init sensors_nct6775_init(void)
-{
-       int i, err;
-       bool found = false;
-       int address;
-       struct resource res;
-       struct nct6775_sio_data sio_data;
-       int sioaddr[2] = { 0x2e, 0x4e };
-       enum sensor_access access = access_direct;
-       const char *board_vendor, *board_name;
-       u8 tmp;
-
-       err = platform_driver_register(&nct6775_driver);
-       if (err)
-               return err;
-
-       board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
-       board_name = dmi_get_system_info(DMI_BOARD_NAME);
-
-       if (board_name && board_vendor &&
-           !strcmp(board_vendor, "ASUSTeK COMPUTER INC.")) {
-               err = match_string(asus_wmi_boards, ARRAY_SIZE(asus_wmi_boards),
-                                  board_name);
-               if (err >= 0) {
-                       /* if reading chip id via WMI succeeds, use WMI */
-                       if (!nct6775_asuswmi_read(0, NCT6775_PORT_CHIPID, &tmp) && tmp) {
-                               pr_info("Using Asus WMI to access %#x chip.\n", tmp);
-                               access = access_asuswmi;
-                       } else {
-                               pr_err("Can't read ChipID by Asus WMI.\n");
-                       }
-               }
-       }
-
-       /*
-        * initialize sio_data->kind and sio_data->sioreg.
-        *
-        * when Super-I/O functions move to a separate file, the Super-I/O
-        * driver will probe 0x2e and 0x4e and auto-detect the presence of a
-        * nct6775 hardware monitor, and call probe()
-        */
-       for (i = 0; i < ARRAY_SIZE(pdev); i++) {
-               sio_data.sio_outb = superio_outb;
-               sio_data.sio_inb = superio_inb;
-               sio_data.sio_select = superio_select;
-               sio_data.sio_enter = superio_enter;
-               sio_data.sio_exit = superio_exit;
-
-               address = nct6775_find(sioaddr[i], &sio_data);
-               if (address <= 0)
-                       continue;
-
-               found = true;
-
-               sio_data.access = access;
-
-               if (access == access_asuswmi) {
-                       sio_data.sio_outb = superio_wmi_outb;
-                       sio_data.sio_inb = superio_wmi_inb;
-                       sio_data.sio_select = superio_wmi_select;
-                       sio_data.sio_enter = superio_wmi_enter;
-                       sio_data.sio_exit = superio_wmi_exit;
-               }
-
-               pdev[i] = platform_device_alloc(DRVNAME, address);
-               if (!pdev[i]) {
-                       err = -ENOMEM;
-                       goto exit_device_unregister;
-               }
-
-               err = platform_device_add_data(pdev[i], &sio_data,
-                                              sizeof(struct nct6775_sio_data));
-               if (err)
-                       goto exit_device_put;
-
-               if (sio_data.access == access_direct) {
-                       memset(&res, 0, sizeof(res));
-                       res.name = DRVNAME;
-                       res.start = address + IOREGION_OFFSET;
-                       res.end = address + IOREGION_OFFSET + IOREGION_LENGTH - 1;
-                       res.flags = IORESOURCE_IO;
-
-                       err = acpi_check_resource_conflict(&res);
-                       if (err) {
-                               platform_device_put(pdev[i]);
-                               pdev[i] = NULL;
-                               continue;
-                       }
-
-                       err = platform_device_add_resources(pdev[i], &res, 1);
-                       if (err)
-                               goto exit_device_put;
-               }
-
-               /* platform_device_add calls probe() */
-               err = platform_device_add(pdev[i]);
-               if (err)
-                       goto exit_device_put;
-       }
-       if (!found) {
-               err = -ENODEV;
-               goto exit_unregister;
-       }
-
-       return 0;
-
-exit_device_put:
-       platform_device_put(pdev[i]);
-exit_device_unregister:
-       while (--i >= 0) {
-               if (pdev[i])
-                       platform_device_unregister(pdev[i]);
-       }
-exit_unregister:
-       platform_driver_unregister(&nct6775_driver);
-       return err;
-}
-
-static void __exit sensors_nct6775_exit(void)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(pdev); i++) {
-               if (pdev[i])
-                       platform_device_unregister(pdev[i]);
-       }
-       platform_driver_unregister(&nct6775_driver);
-}
+EXPORT_SYMBOL_GPL(nct6775_probe);
 
 MODULE_AUTHOR("Guenter Roeck <linux@roeck-us.net>");
-MODULE_DESCRIPTION("Driver for NCT6775F and compatible chips");
+MODULE_DESCRIPTION("Core driver for NCT6775F and compatible chips");
 MODULE_LICENSE("GPL");
-
-module_init(sensors_nct6775_init);
-module_exit(sensors_nct6775_exit);
diff --git a/drivers/hwmon/nct6775-i2c.c b/drivers/hwmon/nct6775-i2c.c
new file mode 100644 (file)
index 0000000..e1bcd11
--- /dev/null
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * nct6775-i2c - I2C driver for the hardware monitoring functionality of
+ *              Nuvoton NCT677x Super-I/O chips
+ *
+ * Copyright (C) 2022 Zev Weiss <zev@bewilderbeest.net>
+ *
+ * This driver interacts with the chip via its "back door" i2c interface, as
+ * is often exposed to a BMC.  Because the host may still be operating the
+ * chip via the ("front door") LPC interface, this driver cannot assume that
+ * it actually has full control of the chip, and in particular must avoid
+ * making any changes that could confuse the host's LPC usage of it.  It thus
+ * operates in a strictly read-only fashion, with the only exception being the
+ * bank-select register (which seems, thankfully, to be replicated for the i2c
+ * interface so it doesn't affect the LPC interface).
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include "nct6775.h"
+
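+/*
+ * Register addresses used with the regmap callbacks below are 16-bit
+ * values encoding the bank number in the high byte and the register
+ * offset within the bank in the low byte; 0x4e0, for example, means
+ * register 0xe0 in bank 4.
+ */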
+static int nct6775_i2c_read(void *ctx, unsigned int reg, unsigned int *val)
+{
+       int ret;
+       u32 tmp;
+       u8 bank = reg >> 8;
+       struct nct6775_data *data = ctx;
+       struct i2c_client *client = data->driver_data;
+
+       if (bank != data->bank) {
+               ret = i2c_smbus_write_byte_data(client, NCT6775_REG_BANK, bank);
+               if (ret)
+                       return ret;
+               data->bank = bank;
+       }
+
+       ret = i2c_smbus_read_byte_data(client, reg & 0xff);
+       if (ret < 0)
+               return ret;
+       tmp = ret;
+
+       if (nct6775_reg_is_word_sized(data, reg)) {
+               ret = i2c_smbus_read_byte_data(client, (reg & 0xff) + 1);
+               if (ret < 0)
+                       return ret;
+               tmp = (tmp << 8) | ret;
+       }
+
+       *val = tmp;
+       return 0;
+}
+
+/*
+ * The write operation is a dummy so as not to disturb anything being done
+ * with the chip via LPC.
+ */
+static int nct6775_i2c_write(void *ctx, unsigned int reg, unsigned int value)
+{
+       struct nct6775_data *data = ctx;
+       struct i2c_client *client = data->driver_data;
+
+       dev_dbg(&client->dev, "skipping attempted write: %02x -> %03x\n", value, reg);
+
+       /*
+        * This is a lie, but writing anything but the bank-select register is
+        * something this driver shouldn't be doing.
+        */
+       return 0;
+}
+
+static const struct of_device_id __maybe_unused nct6775_i2c_of_match[] = {
+       { .compatible = "nuvoton,nct6106", .data = (void *)nct6106, },
+       { .compatible = "nuvoton,nct6116", .data = (void *)nct6116, },
+       { .compatible = "nuvoton,nct6775", .data = (void *)nct6775, },
+       { .compatible = "nuvoton,nct6776", .data = (void *)nct6776, },
+       { .compatible = "nuvoton,nct6779", .data = (void *)nct6779, },
+       { .compatible = "nuvoton,nct6791", .data = (void *)nct6791, },
+       { .compatible = "nuvoton,nct6792", .data = (void *)nct6792, },
+       { .compatible = "nuvoton,nct6793", .data = (void *)nct6793, },
+       { .compatible = "nuvoton,nct6795", .data = (void *)nct6795, },
+       { .compatible = "nuvoton,nct6796", .data = (void *)nct6796, },
+       { .compatible = "nuvoton,nct6797", .data = (void *)nct6797, },
+       { .compatible = "nuvoton,nct6798", .data = (void *)nct6798, },
+       { },
+};
+MODULE_DEVICE_TABLE(of, nct6775_i2c_of_match);
+
+static const struct i2c_device_id nct6775_i2c_id[] = {
+       { "nct6106", nct6106 },
+       { "nct6116", nct6116 },
+       { "nct6775", nct6775 },
+       { "nct6776", nct6776 },
+       { "nct6779", nct6779 },
+       { "nct6791", nct6791 },
+       { "nct6792", nct6792 },
+       { "nct6793", nct6793 },
+       { "nct6795", nct6795 },
+       { "nct6796", nct6796 },
+       { "nct6797", nct6797 },
+       { "nct6798", nct6798 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, nct6775_i2c_id);
+
+static int nct6775_i2c_probe_init(struct nct6775_data *data)
+{
+       u32 tsi_channel_mask;
+       struct i2c_client *client = data->driver_data;
+
+       /*
+        * The i2c interface doesn't provide access to the control registers
+        * needed to determine the presence of other fans, but fans 1 and 2
+        * are (in principle) always there.
+        *
+        * In practice this is perhaps a little silly, because the system
+        * using this driver is most likely a BMC, and hence probably has
+        * totally separate fan tachs & pwms of its own that are actually
+        * controlling/monitoring the fans -- these are thus unlikely to be
+        * doing anything actually useful.
+        */
+       data->has_fan = 0x03;
+       data->has_fan_min = 0x03;
+       data->has_pwm = 0x03;
+
+       /*
+        * Because on a BMC this driver may be bound very shortly after power
+        * is first applied to the device, the automatic TSI channel detection
+        * in nct6775_probe() (which has already been run at this point) may
+        * not find anything if a channel hasn't yet produced a temperature
+        * reading.  Augment whatever was found via autodetection (if
+        * anything) with the channels DT says should be active.
+        */
+       if (!of_property_read_u32(client->dev.of_node, "nuvoton,tsi-channel-mask",
+                                 &tsi_channel_mask))
+               data->have_tsi_temp |= tsi_channel_mask & GENMASK(NUM_TSI_TEMP - 1, 0);
+
+       return 0;
+}
+
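+/*
+ * A hypothetical device tree fragment exercising the property read above;
+ * the node address and mask value are illustrative only, not taken from
+ * real hardware:
+ *
+ *     hwmon@4d {
+ *             compatible = "nuvoton,nct6779";
+ *             reg = <0x4d>;
+ *             nuvoton,tsi-channel-mask = <0x03>;
+ *     };
+ */
+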
+static const struct regmap_config nct6775_i2c_regmap_config = {
+       .reg_bits = 16,
+       .val_bits = 16,
+       .reg_read = nct6775_i2c_read,
+       .reg_write = nct6775_i2c_write,
+};
+
+static int nct6775_i2c_probe(struct i2c_client *client)
+{
+       struct nct6775_data *data;
+       const struct of_device_id *of_id;
+       const struct i2c_device_id *i2c_id;
+       struct device *dev = &client->dev;
+
+       of_id = of_match_device(nct6775_i2c_of_match, dev);
+       i2c_id = i2c_match_id(nct6775_i2c_id, client);
+
+       if (of_id && (unsigned long)of_id->data != i2c_id->driver_data)
+               dev_notice(dev, "Device mismatch: %s in device tree, %s detected\n",
+                          of_id->name, i2c_id->name);
+
+       data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       data->kind = i2c_id->driver_data;
+
+       data->read_only = true;
+       data->driver_data = client;
+       data->driver_init = nct6775_i2c_probe_init;
+
+       return nct6775_probe(dev, data, &nct6775_i2c_regmap_config);
+}
+
+static struct i2c_driver nct6775_i2c_driver = {
+       .class = I2C_CLASS_HWMON,
+       .driver = {
+               .name = "nct6775-i2c",
+               .of_match_table = of_match_ptr(nct6775_i2c_of_match),
+       },
+       .probe_new = nct6775_i2c_probe,
+       .id_table = nct6775_i2c_id,
+};
+
+module_i2c_driver(nct6775_i2c_driver);
+
+MODULE_AUTHOR("Zev Weiss <zev@bewilderbeest.net>");
+MODULE_DESCRIPTION("I2C driver for NCT6775F and compatible chips");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(HWMON_NCT6775);
diff --git a/drivers/hwmon/nct6775-platform.c b/drivers/hwmon/nct6775-platform.c
new file mode 100644 (file)
index 0000000..6d46c94
--- /dev/null
@@ -0,0 +1,1229 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * nct6775 - Platform driver for the hardware monitoring
+ *          functionality of Nuvoton NCT677x Super-I/O chips
+ *
+ * Copyright (C) 2012  Guenter Roeck <linux@roeck-us.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/hwmon-vid.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/wmi.h>
+
+#include "nct6775.h"
+
+enum sensor_access { access_direct, access_asuswmi };
+
+static const char * const nct6775_sio_names[] __initconst = {
+       "NCT6106D",
+       "NCT6116D",
+       "NCT6775F",
+       "NCT6776D/F",
+       "NCT6779D",
+       "NCT6791D",
+       "NCT6792D",
+       "NCT6793D",
+       "NCT6795D",
+       "NCT6796D",
+       "NCT6797D",
+       "NCT6798D",
+};
+
+static unsigned short force_id;
+module_param(force_id, ushort, 0);
+MODULE_PARM_DESC(force_id, "Override the detected device ID");
+
+static unsigned short fan_debounce;
+module_param(fan_debounce, ushort, 0);
+MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal");
+
+#define DRVNAME "nct6775"
+
+#define NCT6775_PORT_CHIPID    0x58
+
+/*
+ * ISA constants
+ */
+
+#define IOREGION_ALIGNMENT     (~7)
+#define IOREGION_OFFSET                5
+#define IOREGION_LENGTH                2
+#define ADDR_REG_OFFSET                0
+#define DATA_REG_OFFSET                1
+
+/*
+ * Super-I/O constants and functions
+ */
+
+#define NCT6775_LD_ACPI                0x0a
+#define NCT6775_LD_HWM         0x0b
+#define NCT6775_LD_VID         0x0d
+#define NCT6775_LD_12          0x12
+
+#define SIO_REG_LDSEL          0x07    /* Logical device select */
+#define SIO_REG_DEVID          0x20    /* Device ID (2 bytes) */
+#define SIO_REG_ENABLE         0x30    /* Logical device enable */
+#define SIO_REG_ADDR           0x60    /* Logical device address (2 bytes) */
+
+#define SIO_NCT6106_ID         0xc450
+#define SIO_NCT6116_ID         0xd280
+#define SIO_NCT6775_ID         0xb470
+#define SIO_NCT6776_ID         0xc330
+#define SIO_NCT6779_ID         0xc560
+#define SIO_NCT6791_ID         0xc800
+#define SIO_NCT6792_ID         0xc910
+#define SIO_NCT6793_ID         0xd120
+#define SIO_NCT6795_ID         0xd350
+#define SIO_NCT6796_ID         0xd420
+#define SIO_NCT6797_ID         0xd450
+#define SIO_NCT6798_ID         0xd428
+#define SIO_ID_MASK            0xFFF8
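+/* The low three ID bits are masked off; they differ between chip revisions. */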
+
+/*
+ * Control registers
+ */
+#define NCT6775_REG_CR_FAN_DEBOUNCE    0xf0
+
+struct nct6775_sio_data {
+       int sioreg;
+       int ld;
+       enum kinds kind;
+       enum sensor_access access;
+
+       /* superio_*() callbacks */
+       void (*sio_outb)(struct nct6775_sio_data *sio_data, int reg, int val);
+       int (*sio_inb)(struct nct6775_sio_data *sio_data, int reg);
+       void (*sio_select)(struct nct6775_sio_data *sio_data, int ld);
+       int (*sio_enter)(struct nct6775_sio_data *sio_data);
+       void (*sio_exit)(struct nct6775_sio_data *sio_data);
+};
+
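+/*
+ * The sio_*() callbacks above let the same probe, suspend and resume code
+ * reach the Super-I/O configuration space either via direct port I/O or
+ * via the ASUS WMI methods, depending on the access mode chosen at init
+ * time.
+ */
+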
+#define ASUSWMI_MONITORING_GUID                "466747A0-70EC-11DE-8A39-0800200C9A66"
+#define ASUSWMI_METHODID_RSIO          0x5253494F
+#define ASUSWMI_METHODID_WSIO          0x5753494F
+#define ASUSWMI_METHODID_RHWM          0x5248574D
+#define ASUSWMI_METHODID_WHWM          0x5748574D
+#define ASUSWMI_UNSUPPORTED_METHOD     0xFFFFFFFE
+
+static int nct6775_asuswmi_evaluate_method(u32 method_id, u8 bank, u8 reg, u8 val, u32 *retval)
+{
+#if IS_ENABLED(CONFIG_ACPI_WMI)
+       u32 args = bank | (reg << 8) | (val << 16);
+       struct acpi_buffer input = { (acpi_size) sizeof(args), &args };
+       struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+       acpi_status status;
+       union acpi_object *obj;
+       u32 tmp = ASUSWMI_UNSUPPORTED_METHOD;
+
+       status = wmi_evaluate_method(ASUSWMI_MONITORING_GUID, 0,
+                                    method_id, &input, &output);
+
+       if (ACPI_FAILURE(status))
+               return -EIO;
+
+       obj = output.pointer;
+       if (obj && obj->type == ACPI_TYPE_INTEGER)
+               tmp = obj->integer.value;
+
+       if (retval)
+               *retval = tmp;
+
+       kfree(obj);
+
+       if (tmp == ASUSWMI_UNSUPPORTED_METHOD)
+               return -ENODEV;
+       return 0;
+#else
+       return -EOPNOTSUPP;
+#endif
+}
+
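+/*
+ * The args word above packs bank, register and value as
+ * bank | (reg << 8) | (val << 16): bank 0x01, reg 0x2a, val 0x00, for
+ * example, is encoded as 0x00002a01.
+ */
+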
+static inline int nct6775_asuswmi_write(u8 bank, u8 reg, u8 val)
+{
+       return nct6775_asuswmi_evaluate_method(ASUSWMI_METHODID_WHWM, bank,
+                                             reg, val, NULL);
+}
+
+static inline int nct6775_asuswmi_read(u8 bank, u8 reg, u8 *val)
+{
+       u32 ret, tmp = 0;
+
+       ret = nct6775_asuswmi_evaluate_method(ASUSWMI_METHODID_RHWM, bank,
+                                             reg, 0, &tmp);
+       *val = tmp;
+       return ret;
+}
+
+static int superio_wmi_inb(struct nct6775_sio_data *sio_data, int reg)
+{
+       int tmp = 0;
+
+       nct6775_asuswmi_evaluate_method(ASUSWMI_METHODID_RSIO, sio_data->ld,
+                                       reg, 0, &tmp);
+       return tmp;
+}
+
+static void superio_wmi_outb(struct nct6775_sio_data *sio_data, int reg, int val)
+{
+       nct6775_asuswmi_evaluate_method(ASUSWMI_METHODID_WSIO, sio_data->ld,
+                                       reg, val, NULL);
+}
+
+static void superio_wmi_select(struct nct6775_sio_data *sio_data, int ld)
+{
+       sio_data->ld = ld;
+}
+
+static int superio_wmi_enter(struct nct6775_sio_data *sio_data)
+{
+       return 0;
+}
+
+static void superio_wmi_exit(struct nct6775_sio_data *sio_data)
+{
+}
+
+static void superio_outb(struct nct6775_sio_data *sio_data, int reg, int val)
+{
+       int ioreg = sio_data->sioreg;
+
+       outb(reg, ioreg);
+       outb(val, ioreg + 1);
+}
+
+static int superio_inb(struct nct6775_sio_data *sio_data, int reg)
+{
+       int ioreg = sio_data->sioreg;
+
+       outb(reg, ioreg);
+       return inb(ioreg + 1);
+}
+
+static void superio_select(struct nct6775_sio_data *sio_data, int ld)
+{
+       int ioreg = sio_data->sioreg;
+
+       outb(SIO_REG_LDSEL, ioreg);
+       outb(ld, ioreg + 1);
+}
+
+static int superio_enter(struct nct6775_sio_data *sio_data)
+{
+       int ioreg = sio_data->sioreg;
+
+       /*
+        * Try to reserve <ioreg> and <ioreg + 1> for exclusive access.
+        */
+       if (!request_muxed_region(ioreg, 2, DRVNAME))
+               return -EBUSY;
+
+       outb(0x87, ioreg);
+       outb(0x87, ioreg);
+
+       return 0;
+}
+
+static void superio_exit(struct nct6775_sio_data *sio_data)
+{
+       int ioreg = sio_data->sioreg;
+
+       outb(0xaa, ioreg);
+       outb(0x02, ioreg);
+       outb(0x02, ioreg + 1);
+       release_region(ioreg, 2);
+}
+
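+/*
+ * The 0x87/0x87 writes in superio_enter() and the 0xaa write in
+ * superio_exit() are the customary Nuvoton/Winbond sequences for entering
+ * and leaving Super-I/O extended function mode.
+ */
+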
+static inline void nct6775_wmi_set_bank(struct nct6775_data *data, u16 reg)
+{
+       u8 bank = reg >> 8;
+
+       data->bank = bank;
+}
+
+static int nct6775_wmi_reg_read(void *ctx, unsigned int reg, unsigned int *val)
+{
+       struct nct6775_data *data = ctx;
+       int err, word_sized = nct6775_reg_is_word_sized(data, reg);
+       u8 tmp = 0;
+       u16 res;
+
+       nct6775_wmi_set_bank(data, reg);
+
+       err = nct6775_asuswmi_read(data->bank, reg & 0xff, &tmp);
+       if (err)
+               return err;
+
+       res = tmp;
+       if (word_sized) {
+               err = nct6775_asuswmi_read(data->bank, (reg & 0xff) + 1, &tmp);
+               if (err)
+                       return err;
+
+               res = (res << 8) + tmp;
+       }
+       *val = res;
+       return 0;
+}
+
+static int nct6775_wmi_reg_write(void *ctx, unsigned int reg, unsigned int value)
+{
+       struct nct6775_data *data = ctx;
+       int res, word_sized = nct6775_reg_is_word_sized(data, reg);
+
+       nct6775_wmi_set_bank(data, reg);
+
+       if (word_sized) {
+               res = nct6775_asuswmi_write(data->bank, reg & 0xff, value >> 8);
+               if (res)
+                       return res;
+
+               res = nct6775_asuswmi_write(data->bank, (reg & 0xff) + 1, value);
+       } else {
+               res = nct6775_asuswmi_write(data->bank, reg & 0xff, value);
+       }
+
+       return res;
+}
+
+/*
+ * On older chips, only registers 0x50-0x5f are banked.
+ * On more recent chips, all registers are banked.
+ * Assume that is the case and set the bank number for each access.
+ * Cache the bank number so it only needs to be set if it changes.
+ */
+static inline void nct6775_set_bank(struct nct6775_data *data, u16 reg)
+{
+       u8 bank = reg >> 8;
+
+       if (data->bank != bank) {
+               outb_p(NCT6775_REG_BANK, data->addr + ADDR_REG_OFFSET);
+               outb_p(bank, data->addr + DATA_REG_OFFSET);
+               data->bank = bank;
+       }
+}
+
+static int nct6775_reg_read(void *ctx, unsigned int reg, unsigned int *val)
+{
+       struct nct6775_data *data = ctx;
+       int word_sized = nct6775_reg_is_word_sized(data, reg);
+
+       nct6775_set_bank(data, reg);
+       outb_p(reg & 0xff, data->addr + ADDR_REG_OFFSET);
+       *val = inb_p(data->addr + DATA_REG_OFFSET);
+       if (word_sized) {
+               outb_p((reg & 0xff) + 1,
+                      data->addr + ADDR_REG_OFFSET);
+               *val = (*val << 8) + inb_p(data->addr + DATA_REG_OFFSET);
+       }
+       return 0;
+}
+
+static int nct6775_reg_write(void *ctx, unsigned int reg, unsigned int value)
+{
+       struct nct6775_data *data = ctx;
+       int word_sized = nct6775_reg_is_word_sized(data, reg);
+
+       nct6775_set_bank(data, reg);
+       outb_p(reg & 0xff, data->addr + ADDR_REG_OFFSET);
+       if (word_sized) {
+               outb_p(value >> 8, data->addr + DATA_REG_OFFSET);
+               outb_p((reg & 0xff) + 1,
+                      data->addr + ADDR_REG_OFFSET);
+       }
+       outb_p(value & 0xff, data->addr + DATA_REG_OFFSET);
+       return 0;
+}
+
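+/*
+ * Both LPC accessors above follow the indexed-I/O pattern: the register
+ * index goes to the address port (base + ADDR_REG_OFFSET) and the value
+ * is then transferred through the data port (base + DATA_REG_OFFSET),
+ * with two such cycles for word-sized registers.
+ */
+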
+static void nct6791_enable_io_mapping(struct nct6775_sio_data *sio_data)
+{
+       int val;
+
+       val = sio_data->sio_inb(sio_data, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE);
+       if (val & 0x10) {
+               pr_info("Enabling hardware monitor logical device mappings.\n");
+               sio_data->sio_outb(sio_data, NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE,
+                              val & ~0x10);
+       }
+}
+
+static int __maybe_unused nct6775_suspend(struct device *dev)
+{
+       int err;
+       u16 tmp;
+       struct nct6775_data *data = nct6775_update_device(dev);
+
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       mutex_lock(&data->update_lock);
+       err = nct6775_read_value(data, data->REG_VBAT, &tmp);
+       if (err)
+               goto out;
+       data->vbat = tmp;
+       if (data->kind == nct6775) {
+               err = nct6775_read_value(data, NCT6775_REG_FANDIV1, &tmp);
+               if (err)
+                       goto out;
+               data->fandiv1 = tmp;
+
+               err = nct6775_read_value(data, NCT6775_REG_FANDIV2, &tmp);
+               if (err)
+                       goto out;
+               data->fandiv2 = tmp;
+       }
+out:
+       mutex_unlock(&data->update_lock);
+
+       return err;
+}
+
+static int __maybe_unused nct6775_resume(struct device *dev)
+{
+       struct nct6775_data *data = dev_get_drvdata(dev);
+       struct nct6775_sio_data *sio_data = dev_get_platdata(dev);
+       int i, j, err = 0;
+       u8 reg;
+
+       mutex_lock(&data->update_lock);
+       data->bank = 0xff;              /* Force initial bank selection */
+
+       err = sio_data->sio_enter(sio_data);
+       if (err)
+               goto abort;
+
+       sio_data->sio_select(sio_data, NCT6775_LD_HWM);
+       reg = sio_data->sio_inb(sio_data, SIO_REG_ENABLE);
+       if (reg != data->sio_reg_enable)
+               sio_data->sio_outb(sio_data, SIO_REG_ENABLE, data->sio_reg_enable);
+
+       if (data->kind == nct6791 || data->kind == nct6792 ||
+           data->kind == nct6793 || data->kind == nct6795 ||
+           data->kind == nct6796 || data->kind == nct6797 ||
+           data->kind == nct6798)
+               nct6791_enable_io_mapping(sio_data);
+
+       sio_data->sio_exit(sio_data);
+
+       /* Restore limits */
+       for (i = 0; i < data->in_num; i++) {
+               if (!(data->have_in & BIT(i)))
+                       continue;
+
+               err = nct6775_write_value(data, data->REG_IN_MINMAX[0][i], data->in[i][1]);
+               if (err)
+                       goto abort;
+               err = nct6775_write_value(data, data->REG_IN_MINMAX[1][i], data->in[i][2]);
+               if (err)
+                       goto abort;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(data->fan_min); i++) {
+               if (!(data->has_fan_min & BIT(i)))
+                       continue;
+
+               err = nct6775_write_value(data, data->REG_FAN_MIN[i], data->fan_min[i]);
+               if (err)
+                       goto abort;
+       }
+
+       for (i = 0; i < NUM_TEMP; i++) {
+               if (!(data->have_temp & BIT(i)))
+                       continue;
+
+               for (j = 1; j < ARRAY_SIZE(data->reg_temp); j++)
+                       if (data->reg_temp[j][i]) {
+                               err = nct6775_write_temp(data, data->reg_temp[j][i],
+                                                        data->temp[j][i]);
+                               if (err)
+                                       goto abort;
+                       }
+       }
+
+       /* Restore other settings */
+       err = nct6775_write_value(data, data->REG_VBAT, data->vbat);
+       if (err)
+               goto abort;
+       if (data->kind == nct6775) {
+               err = nct6775_write_value(data, NCT6775_REG_FANDIV1, data->fandiv1);
+               if (err)
+                       goto abort;
+               err = nct6775_write_value(data, NCT6775_REG_FANDIV2, data->fandiv2);
+       }
+
+abort:
+       /* Force re-reading all values */
+       data->valid = false;
+       mutex_unlock(&data->update_lock);
+
+       return err;
+}
+
+static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume);
+
+static void
+nct6775_check_fan_inputs(struct nct6775_data *data, struct nct6775_sio_data *sio_data)
+{
+       bool fan3pin = false, fan4pin = false, fan4min = false;
+       bool fan5pin = false, fan6pin = false, fan7pin = false;
+       bool pwm3pin = false, pwm4pin = false, pwm5pin = false;
+       bool pwm6pin = false, pwm7pin = false;
+
+       /* Store SIO_REG_ENABLE for use during resume */
+       sio_data->sio_select(sio_data, NCT6775_LD_HWM);
+       data->sio_reg_enable = sio_data->sio_inb(sio_data, SIO_REG_ENABLE);
+
+       /* fan4 and fan5 share some pins with the GPIO and serial flash */
+       if (data->kind == nct6775) {
+               int cr2c = sio_data->sio_inb(sio_data, 0x2c);
+
+               fan3pin = cr2c & BIT(6);
+               pwm3pin = cr2c & BIT(7);
+
+               /* On NCT6775, fan4 shares pins with the fdc interface */
+               fan4pin = !(sio_data->sio_inb(sio_data, 0x2A) & 0x80);
+       } else if (data->kind == nct6776) {
+               bool gpok = sio_data->sio_inb(sio_data, 0x27) & 0x80;
+               const char *board_vendor, *board_name;
+
+               board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
+               board_name = dmi_get_system_info(DMI_BOARD_NAME);
+
+               if (board_name && board_vendor &&
+                   !strcmp(board_vendor, "ASRock")) {
+                       /*
+                        * Auxiliary fan monitoring is not enabled on ASRock
+                        * Z77 Pro4-M if booted in UEFI Ultra-FastBoot mode.
+                        * Observed with BIOS version 2.00.
+                        */
+                       if (!strcmp(board_name, "Z77 Pro4-M")) {
+                               if ((data->sio_reg_enable & 0xe0) != 0xe0) {
+                                       data->sio_reg_enable |= 0xe0;
+                                       sio_data->sio_outb(sio_data, SIO_REG_ENABLE,
+                                                    data->sio_reg_enable);
+                               }
+                       }
+               }
+
+               if (data->sio_reg_enable & 0x80)
+                       fan3pin = gpok;
+               else
+                       fan3pin = !(sio_data->sio_inb(sio_data, 0x24) & 0x40);
+
+               if (data->sio_reg_enable & 0x40)
+                       fan4pin = gpok;
+               else
+                       fan4pin = sio_data->sio_inb(sio_data, 0x1C) & 0x01;
+
+               if (data->sio_reg_enable & 0x20)
+                       fan5pin = gpok;
+               else
+                       fan5pin = sio_data->sio_inb(sio_data, 0x1C) & 0x02;
+
+               fan4min = fan4pin;
+               pwm3pin = fan3pin;
+       } else if (data->kind == nct6106) {
+               int cr24 = sio_data->sio_inb(sio_data, 0x24);
+
+               fan3pin = !(cr24 & 0x80);
+               pwm3pin = cr24 & 0x08;
+       } else if (data->kind == nct6116) {
+               int cr1a = sio_data->sio_inb(sio_data, 0x1a);
+               int cr1b = sio_data->sio_inb(sio_data, 0x1b);
+               int cr24 = sio_data->sio_inb(sio_data, 0x24);
+               int cr2a = sio_data->sio_inb(sio_data, 0x2a);
+               int cr2b = sio_data->sio_inb(sio_data, 0x2b);
+               int cr2f = sio_data->sio_inb(sio_data, 0x2f);
+
+               fan3pin = !(cr2b & 0x10);
+               fan4pin = (cr2b & 0x80) ||                      // pin 1(2)
+                       (!(cr2f & 0x10) && (cr1a & 0x04));      // pin 65(66)
+               fan5pin = (cr2b & 0x80) ||                      // pin 126(127)
+                       (!(cr1b & 0x03) && (cr2a & 0x02));      // pin 94(96)
+
+               pwm3pin = fan3pin && (cr24 & 0x08);
+               pwm4pin = fan4pin;
+               pwm5pin = fan5pin;
+       } else {
+               /*
+                * NCT6779D, NCT6791D, NCT6792D, NCT6793D, NCT6795D, NCT6796D,
+                * NCT6797D, NCT6798D
+                */
+               int cr1a = sio_data->sio_inb(sio_data, 0x1a);
+               int cr1b = sio_data->sio_inb(sio_data, 0x1b);
+               int cr1c = sio_data->sio_inb(sio_data, 0x1c);
+               int cr1d = sio_data->sio_inb(sio_data, 0x1d);
+               int cr2a = sio_data->sio_inb(sio_data, 0x2a);
+               int cr2b = sio_data->sio_inb(sio_data, 0x2b);
+               int cr2d = sio_data->sio_inb(sio_data, 0x2d);
+               int cr2f = sio_data->sio_inb(sio_data, 0x2f);
+               bool dsw_en = cr2f & BIT(3);
+               bool ddr4_en = cr2f & BIT(4);
+               int cre0;
+               int creb;
+               int cred;
+
+               sio_data->sio_select(sio_data, NCT6775_LD_12);
+               cre0 = sio_data->sio_inb(sio_data, 0xe0);
+               creb = sio_data->sio_inb(sio_data, 0xeb);
+               cred = sio_data->sio_inb(sio_data, 0xed);
+
+               fan3pin = !(cr1c & BIT(5));
+               fan4pin = !(cr1c & BIT(6));
+               fan5pin = !(cr1c & BIT(7));
+
+               pwm3pin = !(cr1c & BIT(0));
+               pwm4pin = !(cr1c & BIT(1));
+               pwm5pin = !(cr1c & BIT(2));
+
+               switch (data->kind) {
+               case nct6791:
+                       fan6pin = cr2d & BIT(1);
+                       pwm6pin = cr2d & BIT(0);
+                       break;
+               case nct6792:
+                       fan6pin = !dsw_en && (cr2d & BIT(1));
+                       pwm6pin = !dsw_en && (cr2d & BIT(0));
+                       break;
+               case nct6793:
+                       fan5pin |= cr1b & BIT(5);
+                       fan5pin |= creb & BIT(5);
+
+                       fan6pin = !dsw_en && (cr2d & BIT(1));
+                       fan6pin |= creb & BIT(3);
+
+                       pwm5pin |= cr2d & BIT(7);
+                       pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
+
+                       pwm6pin = !dsw_en && (cr2d & BIT(0));
+                       pwm6pin |= creb & BIT(2);
+                       break;
+               case nct6795:
+                       fan5pin |= cr1b & BIT(5);
+                       fan5pin |= creb & BIT(5);
+
+                       fan6pin = (cr2a & BIT(4)) &&
+                                       (!dsw_en || (cred & BIT(4)));
+                       fan6pin |= creb & BIT(3);
+
+                       pwm5pin |= cr2d & BIT(7);
+                       pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
+
+                       pwm6pin = (cr2a & BIT(3)) && (cred & BIT(2));
+                       pwm6pin |= creb & BIT(2);
+                       break;
+               case nct6796:
+                       fan5pin |= cr1b & BIT(5);
+                       fan5pin |= (cre0 & BIT(3)) && !(cr1b & BIT(0));
+                       fan5pin |= creb & BIT(5);
+
+                       fan6pin = (cr2a & BIT(4)) &&
+                                       (!dsw_en || (cred & BIT(4)));
+                       fan6pin |= creb & BIT(3);
+
+                       fan7pin = !(cr2b & BIT(2));
+
+                       pwm5pin |= cr2d & BIT(7);
+                       pwm5pin |= (cre0 & BIT(4)) && !(cr1b & BIT(0));
+                       pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
+
+                       pwm6pin = (cr2a & BIT(3)) && (cred & BIT(2));
+                       pwm6pin |= creb & BIT(2);
+
+                       pwm7pin = !(cr1d & (BIT(2) | BIT(3)));
+                       break;
+               case nct6797:
+                       fan5pin |= !ddr4_en && (cr1b & BIT(5));
+                       fan5pin |= creb & BIT(5);
+
+                       fan6pin = cr2a & BIT(4);
+                       fan6pin |= creb & BIT(3);
+
+                       fan7pin = cr1a & BIT(1);
+
+                       pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
+                       pwm5pin |= !ddr4_en && (cr2d & BIT(7));
+
+                       pwm6pin = creb & BIT(2);
+                       pwm6pin |= cred & BIT(2);
+
+                       pwm7pin = cr1d & BIT(4);
+                       break;
+               case nct6798:
+                       fan6pin = !(cr1b & BIT(0)) && (cre0 & BIT(3));
+                       fan6pin |= cr2a & BIT(4);
+                       fan6pin |= creb & BIT(5);
+
+                       fan7pin = cr1b & BIT(5);
+                       fan7pin |= !(cr2b & BIT(2));
+                       fan7pin |= creb & BIT(3);
+
+                       pwm6pin = !(cr1b & BIT(0)) && (cre0 & BIT(4));
+                       pwm6pin |= !(cred & BIT(2)) && (cr2a & BIT(3));
+                       pwm6pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
+
+                       pwm7pin = !(cr1d & (BIT(2) | BIT(3)));
+                       pwm7pin |= cr2d & BIT(7);
+                       pwm7pin |= creb & BIT(2);
+                       break;
+               default:        /* NCT6779D */
+                       break;
+               }
+
+               fan4min = fan4pin;
+       }
+
+       /* fan 1 and 2 (0x03) are always present */
+       data->has_fan = 0x03 | (fan3pin << 2) | (fan4pin << 3) |
+               (fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
+       data->has_fan_min = 0x03 | (fan3pin << 2) | (fan4min << 3) |
+               (fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
+       data->has_pwm = 0x03 | (pwm3pin << 2) | (pwm4pin << 3) |
+               (pwm5pin << 4) | (pwm6pin << 5) | (pwm7pin << 6);
+}
+
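+/*
+ * Example: with only fan3/pwm3 detected in addition to the always-present
+ * fans 1 and 2, the masks above work out to has_fan = has_fan_min =
+ * has_pwm = 0x07.
+ */
+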
+static ssize_t
+cpu0_vid_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct nct6775_data *data = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%d\n", vid_from_reg(data->vid, data->vrm));
+}
+
+static DEVICE_ATTR_RO(cpu0_vid);
+
+/* Case open detection */
+
+static const u8 NCT6775_REG_CR_CASEOPEN_CLR[] = { 0xe6, 0xee };
+static const u8 NCT6775_CR_CASEOPEN_CLR_MASK[] = { 0x20, 0x01 };
+
+static ssize_t
+clear_caseopen(struct device *dev, struct device_attribute *attr,
+              const char *buf, size_t count)
+{
+       struct nct6775_data *data = dev_get_drvdata(dev);
+       struct nct6775_sio_data *sio_data = data->driver_data;
+       int nr = to_sensor_dev_attr(attr)->index - INTRUSION_ALARM_BASE;
+       unsigned long val;
+       u8 reg;
+       int ret;
+
+       if (kstrtoul(buf, 10, &val) || val != 0)
+               return -EINVAL;
+
+       mutex_lock(&data->update_lock);
+
+       /*
+        * Use CR registers to clear caseopen status.
+        * The CR registers are the same for all chips, and not all chips
+        * support clearing the caseopen status through "regular" registers.
+        */
+       ret = sio_data->sio_enter(sio_data);
+       if (ret) {
+               count = ret;
+               goto error;
+       }
+
+       sio_data->sio_select(sio_data, NCT6775_LD_ACPI);
+       reg = sio_data->sio_inb(sio_data, NCT6775_REG_CR_CASEOPEN_CLR[nr]);
+       reg |= NCT6775_CR_CASEOPEN_CLR_MASK[nr];
+       sio_data->sio_outb(sio_data, NCT6775_REG_CR_CASEOPEN_CLR[nr], reg);
+       reg &= ~NCT6775_CR_CASEOPEN_CLR_MASK[nr];
+       sio_data->sio_outb(sio_data, NCT6775_REG_CR_CASEOPEN_CLR[nr], reg);
+       sio_data->sio_exit(sio_data);
+
+       data->valid = false;    /* Force cache refresh */
+error:
+       mutex_unlock(&data->update_lock);
+       return count;
+}
+
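+/*
+ * Note that clear_caseopen() pulses the clear bit (sets it, then clears
+ * it again) so the intrusion status latch is reset without leaving the
+ * clear bit asserted.
+ */
+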
+static SENSOR_DEVICE_ATTR(intrusion0_alarm, 0644, nct6775_show_alarm,
+                         clear_caseopen, INTRUSION_ALARM_BASE);
+static SENSOR_DEVICE_ATTR(intrusion1_alarm, 0644, nct6775_show_alarm,
+                         clear_caseopen, INTRUSION_ALARM_BASE + 1);
+static SENSOR_DEVICE_ATTR(intrusion0_beep, 0644, nct6775_show_beep,
+                         nct6775_store_beep, INTRUSION_ALARM_BASE);
+static SENSOR_DEVICE_ATTR(intrusion1_beep, 0644, nct6775_show_beep,
+                         nct6775_store_beep, INTRUSION_ALARM_BASE + 1);
+static SENSOR_DEVICE_ATTR(beep_enable, 0644, nct6775_show_beep,
+                         nct6775_store_beep, BEEP_ENABLE_BASE);
+
+static umode_t nct6775_other_is_visible(struct kobject *kobj,
+                                       struct attribute *attr, int index)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct nct6775_data *data = dev_get_drvdata(dev);
+
+       if (index == 0 && !data->have_vid)
+               return 0;
+
+       if (index == 1 || index == 2) {
+               if (data->ALARM_BITS[INTRUSION_ALARM_BASE + index - 1] < 0)
+                       return 0;
+       }
+
+       if (index == 3 || index == 4) {
+               if (data->BEEP_BITS[INTRUSION_ALARM_BASE + index - 3] < 0)
+                       return 0;
+       }
+
+       return nct6775_attr_mode(data, attr);
+}
+
+/*
+ * nct6775_other_is_visible uses the index into the following array
+ * to determine if attributes should be created or not.
+ * Any change in order or content must be matched.
+ */
+static struct attribute *nct6775_attributes_other[] = {
+       &dev_attr_cpu0_vid.attr,                                /* 0 */
+       &sensor_dev_attr_intrusion0_alarm.dev_attr.attr,        /* 1 */
+       &sensor_dev_attr_intrusion1_alarm.dev_attr.attr,        /* 2 */
+       &sensor_dev_attr_intrusion0_beep.dev_attr.attr,         /* 3 */
+       &sensor_dev_attr_intrusion1_beep.dev_attr.attr,         /* 4 */
+       &sensor_dev_attr_beep_enable.dev_attr.attr,             /* 5 */
+
+       NULL
+};
+
+static const struct attribute_group nct6775_group_other = {
+       .attrs = nct6775_attributes_other,
+       .is_visible = nct6775_other_is_visible,
+};
+
+static int nct6775_platform_probe_init(struct nct6775_data *data)
+{
+       int err;
+       u8 cr2a;
+       struct nct6775_sio_data *sio_data = data->driver_data;
+
+       err = sio_data->sio_enter(sio_data);
+       if (err)
+               return err;
+
+       cr2a = sio_data->sio_inb(sio_data, 0x2a);
+       switch (data->kind) {
+       case nct6775:
+               data->have_vid = (cr2a & 0x40);
+               break;
+       case nct6776:
+               data->have_vid = (cr2a & 0x60) == 0x40;
+               break;
+       case nct6106:
+       case nct6116:
+       case nct6779:
+       case nct6791:
+       case nct6792:
+       case nct6793:
+       case nct6795:
+       case nct6796:
+       case nct6797:
+       case nct6798:
+               break;
+       }
+
+       /*
+        * Read VID value
+        * We can get the VID input values directly at logical device D, register 0xe3.
+        */
+       if (data->have_vid) {
+               sio_data->sio_select(sio_data, NCT6775_LD_VID);
+               data->vid = sio_data->sio_inb(sio_data, 0xe3);
+               data->vrm = vid_which_vrm();
+       }
+
+       if (fan_debounce) {
+               u8 tmp;
+
+               sio_data->sio_select(sio_data, NCT6775_LD_HWM);
+               tmp = sio_data->sio_inb(sio_data,
+                                   NCT6775_REG_CR_FAN_DEBOUNCE);
+               switch (data->kind) {
+               case nct6106:
+               case nct6116:
+                       tmp |= 0xe0;
+                       break;
+               case nct6775:
+                       tmp |= 0x1e;
+                       break;
+               case nct6776:
+               case nct6779:
+                       tmp |= 0x3e;
+                       break;
+               case nct6791:
+               case nct6792:
+               case nct6793:
+               case nct6795:
+               case nct6796:
+               case nct6797:
+               case nct6798:
+                       tmp |= 0x7e;
+                       break;
+               }
+               sio_data->sio_outb(sio_data, NCT6775_REG_CR_FAN_DEBOUNCE,
+                            tmp);
+               pr_info("Enabled fan debounce for chip %s\n", data->name);
+       }
+
+       nct6775_check_fan_inputs(data, sio_data);
+
+       sio_data->sio_exit(sio_data);
+
+       return nct6775_add_attr_group(data, &nct6775_group_other);
+}
+
+static const struct regmap_config nct6775_regmap_config = {
+       .reg_bits = 16,
+       .val_bits = 16,
+       .reg_read = nct6775_reg_read,
+       .reg_write = nct6775_reg_write,
+};
+
+static const struct regmap_config nct6775_wmi_regmap_config = {
+       .reg_bits = 16,
+       .val_bits = 16,
+       .reg_read = nct6775_wmi_reg_read,
+       .reg_write = nct6775_wmi_reg_write,
+};
+
+static int nct6775_platform_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct nct6775_sio_data *sio_data = dev_get_platdata(dev);
+       struct nct6775_data *data;
+       struct resource *res;
+       const struct regmap_config *regmapcfg;
+
+       if (sio_data->access == access_direct) {
+               res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+               if (!devm_request_region(&pdev->dev, res->start, IOREGION_LENGTH, DRVNAME))
+                       return -EBUSY;
+       }
+
+       data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       data->kind = sio_data->kind;
+       data->sioreg = sio_data->sioreg;
+
+       if (sio_data->access == access_direct) {
+               data->addr = res->start;
+               regmapcfg = &nct6775_regmap_config;
+       } else {
+               regmapcfg = &nct6775_wmi_regmap_config;
+       }
+
+       platform_set_drvdata(pdev, data);
+
+       data->driver_data = sio_data;
+       data->driver_init = nct6775_platform_probe_init;
+
+       return nct6775_probe(&pdev->dev, data, regmapcfg);
+}
+
+static struct platform_driver nct6775_driver = {
+       .driver = {
+               .name   = DRVNAME,
+               .pm     = &nct6775_dev_pm_ops,
+       },
+       .probe          = nct6775_platform_probe,
+};
+
+/* nct6775_find() looks for a supported NCT677x chip in the Super-I/O config space */
+static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
+{
+       u16 val;
+       int err;
+       int addr;
+
+       sio_data->access = access_direct;
+       sio_data->sioreg = sioaddr;
+
+       err = sio_data->sio_enter(sio_data);
+       if (err)
+               return err;
+
+       val = (sio_data->sio_inb(sio_data, SIO_REG_DEVID) << 8) |
+               sio_data->sio_inb(sio_data, SIO_REG_DEVID + 1);
+       if (force_id && val != 0xffff)
+               val = force_id;
+
+       switch (val & SIO_ID_MASK) {
+       case SIO_NCT6106_ID:
+               sio_data->kind = nct6106;
+               break;
+       case SIO_NCT6116_ID:
+               sio_data->kind = nct6116;
+               break;
+       case SIO_NCT6775_ID:
+               sio_data->kind = nct6775;
+               break;
+       case SIO_NCT6776_ID:
+               sio_data->kind = nct6776;
+               break;
+       case SIO_NCT6779_ID:
+               sio_data->kind = nct6779;
+               break;
+       case SIO_NCT6791_ID:
+               sio_data->kind = nct6791;
+               break;
+       case SIO_NCT6792_ID:
+               sio_data->kind = nct6792;
+               break;
+       case SIO_NCT6793_ID:
+               sio_data->kind = nct6793;
+               break;
+       case SIO_NCT6795_ID:
+               sio_data->kind = nct6795;
+               break;
+       case SIO_NCT6796_ID:
+               sio_data->kind = nct6796;
+               break;
+       case SIO_NCT6797_ID:
+               sio_data->kind = nct6797;
+               break;
+       case SIO_NCT6798_ID:
+               sio_data->kind = nct6798;
+               break;
+       default:
+               if (val != 0xffff)
+                       pr_debug("unsupported chip ID: 0x%04x\n", val);
+               sio_data->sio_exit(sio_data);
+               return -ENODEV;
+       }
+
+       /* We have a known chip, find the HWM I/O address */
+       sio_data->sio_select(sio_data, NCT6775_LD_HWM);
+       val = (sio_data->sio_inb(sio_data, SIO_REG_ADDR) << 8)
+           | sio_data->sio_inb(sio_data, SIO_REG_ADDR + 1);
+       addr = val & IOREGION_ALIGNMENT;
+       if (addr == 0) {
+               pr_err("Refusing to enable a Super-I/O device with a base I/O port 0\n");
+               sio_data->sio_exit(sio_data);
+               return -ENODEV;
+       }
+
+       /* Activate logical device if needed */
+       val = sio_data->sio_inb(sio_data, SIO_REG_ENABLE);
+       if (!(val & 0x01)) {
+               pr_warn("Forcibly enabling Super-I/O. Sensor is probably unusable.\n");
+               sio_data->sio_outb(sio_data, SIO_REG_ENABLE, val | 0x01);
+       }
+
+       if (sio_data->kind == nct6791 || sio_data->kind == nct6792 ||
+           sio_data->kind == nct6793 || sio_data->kind == nct6795 ||
+           sio_data->kind == nct6796 || sio_data->kind == nct6797 ||
+           sio_data->kind == nct6798)
+               nct6791_enable_io_mapping(sio_data);
+
+       sio_data->sio_exit(sio_data);
+       pr_info("Found %s or compatible chip at %#x:%#x\n",
+               nct6775_sio_names[sio_data->kind], sioaddr, addr);
+
+       return addr;
+}
+
+/*
+ * When Super-I/O functions move to a separate file, the Super-I/O bus
+ * will manage the lifetime of the device and this module will only keep
+ * track of the nct6775 driver. But since we use platform_device_alloc(),
+ * we must keep track of the device here.
+ */
+static struct platform_device *pdev[2];
+
+static const char * const asus_wmi_boards[] = {
+       "PRO H410T",
+       "ProArt X570-CREATOR WIFI",
+       "Pro B550M-C",
+       "Pro WS X570-ACE",
+       "PRIME B360-PLUS",
+       "PRIME B460-PLUS",
+       "PRIME B550-PLUS",
+       "PRIME B550M-A",
+       "PRIME B550M-A (WI-FI)",
+       "PRIME H410M-R",
+       "PRIME X570-P",
+       "PRIME X570-PRO",
+       "ROG CROSSHAIR VIII DARK HERO",
+       "ROG CROSSHAIR VIII FORMULA",
+       "ROG CROSSHAIR VIII HERO",
+       "ROG CROSSHAIR VIII IMPACT",
+       "ROG STRIX B550-A GAMING",
+       "ROG STRIX B550-E GAMING",
+       "ROG STRIX B550-F GAMING",
+       "ROG STRIX B550-F GAMING (WI-FI)",
+       "ROG STRIX B550-F GAMING WIFI II",
+       "ROG STRIX B550-I GAMING",
+       "ROG STRIX B550-XE GAMING (WI-FI)",
+       "ROG STRIX X570-E GAMING",
+       "ROG STRIX X570-E GAMING WIFI II",
+       "ROG STRIX X570-F GAMING",
+       "ROG STRIX X570-I GAMING",
+       "ROG STRIX Z390-E GAMING",
+       "ROG STRIX Z390-F GAMING",
+       "ROG STRIX Z390-H GAMING",
+       "ROG STRIX Z390-I GAMING",
+       "ROG STRIX Z490-A GAMING",
+       "ROG STRIX Z490-E GAMING",
+       "ROG STRIX Z490-F GAMING",
+       "ROG STRIX Z490-G GAMING",
+       "ROG STRIX Z490-G GAMING (WI-FI)",
+       "ROG STRIX Z490-H GAMING",
+       "ROG STRIX Z490-I GAMING",
+       "TUF GAMING B550M-PLUS",
+       "TUF GAMING B550M-PLUS (WI-FI)",
+       "TUF GAMING B550-PLUS",
+       "TUF GAMING B550-PRO",
+       "TUF GAMING X570-PLUS",
+       "TUF GAMING X570-PLUS (WI-FI)",
+       "TUF GAMING X570-PRO (WI-FI)",
+       "TUF GAMING Z490-PLUS",
+       "TUF GAMING Z490-PLUS (WI-FI)",
+};
+
+static int __init sensors_nct6775_platform_init(void)
+{
+       int i, err;
+       bool found = false;
+       int address;
+       struct resource res;
+       struct nct6775_sio_data sio_data;
+       int sioaddr[2] = { 0x2e, 0x4e };
+       enum sensor_access access = access_direct;
+       const char *board_vendor, *board_name;
+       u8 tmp;
+
+       err = platform_driver_register(&nct6775_driver);
+       if (err)
+               return err;
+
+       board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
+       board_name = dmi_get_system_info(DMI_BOARD_NAME);
+
+       if (board_name && board_vendor &&
+           !strcmp(board_vendor, "ASUSTeK COMPUTER INC.")) {
+               err = match_string(asus_wmi_boards, ARRAY_SIZE(asus_wmi_boards),
+                                  board_name);
+               if (err >= 0) {
+                       /* If reading the chip ID via WMI succeeds, use WMI */
+                       if (!nct6775_asuswmi_read(0, NCT6775_PORT_CHIPID, &tmp) && tmp) {
+                               pr_info("Using Asus WMI to access chip %#x.\n", tmp);
+                               access = access_asuswmi;
+                       } else {
+                               pr_err("Can't read chip ID via Asus WMI.\n");
+                       }
+                       }
+               }
+       }
+
+       /*
+        * Initialize sio_data->kind and sio_data->sioreg.
+        *
+        * When Super-I/O functions move to a separate file, the Super-I/O
+        * driver will probe 0x2e and 0x4e and auto-detect the presence of an
+        * nct6775 hardware monitor, and call probe().
+        */
+       for (i = 0; i < ARRAY_SIZE(pdev); i++) {
+               sio_data.sio_outb = superio_outb;
+               sio_data.sio_inb = superio_inb;
+               sio_data.sio_select = superio_select;
+               sio_data.sio_enter = superio_enter;
+               sio_data.sio_exit = superio_exit;
+
+               address = nct6775_find(sioaddr[i], &sio_data);
+               if (address <= 0)
+                       continue;
+
+               found = true;
+
+               sio_data.access = access;
+
+               if (access == access_asuswmi) {
+                       sio_data.sio_outb = superio_wmi_outb;
+                       sio_data.sio_inb = superio_wmi_inb;
+                       sio_data.sio_select = superio_wmi_select;
+                       sio_data.sio_enter = superio_wmi_enter;
+                       sio_data.sio_exit = superio_wmi_exit;
+               }
+
+               pdev[i] = platform_device_alloc(DRVNAME, address);
+               if (!pdev[i]) {
+                       err = -ENOMEM;
+                       goto exit_device_unregister;
+               }
+
+               err = platform_device_add_data(pdev[i], &sio_data,
+                                              sizeof(struct nct6775_sio_data));
+               if (err)
+                       goto exit_device_put;
+
+               if (sio_data.access == access_direct) {
+                       memset(&res, 0, sizeof(res));
+                       res.name = DRVNAME;
+                       res.start = address + IOREGION_OFFSET;
+                       res.end = address + IOREGION_OFFSET + IOREGION_LENGTH - 1;
+                       res.flags = IORESOURCE_IO;
+
+                       err = acpi_check_resource_conflict(&res);
+                       if (err) {
+                               platform_device_put(pdev[i]);
+                               pdev[i] = NULL;
+                               continue;
+                       }
+
+                       err = platform_device_add_resources(pdev[i], &res, 1);
+                       if (err)
+                               goto exit_device_put;
+               }
+
+               /* platform_device_add calls probe() */
+               err = platform_device_add(pdev[i]);
+               if (err)
+                       goto exit_device_put;
+       }
+       if (!found) {
+               err = -ENODEV;
+               goto exit_unregister;
+       }
+
+       return 0;
+
+exit_device_put:
+       platform_device_put(pdev[i]);
+exit_device_unregister:
+       while (--i >= 0) {
+               if (pdev[i])
+                       platform_device_unregister(pdev[i]);
+       }
+exit_unregister:
+       platform_driver_unregister(&nct6775_driver);
+       return err;
+}
+
+static void __exit sensors_nct6775_platform_exit(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(pdev); i++) {
+               if (pdev[i])
+                       platform_device_unregister(pdev[i]);
+       }
+       platform_driver_unregister(&nct6775_driver);
+}
+
+MODULE_AUTHOR("Guenter Roeck <linux@roeck-us.net>");
+MODULE_DESCRIPTION("Platform driver for NCT6775F and compatible chips");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(HWMON_NCT6775);
+
+module_init(sensors_nct6775_platform_init);
+module_exit(sensors_nct6775_platform_exit);
diff --git a/drivers/hwmon/nct6775.h b/drivers/hwmon/nct6775.h
new file mode 100644 (file)
index 0000000..93f7081
--- /dev/null
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __HWMON_NCT6775_H__
+#define __HWMON_NCT6775_H__
+
+#include <linux/types.h>
+
+enum kinds { nct6106, nct6116, nct6775, nct6776, nct6779, nct6791, nct6792,
+            nct6793, nct6795, nct6796, nct6797, nct6798 };
+enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 };
+
+#define NUM_TEMP       10      /* Max number of temp attribute sets w/ limits */
+#define NUM_TEMP_FIXED 6       /* Max number of fixed temp attribute sets */
+#define NUM_TSI_TEMP   8       /* Max number of TSI temp register pairs */
+
+#define NUM_REG_ALARM  7       /* Max number of alarm registers */
+#define NUM_REG_BEEP   5       /* Max number of beep registers */
+
+#define NUM_FAN                7
+
+struct nct6775_data {
+       int addr;       /* IO base of hw monitor block */
+       int sioreg;     /* SIO register address */
+       enum kinds kind;
+       const char *name;
+
+       const struct attribute_group *groups[7];
+       u8 num_groups;
+
+       u16 reg_temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst,
+                                   * 3=temp_crit, 4=temp_lcrit
+                                   */
+       u8 temp_src[NUM_TEMP];
+       u16 reg_temp_config[NUM_TEMP];
+       const char * const *temp_label;
+       u32 temp_mask;
+       u32 virt_temp_mask;
+
+       u16 REG_CONFIG;
+       u16 REG_VBAT;
+       u16 REG_DIODE;
+       u8 DIODE_MASK;
+
+       const s8 *ALARM_BITS;
+       const s8 *BEEP_BITS;
+
+       const u16 *REG_VIN;
+       const u16 *REG_IN_MINMAX[2];
+
+       const u16 *REG_TARGET;
+       const u16 *REG_FAN;
+       const u16 *REG_FAN_MODE;
+       const u16 *REG_FAN_MIN;
+       const u16 *REG_FAN_PULSES;
+       const u16 *FAN_PULSE_SHIFT;
+       const u16 *REG_FAN_TIME[3];
+
+       const u16 *REG_TOLERANCE_H;
+
+       const u8 *REG_PWM_MODE;
+       const u8 *PWM_MODE_MASK;
+
+       const u16 *REG_PWM[7];  /* [0]=pwm, [1]=pwm_start, [2]=pwm_floor,
+                                * [3]=pwm_max, [4]=pwm_step,
+                                * [5]=weight_duty_step, [6]=weight_duty_base
+                                */
+       const u16 *REG_PWM_READ;
+
+       const u16 *REG_CRITICAL_PWM_ENABLE;
+       u8 CRITICAL_PWM_ENABLE_MASK;
+       const u16 *REG_CRITICAL_PWM;
+
+       const u16 *REG_AUTO_TEMP;
+       const u16 *REG_AUTO_PWM;
+
+       const u16 *REG_CRITICAL_TEMP;
+       const u16 *REG_CRITICAL_TEMP_TOLERANCE;
+
+       const u16 *REG_TEMP_SOURCE;     /* temp register sources */
+       const u16 *REG_TEMP_SEL;
+       const u16 *REG_WEIGHT_TEMP_SEL;
+       const u16 *REG_WEIGHT_TEMP[3];  /* 0=base, 1=tolerance, 2=step */
+
+       const u16 *REG_TEMP_OFFSET;
+
+       const u16 *REG_ALARM;
+       const u16 *REG_BEEP;
+
+       const u16 *REG_TSI_TEMP;
+
+       unsigned int (*fan_from_reg)(u16 reg, unsigned int divreg);
+       unsigned int (*fan_from_reg_min)(u16 reg, unsigned int divreg);
+
+       struct mutex update_lock;
+       bool valid;             /* true if following fields are valid */
+       unsigned long last_updated;     /* In jiffies */
+
+       /* Register values */
+       u8 bank;                /* current register bank */
+       u8 in_num;              /* number of voltage ("in") inputs */
+       u8 in[15][3];           /* [0]=in, [1]=in_max, [2]=in_min */
+       unsigned int rpm[NUM_FAN];
+       u16 fan_min[NUM_FAN];
+       u8 fan_pulses[NUM_FAN];
+       u8 fan_div[NUM_FAN];
+       u8 has_pwm;
+       u8 has_fan;             /* some fan inputs can be disabled */
+       u8 has_fan_min;         /* some fans don't have min register */
+       bool has_fan_div;
+
+       u8 num_temp_alarms;     /* 2, 3, or 6 */
+       u8 num_temp_beeps;      /* 2, 3, or 6 */
+       u8 temp_fixed_num;      /* 3 or 6 */
+       u8 temp_type[NUM_TEMP_FIXED];
+       s8 temp_offset[NUM_TEMP_FIXED];
+       s16 temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst,
+                               * 3=temp_crit, 4=temp_lcrit
+                               */
+       s16 tsi_temp[NUM_TSI_TEMP];
+       u64 alarms;
+       u64 beeps;
+
+       u8 pwm_num;     /* number of pwm */
+       u8 pwm_mode[NUM_FAN];   /* 0->DC variable voltage,
+                                * 1->PWM variable duty cycle
+                                */
+       enum pwm_enable pwm_enable[NUM_FAN];
+                       /* 0->off
+                        * 1->manual
+                        * 2->thermal cruise mode (also called SmartFan I)
+                        * 3->fan speed cruise mode
+                        * 4->SmartFan III
+                        * 5->enhanced variable thermal cruise (SmartFan IV)
+                        */
+       u8 pwm[7][NUM_FAN];     /* [0]=pwm, [1]=pwm_start, [2]=pwm_floor,
+                                * [3]=pwm_max, [4]=pwm_step,
+                                * [5]=weight_duty_step, [6]=weight_duty_base
+                                */
+
+       u8 target_temp[NUM_FAN];
+       u8 target_temp_mask;
+       u32 target_speed[NUM_FAN];
+       u32 target_speed_tolerance[NUM_FAN];
+       u8 speed_tolerance_limit;
+
+       u8 temp_tolerance[2][NUM_FAN];
+       u8 tolerance_mask;
+
+       u8 fan_time[3][NUM_FAN]; /* 0 = stop_time, 1 = step_up, 2 = step_down */
+
+       /* Automatic fan speed control registers */
+       int auto_pwm_num;
+       u8 auto_pwm[NUM_FAN][7];
+       u8 auto_temp[NUM_FAN][7];
+       u8 pwm_temp_sel[NUM_FAN];
+       u8 pwm_weight_temp_sel[NUM_FAN];
+       u8 weight_temp[3][NUM_FAN];     /* 0->temp_step, 1->temp_step_tol,
+                                        * 2->temp_base
+                                        */
+
+       u8 vid;
+       u8 vrm;
+
+       bool have_vid;
+
+       u16 have_temp;
+       u16 have_temp_fixed;
+       u16 have_tsi_temp;
+       u16 have_in;
+
+       /* Remember extra register values over suspend/resume */
+       u8 vbat;
+       u8 fandiv1;
+       u8 fandiv2;
+       u8 sio_reg_enable;
+
+       struct regmap *regmap;
+       bool read_only;
+
+       /* driver-specific (platform, i2c) initialization hook and data */
+       int (*driver_init)(struct nct6775_data *data);
+       void *driver_data;
+};
+
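+/* Thin wrappers around the chip regmap; reads and writes are 16-bit wide */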
+static inline int nct6775_read_value(struct nct6775_data *data, u16 reg, u16 *value)
+{
+       unsigned int tmp;
+       int ret = regmap_read(data->regmap, reg, &tmp);
+
+       if (!ret)
+               *value = tmp;
+       return ret;
+}
+
+static inline int nct6775_write_value(struct nct6775_data *data, u16 reg, u16 value)
+{
+       return regmap_write(data->regmap, reg, value);
+}
+
+bool nct6775_reg_is_word_sized(struct nct6775_data *data, u16 reg);
+int nct6775_probe(struct device *dev, struct nct6775_data *data,
+                 const struct regmap_config *regmapcfg);
+
+ssize_t nct6775_show_alarm(struct device *dev, struct device_attribute *attr, char *buf);
+ssize_t nct6775_show_beep(struct device *dev, struct device_attribute *attr, char *buf);
+ssize_t nct6775_store_beep(struct device *dev, struct device_attribute *attr, const char *buf,
+                          size_t count);
+
+static inline int nct6775_write_temp(struct nct6775_data *data, u16 reg, u16 value)
+{
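+       /* Byte-sized temperature registers hold only the high (MSB) byte */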
+       if (!nct6775_reg_is_word_sized(data, reg))
+               value >>= 8;
+       return nct6775_write_value(data, reg, value);
+}
+
+static inline umode_t nct6775_attr_mode(struct nct6775_data *data, struct attribute *attr)
+{
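+       /* In read-only mode, drop all write permission bits (0222) */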
+       return data->read_only ? (attr->mode & ~0222) : attr->mode;
+}
+
+static inline int
+nct6775_add_attr_group(struct nct6775_data *data, const struct attribute_group *group)
+{
+       /* Need to leave a NULL terminator at the end of data->groups */
+       if (data->num_groups == ARRAY_SIZE(data->groups) - 1)
+               return -ENOBUFS;
+
+       data->groups[data->num_groups++] = group;
+       return 0;
+}
+
+#define NCT6775_REG_BANK       0x4E
+#define NCT6775_REG_CONFIG     0x40
+
+#define NCT6775_REG_FANDIV1            0x506
+#define NCT6775_REG_FANDIV2            0x507
+
+#define NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE    0x28
+
+#define FAN_ALARM_BASE         16
+#define TEMP_ALARM_BASE                24
+#define INTRUSION_ALARM_BASE   30
+#define BEEP_ENABLE_BASE       15
+
+/*
+ * Not currently used:
+ * REG_MAN_ID has the value 0x5ca3 for all supported chips.
+ * REG_CHIP_ID == 0x88/0xa1/0xc1 depending on chip model.
+ * REG_MAN_ID is at port 0x4f
+ * REG_CHIP_ID is at port 0x58
+ */
+
+#endif /* __HWMON_NCT6775_H__ */
index f00cd59f1d19f7358fceff0f69e4ad6e95c8fdb5..d78f4bebc7189b2200de3dc2128d02514459f8a1 100644 (file)
@@ -1149,44 +1149,75 @@ static void occ_parse_poll_response(struct occ *occ)
                sizeof(*header), size + sizeof(*header));
 }
 
-int occ_setup(struct occ *occ, const char *name)
+int occ_active(struct occ *occ, bool active)
 {
-       int rc;
-
-       mutex_init(&occ->lock);
-       occ->groups[0] = &occ->group;
+       int rc = mutex_lock_interruptible(&occ->lock);
 
-       /* no need to lock */
-       rc = occ_poll(occ);
-       if (rc == -ESHUTDOWN) {
-               dev_info(occ->bus_dev, "host is not ready\n");
-               return rc;
-       } else if (rc < 0) {
-               dev_err(occ->bus_dev,
-                       "failed to get OCC poll response=%02x: %d\n",
-                       occ->resp.return_status, rc);
+       if (rc)
                return rc;
-       }
 
-       occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
-       occ_parse_poll_response(occ);
+       if (active) {
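+               /* Going active: poll the OCC, parse sensors, register hwmon */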
+               if (occ->active) {
+                       rc = -EALREADY;
+                       goto unlock;
+               }
 
-       rc = occ_setup_sensor_attrs(occ);
-       if (rc) {
-               dev_err(occ->bus_dev, "failed to setup sensor attrs: %d\n",
-                       rc);
-               return rc;
-       }
+               occ->error_count = 0;
+               occ->last_safe = 0;
 
-       occ->hwmon = devm_hwmon_device_register_with_groups(occ->bus_dev, name,
-                                                           occ, occ->groups);
-       if (IS_ERR(occ->hwmon)) {
-               rc = PTR_ERR(occ->hwmon);
-               dev_err(occ->bus_dev, "failed to register hwmon device: %d\n",
-                       rc);
-               return rc;
+               rc = occ_poll(occ);
+               if (rc < 0) {
+                       dev_err(occ->bus_dev,
+                               "failed to get OCC poll response=%02x: %d\n",
+                               occ->resp.return_status, rc);
+                       goto unlock;
+               }
+
+               occ->active = true;
+               occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
+               occ_parse_poll_response(occ);
+
+               rc = occ_setup_sensor_attrs(occ);
+               if (rc) {
+                       dev_err(occ->bus_dev,
+                               "failed to setup sensor attrs: %d\n", rc);
+                       goto unlock;
+               }
+
+               occ->hwmon = hwmon_device_register_with_groups(occ->bus_dev,
+                                                              "occ", occ,
+                                                              occ->groups);
+               if (IS_ERR(occ->hwmon)) {
+                       rc = PTR_ERR(occ->hwmon);
+                       occ->hwmon = NULL;
+                       dev_err(occ->bus_dev,
+                               "failed to register hwmon device: %d\n", rc);
+                       goto unlock;
+               }
+       } else {
+               if (!occ->active) {
+                       rc = -EALREADY;
+                       goto unlock;
+               }
+
+               if (occ->hwmon)
+                       hwmon_device_unregister(occ->hwmon);
+               occ->active = false;
+               occ->hwmon = NULL;
        }
 
+unlock:
+       mutex_unlock(&occ->lock);
+       return rc;
+}
+
+int occ_setup(struct occ *occ)
+{
+       int rc;
+
+       mutex_init(&occ->lock);
+       occ->groups[0] = &occ->group;
+
        rc = occ_setup_sysfs(occ);
        if (rc)
                dev_err(occ->bus_dev, "failed to setup sysfs: %d\n", rc);
@@ -1195,6 +1226,15 @@ int occ_setup(struct occ *occ, const char *name)
 }
 EXPORT_SYMBOL_GPL(occ_setup);
 
+void occ_shutdown(struct occ *occ)
+{
+       occ_shutdown_sysfs(occ);
+
+       if (occ->hwmon)
+               hwmon_device_unregister(occ->hwmon);
+}
+EXPORT_SYMBOL_GPL(occ_shutdown);
+
 MODULE_AUTHOR("Eddie James <eajames@linux.ibm.com>");
 MODULE_DESCRIPTION("Common OCC hwmon code");
 MODULE_LICENSE("GPL");
index 2dd4a4d240c0f775b24cfe5852d72638de725d83..64d5ec7e169b04982951a66cc0c8710ee836de96 100644 (file)
@@ -106,6 +106,7 @@ struct occ {
        struct attribute_group group;
        const struct attribute_group *groups[2];
 
+       bool active;
        int error;                      /* final transfer error after retry */
        int last_error;                 /* latest transfer error */
        unsigned int error_count;       /* number of xfr errors observed */
@@ -123,9 +124,11 @@ struct occ {
        u8 prev_mode;
 };
 
-int occ_setup(struct occ *occ, const char *name);
+int occ_active(struct occ *occ, bool active);
+int occ_setup(struct occ *occ);
 int occ_setup_sysfs(struct occ *occ);
 void occ_shutdown(struct occ *occ);
+void occ_shutdown_sysfs(struct occ *occ);
 void occ_sysfs_poll_done(struct occ *occ);
 int occ_update_response(struct occ *occ);
 
index 9e61e1fb5142cfeb398ebb1dd5d634cc76d39a1e..da39ea28df3122652a744454785c03c5e498e97f 100644 (file)
@@ -223,7 +223,7 @@ static int p8_i2c_occ_probe(struct i2c_client *client)
        occ->poll_cmd_data = 0x10;              /* P8 OCC poll data */
        occ->send_cmd = p8_i2c_occ_send_cmd;
 
-       return occ_setup(occ, "p8_occ");
+       return occ_setup(occ);
 }
 
 static int p8_i2c_occ_remove(struct i2c_client *client)
index 49b13cc01073a51e6d7aa1a77859899237634ec8..42fc7b97bb34b55ed5dc4ea73e31ff8b2a7a1af4 100644 (file)
@@ -145,7 +145,7 @@ static int p9_sbe_occ_probe(struct platform_device *pdev)
        occ->poll_cmd_data = 0x20;              /* P9 OCC poll data */
        occ->send_cmd = p9_sbe_occ_send_cmd;
 
-       rc = occ_setup(occ, "p9_occ");
+       rc = occ_setup(occ);
        if (rc == -ESHUTDOWN)
                rc = -ENODEV;   /* Host is shutdown, don't spew errors */
 
index b2f788a777469367676fb63f20788cd6005d089a..2317301fc1e9f62cd367a51d5c256bf95e09b1eb 100644 (file)
@@ -6,13 +6,13 @@
 #include <linux/export.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/kernel.h>
+#include <linux/kstrtox.h>
 #include <linux/sysfs.h>
 
 #include "common.h"
 
 /* OCC status register */
 #define OCC_STAT_MASTER                        BIT(7)
-#define OCC_STAT_ACTIVE                        BIT(0)
 
 /* OCC extended status register */
 #define OCC_EXT_STAT_DVFS_OT           BIT(7)
 #define OCC_EXT_STAT_DVFS_VDD          BIT(3)
 #define OCC_EXT_STAT_GPU_THROTTLE      GENMASK(2, 0)
 
+static ssize_t occ_active_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       int rc;
+       bool active;
+       struct occ *occ = dev_get_drvdata(dev);
+
+       rc = kstrtobool(buf, &active);
+       if (rc)
+               return rc;
+
+       rc = occ_active(occ, active);
+       if (rc)
+               return rc;
+
+       return count;
+}
+
 static ssize_t occ_sysfs_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
@@ -31,54 +50,64 @@ static ssize_t occ_sysfs_show(struct device *dev,
        struct occ_poll_response_header *header;
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
-       rc = occ_update_response(occ);
-       if (rc)
-               return rc;
+       if (occ->active) {
+               rc = occ_update_response(occ);
+               if (rc)
+                       return rc;
 
-       header = (struct occ_poll_response_header *)occ->resp.data;
-
-       switch (sattr->index) {
-       case 0:
-               val = !!(header->status & OCC_STAT_MASTER);
-               break;
-       case 1:
-               val = !!(header->status & OCC_STAT_ACTIVE);
-               break;
-       case 2:
-               val = !!(header->ext_status & OCC_EXT_STAT_DVFS_OT);
-               break;
-       case 3:
-               val = !!(header->ext_status & OCC_EXT_STAT_DVFS_POWER);
-               break;
-       case 4:
-               val = !!(header->ext_status & OCC_EXT_STAT_MEM_THROTTLE);
-               break;
-       case 5:
-               val = !!(header->ext_status & OCC_EXT_STAT_QUICK_DROP);
-               break;
-       case 6:
-               val = header->occ_state;
-               break;
-       case 7:
-               if (header->status & OCC_STAT_MASTER)
-                       val = hweight8(header->occs_present);
-               else
+               header = (struct occ_poll_response_header *)occ->resp.data;
+
+               switch (sattr->index) {
+               case 0:
+                       val = !!(header->status & OCC_STAT_MASTER);
+                       break;
+               case 1:
                        val = 1;
-               break;
-       case 8:
-               val = header->ips_status;
-               break;
-       case 9:
-               val = header->mode;
-               break;
-       case 10:
-               val = !!(header->ext_status & OCC_EXT_STAT_DVFS_VDD);
-               break;
-       case 11:
-               val = header->ext_status & OCC_EXT_STAT_GPU_THROTTLE;
-               break;
-       default:
-               return -EINVAL;
+                       break;
+               case 2:
+                       val = !!(header->ext_status & OCC_EXT_STAT_DVFS_OT);
+                       break;
+               case 3:
+                       val = !!(header->ext_status & OCC_EXT_STAT_DVFS_POWER);
+                       break;
+               case 4:
+                       val = !!(header->ext_status &
+                                OCC_EXT_STAT_MEM_THROTTLE);
+                       break;
+               case 5:
+                       val = !!(header->ext_status & OCC_EXT_STAT_QUICK_DROP);
+                       break;
+               case 6:
+                       val = header->occ_state;
+                       break;
+               case 7:
+                       if (header->status & OCC_STAT_MASTER)
+                               val = hweight8(header->occs_present);
+                       else
+                               val = 1;
+                       break;
+               case 8:
+                       val = header->ips_status;
+                       break;
+               case 9:
+                       val = header->mode;
+                       break;
+               case 10:
+                       val = !!(header->ext_status & OCC_EXT_STAT_DVFS_VDD);
+                       break;
+               case 11:
+                       val = header->ext_status & OCC_EXT_STAT_GPU_THROTTLE;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       } else {
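+               /* OCC not active: occ_active reads 0; other attributes report no data */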
+               if (sattr->index == 1)
+                       val = 0;
+               else if (sattr->index <= 11)
+                       val = -ENODATA;
+               else
+                       return -EINVAL;
        }
 
        return sysfs_emit(buf, "%d\n", val);
@@ -95,7 +124,8 @@ static ssize_t occ_error_show(struct device *dev,
 }
 
 static SENSOR_DEVICE_ATTR(occ_master, 0444, occ_sysfs_show, NULL, 0);
-static SENSOR_DEVICE_ATTR(occ_active, 0444, occ_sysfs_show, NULL, 1);
+static SENSOR_DEVICE_ATTR(occ_active, 0644, occ_sysfs_show, occ_active_store,
+                         1);
 static SENSOR_DEVICE_ATTR(occ_dvfs_overtemp, 0444, occ_sysfs_show, NULL, 2);
 static SENSOR_DEVICE_ATTR(occ_dvfs_power, 0444, occ_sysfs_show, NULL, 3);
 static SENSOR_DEVICE_ATTR(occ_mem_throttle, 0444, occ_sysfs_show, NULL, 4);
@@ -139,7 +169,7 @@ void occ_sysfs_poll_done(struct occ *occ)
         * On the first poll response, we haven't yet created the sysfs
         * attributes, so don't make any notify calls.
         */
-       if (!occ->hwmon)
+       if (!occ->active)
                goto done;
 
        if ((header->status & OCC_STAT_MASTER) !=
@@ -148,12 +178,6 @@ void occ_sysfs_poll_done(struct occ *occ)
                sysfs_notify(&occ->bus_dev->kobj, NULL, name);
        }
 
-       if ((header->status & OCC_STAT_ACTIVE) !=
-           (occ->prev_stat & OCC_STAT_ACTIVE)) {
-               name = sensor_dev_attr_occ_active.dev_attr.attr.name;
-               sysfs_notify(&occ->bus_dev->kobj, NULL, name);
-       }
-
        if ((header->ext_status & OCC_EXT_STAT_DVFS_OT) !=
            (occ->prev_ext_stat & OCC_EXT_STAT_DVFS_OT)) {
                name = sensor_dev_attr_occ_dvfs_overtemp.dev_attr.attr.name;
@@ -227,8 +251,7 @@ int occ_setup_sysfs(struct occ *occ)
        return sysfs_create_group(&occ->bus_dev->kobj, &occ_sysfs);
 }
 
-void occ_shutdown(struct occ *occ)
+void occ_shutdown_sysfs(struct occ *occ)
 {
        sysfs_remove_group(&occ->bus_dev->kobj, &occ_sysfs);
 }
-EXPORT_SYMBOL_GPL(occ_shutdown);
index 12156328f5cf515cd45f1b2c9400b797a0fac201..57470fda5f6c91f46c02ff3563f8dbd781342527 100644 (file)
@@ -447,29 +447,23 @@ static const struct hwmon_ops peci_cputemp_ops = {
        .read = cputemp_read,
 };
 
-static const u32 peci_cputemp_temp_channel_config[] = {
-       /* Die temperature */
-       HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | HWMON_T_CRIT_HYST,
-       /* DTS margin */
-       HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | HWMON_T_CRIT_HYST,
-       /* Tcontrol temperature */
-       HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_CRIT,
-       /* Tthrottle temperature */
-       HWMON_T_LABEL | HWMON_T_INPUT,
-       /* Tjmax temperature */
-       HWMON_T_LABEL | HWMON_T_INPUT,
-       /* Core temperature - for all core channels */
-       [channel_core ... CPUTEMP_CHANNEL_NUMS - 1] = HWMON_T_LABEL | HWMON_T_INPUT,
-       0
-};
-
-static const struct hwmon_channel_info peci_cputemp_temp_channel = {
-       .type = hwmon_temp,
-       .config = peci_cputemp_temp_channel_config,
-};
-
 static const struct hwmon_channel_info *peci_cputemp_info[] = {
-       &peci_cputemp_temp_channel,
+       HWMON_CHANNEL_INFO(temp,
+                          /* Die temperature */
+                          HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX |
+                          HWMON_T_CRIT | HWMON_T_CRIT_HYST,
+                          /* DTS margin */
+                          HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX |
+                          HWMON_T_CRIT | HWMON_T_CRIT_HYST,
+                          /* Tcontrol temperature */
+                          HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_CRIT,
+                          /* Tthrottle temperature */
+                          HWMON_T_LABEL | HWMON_T_INPUT,
+                          /* Tjmax temperature */
+                          HWMON_T_LABEL | HWMON_T_INPUT,
+                          /* Core temperature - for all core channels */
+                          [channel_core ... CPUTEMP_CHANNEL_NUMS - 1] =
+                                               HWMON_T_LABEL | HWMON_T_INPUT),
        NULL
 };
 
index c8222354c0056bbafaf23b0cd9214280de32502a..0a633bda36689b9528c8d94783b43e4af10fe2d9 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/auxiliary_bus.h>
 #include <linux/bitfield.h>
 #include <linux/bitops.h>
+#include <linux/devm-helpers.h>
 #include <linux/hwmon.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
@@ -219,7 +220,7 @@ static int check_populated_dimms(struct peci_dimmtemp *priv)
        int chan_rank_max = priv->gen_info->chan_rank_max;
        int dimm_idx_max = priv->gen_info->dimm_idx_max;
        u32 chan_rank_empty = 0;
-       u64 dimm_mask = 0;
+       u32 dimm_mask = 0;
        int chan_rank, dimm_idx, ret;
        u32 pcs;
 
@@ -278,9 +279,9 @@ static int check_populated_dimms(struct peci_dimmtemp *priv)
                return -EAGAIN;
        }
 
-       dev_dbg(priv->dev, "Scanned populated DIMMs: %#llx\n", dimm_mask);
+       dev_dbg(priv->dev, "Scanned populated DIMMs: %#x\n", dimm_mask);
 
-       bitmap_from_u64(priv->dimm_mask, dimm_mask);
+       bitmap_from_arr32(priv->dimm_mask, &dimm_mask, DIMM_NUMS_MAX);
 
        return 0;
 }
@@ -299,18 +300,10 @@ static int create_dimm_temp_label(struct peci_dimmtemp *priv, int chan)
        return 0;
 }
 
-static const u32 peci_dimmtemp_temp_channel_config[] = {
-       [0 ... DIMM_NUMS_MAX - 1] = HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT,
-       0
-};
-
-static const struct hwmon_channel_info peci_dimmtemp_temp_channel = {
-       .type = hwmon_temp,
-       .config = peci_dimmtemp_temp_channel_config,
-};
-
 static const struct hwmon_channel_info *peci_dimmtemp_temp_info[] = {
-       &peci_dimmtemp_temp_channel,
+       HWMON_CHANNEL_INFO(temp,
+                          [0 ... DIMM_NUMS_MAX - 1] = HWMON_T_LABEL |
+                               HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT),
        NULL
 };
 
@@ -378,13 +371,6 @@ static void create_dimm_temp_info_delayed(struct work_struct *work)
                dev_err(priv->dev, "Failed to populate DIMM temp info\n");
 }
 
-static void remove_delayed_work(void *_priv)
-{
-       struct peci_dimmtemp *priv = _priv;
-
-       cancel_delayed_work_sync(&priv->detect_work);
-}
-
 static int peci_dimmtemp_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id)
 {
        struct device *dev = &adev->dev;
@@ -415,9 +401,8 @@ static int peci_dimmtemp_probe(struct auxiliary_device *adev, const struct auxil
                         "Unexpected PECI revision %#x, some features may be unavailable\n",
                         peci_dev->info.peci_revision);
 
-       INIT_DELAYED_WORK(&priv->detect_work, create_dimm_temp_info_delayed);
-
-       ret = devm_add_action_or_reset(priv->dev, remove_delayed_work, priv);
+       ret = devm_delayed_work_autocancel(priv->dev, &priv->detect_work,
+                                          create_dimm_temp_info_delayed);
        if (ret)
                return ret;
 
index a2ea1d5a87650df2426d620a5acbbc0e65f27a5b..dfae76db65aef21eec3f6ba7d1838984ddb20a2e 100644 (file)
@@ -228,10 +228,10 @@ config SENSORS_MAX16064
          be called max16064.
 
 config SENSORS_MAX16601
-       tristate "Maxim MAX16508, MAX16601"
+       tristate "Maxim MAX16508, MAX16601, MAX16602"
        help
          If you say yes here you get hardware monitoring support for Maxim
-         MAX16508 and MAX16601.
+         MAX16508, MAX16601 and MAX16602.
 
          This driver can also be built as a module. If so, the module will
          be called max16601.
@@ -408,6 +408,15 @@ config SENSORS_UCD9200
          This driver can also be built as a module. If so, the module will
          be called ucd9200.
 
+config SENSORS_XDPE152
+       tristate "Infineon XDPE152 family"
+       help
+         If you say yes here you get hardware monitoring support for Infineon
+         XDPE15284 and XDPE152C4 devices.
+
+         This driver can also be built as a module. If so, the module will
+         be called xdpe152c4.
+
 config SENSORS_XDPE122
        tristate "Infineon XDPE122 family"
        help
index a4a96ac71de79914feee72f4661f019c9e19a1d3..4678fba5012c7f1b0f711f92b48981c5693e78d2 100644 (file)
@@ -43,5 +43,6 @@ obj-$(CONFIG_SENSORS_TPS53679)        += tps53679.o
 obj-$(CONFIG_SENSORS_UCD9000)  += ucd9000.o
 obj-$(CONFIG_SENSORS_UCD9200)  += ucd9200.o
 obj-$(CONFIG_SENSORS_XDPE122)  += xdpe12284.o
+obj-$(CONFIG_SENSORS_XDPE152)  += xdpe152c4.o
 obj-$(CONFIG_SENSORS_ZL6100)   += zl6100.o
 obj-$(CONFIG_SENSORS_PIM4328)  += pim4328.o
index 0127273883f0426020e7ed4881a5c193ed2717a0..531aa674a9283367d6259cb2d0f3e3290d44a042 100644 (file)
@@ -196,6 +196,17 @@ static int ltc_read_byte_data(struct i2c_client *client, int page, int reg)
        return pmbus_read_byte_data(client, page, reg);
 }
 
+static int ltc_write_byte_data(struct i2c_client *client, int page, int reg, u8 value)
+{
+       int ret;
+
+       ret = ltc_wait_ready(client);
+       if (ret < 0)
+               return ret;
+
+       return pmbus_write_byte_data(client, page, reg, value);
+}
+
 static int ltc_write_byte(struct i2c_client *client, int page, u8 byte)
 {
        int ret;
@@ -681,6 +692,7 @@ static int ltc2978_probe(struct i2c_client *client)
        info = &data->info;
        info->write_word_data = ltc2978_write_word_data;
        info->write_byte = ltc_write_byte;
+       info->write_byte_data = ltc_write_byte_data;
        info->read_word_data = ltc_read_word_data;
        info->read_byte_data = ltc_read_byte_data;
 
index 5a226a564776c984de8a1860ebe5e86be0e478ea..b628405e6586c39a92aa58d4780a795cea9087bb 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Hardware monitoring driver for Maxim MAX16508 and MAX16601.
+ * Hardware monitoring driver for Maxim MAX16508, MAX16601 and MAX16602.
  *
  * Implementation notes:
  *
@@ -31,7 +31,7 @@
 
 #include "pmbus.h"
 
-enum chips { max16508, max16601 };
+enum chips { max16508, max16601, max16602 };
 
 #define REG_DEFAULT_NUM_POP    0xc4
 #define REG_SETPT_DVID         0xd1
@@ -202,7 +202,7 @@ static int max16601_identify(struct i2c_client *client,
        else
                info->vrm_version[0] = vr12;
 
-       if (data->id != max16601)
+       if (data->id != max16601 && data->id != max16602)
                return 0;
 
        reg = i2c_smbus_read_byte_data(client, REG_DEFAULT_NUM_POP);
@@ -264,6 +264,7 @@ static void max16601_remove(void *_data)
 static const struct i2c_device_id max16601_id[] = {
        {"max16508", max16508},
        {"max16601", max16601},
+       {"max16602", max16602},
        {}
 };
 MODULE_DEVICE_TABLE(i2c, max16601_id);
@@ -280,13 +281,15 @@ static int max16601_get_id(struct i2c_client *client)
                return -ENODEV;
 
        /*
-        * PMBUS_IC_DEVICE_ID is expected to return "MAX16601y.xx"
-        * or "MAX16500y.xx".
+        * PMBUS_IC_DEVICE_ID is expected to return "MAX16601y.xx",
+        * "MAX16602y.xx" or "MAX16500y.xx".
         */
        if (!strncmp(buf, "MAX16500", 8)) {
                id = max16508;
        } else if (!strncmp(buf, "MAX16601", 8)) {
                id = max16601;
+       } else if (!strncmp(buf, "MAX16602", 8)) {
+               id = max16602;
        } else {
                buf[ret] = '\0';
                dev_err(dev, "Unsupported chip '%s'\n", buf);
index e74b6ef070f3eaea16d3e82860cd4e93da323417..c031a9700ace9bdbb53ac8648700dc53da498582 100644 (file)
@@ -438,6 +438,8 @@ struct pmbus_driver_info {
        int (*read_byte_data)(struct i2c_client *client, int page, int reg);
        int (*read_word_data)(struct i2c_client *client, int page, int phase,
                              int reg);
+       int (*write_byte_data)(struct i2c_client *client, int page, int reg,
+                             u8 byte);
        int (*write_word_data)(struct i2c_client *client, int page, int reg,
                               u16 word);
        int (*write_byte)(struct i2c_client *client, int page, u8 value);
index d93574d6a1fb638cb9d0567ff0ba30db9824ed96..02912022853d802e24173d43fd3e96ba83174557 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/pmbus.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
+#include <linux/of.h>
+#include <linux/thermal.h>
 #include "pmbus.h"
 
 /*
@@ -276,6 +278,42 @@ static int _pmbus_write_word_data(struct i2c_client *client, int page, int reg,
        return pmbus_write_word_data(client, page, reg, word);
 }
 
+/*
+ * _pmbus_write_byte_data() is similar to pmbus_write_byte_data(), but checks if
+ * a device specific mapping function exists and calls it if necessary.
+ */
+static int _pmbus_write_byte_data(struct i2c_client *client, int page, int reg, u8 value)
+{
+       struct pmbus_data *data = i2c_get_clientdata(client);
+       const struct pmbus_driver_info *info = data->info;
+       int status;
+
+       if (info->write_byte_data) {
+               status = info->write_byte_data(client, page, reg, value);
+               if (status != -ENODATA)
+                       return status;
+       }
+       return pmbus_write_byte_data(client, page, reg, value);
+}
+
+/*
+ * _pmbus_read_byte_data() is similar to pmbus_read_byte_data(), but checks if
+ * a device specific mapping function exists and calls it if necessary.
+ */
+static int _pmbus_read_byte_data(struct i2c_client *client, int page, int reg)
+{
+       struct pmbus_data *data = i2c_get_clientdata(client);
+       const struct pmbus_driver_info *info = data->info;
+       int status;
+
+       if (info->read_byte_data) {
+               status = info->read_byte_data(client, page, reg);
+               if (status != -ENODATA)
+                       return status;
+       }
+       return pmbus_read_byte_data(client, page, reg);
+}
+
 int pmbus_update_fan(struct i2c_client *client, int page, int id,
                     u8 config, u8 mask, u16 command)
 {
@@ -283,14 +321,14 @@ int pmbus_update_fan(struct i2c_client *client, int page, int id,
        int rv;
        u8 to;
 
-       from = pmbus_read_byte_data(client, page,
+       from = _pmbus_read_byte_data(client, page,
                                    pmbus_fan_config_registers[id]);
        if (from < 0)
                return from;
 
        to = (from & ~mask) | (config & mask);
        if (to != from) {
-               rv = pmbus_write_byte_data(client, page,
+               rv = _pmbus_write_byte_data(client, page,
                                           pmbus_fan_config_registers[id], to);
                if (rv < 0)
                        return rv;
@@ -390,37 +428,19 @@ int pmbus_update_byte_data(struct i2c_client *client, int page, u8 reg,
        unsigned int tmp;
        int rv;
 
-       rv = pmbus_read_byte_data(client, page, reg);
+       rv = _pmbus_read_byte_data(client, page, reg);
        if (rv < 0)
                return rv;
 
        tmp = (rv & ~mask) | (value & mask);
 
        if (tmp != rv)
-               rv = pmbus_write_byte_data(client, page, reg, tmp);
+               rv = _pmbus_write_byte_data(client, page, reg, tmp);
 
        return rv;
 }
 EXPORT_SYMBOL_NS_GPL(pmbus_update_byte_data, PMBUS);
 
-/*
- * _pmbus_read_byte_data() is similar to pmbus_read_byte_data(), but checks if
- * a device specific mapping function exists and calls it if necessary.
- */
-static int _pmbus_read_byte_data(struct i2c_client *client, int page, int reg)
-{
-       struct pmbus_data *data = i2c_get_clientdata(client);
-       const struct pmbus_driver_info *info = data->info;
-       int status;
-
-       if (info->read_byte_data) {
-               status = info->read_byte_data(client, page, reg);
-               if (status != -ENODATA)
-                       return status;
-       }
-       return pmbus_read_byte_data(client, page, reg);
-}
-
 static struct pmbus_sensor *pmbus_find_sensor(struct pmbus_data *data, int page,
                                              int reg)
 {
@@ -455,7 +475,7 @@ static int pmbus_get_fan_rate(struct i2c_client *client, int page, int id,
                return s->data;
        }
 
-       config = pmbus_read_byte_data(client, page,
+       config = _pmbus_read_byte_data(client, page,
                                      pmbus_fan_config_registers[id]);
        if (config < 0)
                return config;
@@ -912,7 +932,7 @@ static int pmbus_get_boolean(struct i2c_client *client, struct pmbus_boolean *b,
 
        regval = status & mask;
        if (regval) {
-               ret = pmbus_write_byte_data(client, page, reg, regval);
+               ret = _pmbus_write_byte_data(client, page, reg, regval);
                if (ret)
                        goto unlock;
        }
@@ -1083,6 +1103,68 @@ static int pmbus_add_boolean(struct pmbus_data *data,
        return pmbus_add_attribute(data, &a->dev_attr.attr);
 }
 
+/* Devicetree ("of") thermal zone support for PMBus temperature sensors */
+struct pmbus_thermal_data {
+       struct pmbus_data *pmbus_data;
+       struct pmbus_sensor *sensor;
+};
+
+static int pmbus_thermal_get_temp(void *data, int *temp)
+{
+       struct pmbus_thermal_data *tdata = data;
+       struct pmbus_sensor *sensor = tdata->sensor;
+       struct pmbus_data *pmbus_data = tdata->pmbus_data;
+       struct i2c_client *client = to_i2c_client(pmbus_data->dev);
+       struct device *dev = pmbus_data->hwmon_dev;
+       int ret = 0;
+
+       if (!dev) {
+               /* May not even get to hwmon yet */
+               *temp = 0;
+               return 0;
+       }
+
+       mutex_lock(&pmbus_data->update_lock);
+       pmbus_update_sensor_data(client, sensor);
+       if (sensor->data < 0)
+               ret = sensor->data;
+       else
+               *temp = (int)pmbus_reg2data(pmbus_data, sensor);
+       mutex_unlock(&pmbus_data->update_lock);
+
+       return ret;
+}
+
+static const struct thermal_zone_of_device_ops pmbus_thermal_ops = {
+       .get_temp = pmbus_thermal_get_temp,
+};
+
+static int pmbus_thermal_add_sensor(struct pmbus_data *pmbus_data,
+                                   struct pmbus_sensor *sensor, int index)
+{
+       struct device *dev = pmbus_data->dev;
+       struct pmbus_thermal_data *tdata;
+       struct thermal_zone_device *tzd;
+
+       tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
+       if (!tdata)
+               return -ENOMEM;
+
+       tdata->sensor = sensor;
+       tdata->pmbus_data = pmbus_data;
+
+       tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata,
+                                                  &pmbus_thermal_ops);
+       /*
+        * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
+        * so ignore that error but forward any other error.
+        */
+       if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
+               return PTR_ERR(tzd);
+
+       return 0;
+}
+
 static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data,
                                             const char *name, const char *type,
                                             int seq, int page, int phase,
@@ -1126,6 +1208,10 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data,
        sensor->next = data->sensors;
        data->sensors = sensor;
 
+       /* temperature sensors with _input values are registered with thermal */
+       if (class == PSC_TEMPERATURE && strcmp(type, "input") == 0)
+               pmbus_thermal_add_sensor(data, sensor, seq);
+
        return sensor;
 }
 
@@ -2308,6 +2394,21 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
        struct device *dev = &client->dev;
        int page, ret;
 
+       /*
+        * Figure out if PEC is enabled before accessing any other register.
+        * Make sure PEC is disabled; it will be enabled later if needed.
+        */
+       client->flags &= ~I2C_CLIENT_PEC;
+
+       /* Enable PEC if the controller and bus supports it */
+       if (!(data->flags & PMBUS_NO_CAPABILITY)) {
+               ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
+               if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK)) {
+                       if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_PEC))
+                               client->flags |= I2C_CLIENT_PEC;
+               }
+       }
+
        /*
         * Some PMBus chips don't support PMBUS_STATUS_WORD, so try
         * to use PMBUS_STATUS_BYTE instead if that is the case.
@@ -2326,19 +2427,6 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
                data->has_status_word = true;
        }
 
-       /* Make sure PEC is disabled, will be enabled later if needed */
-       client->flags &= ~I2C_CLIENT_PEC;
-
-       /* Enable PEC if the controller and bus supports it */
-       if (!(data->flags & PMBUS_NO_CAPABILITY)) {
-               ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
-               if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK)) {
-                       if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_PEC)) {
-                               client->flags |= I2C_CLIENT_PEC;
-                       }
-               }
-       }
-
        /*
         * Check if the chip is write protected. If it is, we can not clear
         * faults, and we should not try it. Also, in that case, writes into
@@ -2399,7 +2487,7 @@ static int pmbus_regulator_is_enabled(struct regulator_dev *rdev)
        int ret;
 
        mutex_lock(&data->update_lock);
-       ret = pmbus_read_byte_data(client, page, PMBUS_OPERATION);
+       ret = _pmbus_read_byte_data(client, page, PMBUS_OPERATION);
        mutex_unlock(&data->update_lock);
 
        if (ret < 0)
@@ -2498,7 +2586,7 @@ static int pmbus_regulator_get_error_flags(struct regulator_dev *rdev, unsigned
                if (!(func & cat->func))
                        continue;
 
-               status = pmbus_read_byte_data(client, page, cat->reg);
+               status = _pmbus_read_byte_data(client, page, cat->reg);
                if (status < 0) {
                        mutex_unlock(&data->update_lock);
                        return status;
@@ -2548,11 +2636,78 @@ static int pmbus_regulator_get_error_flags(struct regulator_dev *rdev, unsigned
        return 0;
 }
 
+static int pmbus_regulator_get_voltage(struct regulator_dev *rdev)
+{
+       struct device *dev = rdev_get_dev(rdev);
+       struct i2c_client *client = to_i2c_client(dev->parent);
+       struct pmbus_data *data = i2c_get_clientdata(client);
+       struct pmbus_sensor s = {
+               .page = rdev_get_id(rdev),
+               .class = PSC_VOLTAGE_OUT,
+               .convert = true,
+       };
+
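+       /* Read VOUT through the chip-specific path; 0xff means no specific phase */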
+       s.data = _pmbus_read_word_data(client, s.page, 0xff, PMBUS_READ_VOUT);
+       if (s.data < 0)
+               return s.data;
+
+       return (int)pmbus_reg2data(data, &s) * 1000; /* unit is uV */
+}
+
+static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv,
+                                      int max_uv, unsigned int *selector)
+{
+       struct device *dev = rdev_get_dev(rdev);
+       struct i2c_client *client = to_i2c_client(dev->parent);
+       struct pmbus_data *data = i2c_get_clientdata(client);
+       struct pmbus_sensor s = {
+               .page = rdev_get_id(rdev),
+               .class = PSC_VOLTAGE_OUT,
+               .convert = true,
+               .data = -1,
+       };
+       int val = DIV_ROUND_CLOSEST(min_uv, 1000); /* convert to mV */
+       int low, high;
+
+       *selector = 0;
+
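+       /* Lower bound: MFR_VOUT_MIN when available, else VOUT_MARGIN_LOW */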
+       if (pmbus_check_word_register(client, s.page, PMBUS_MFR_VOUT_MIN))
+               s.data = _pmbus_read_word_data(client, s.page, 0xff, PMBUS_MFR_VOUT_MIN);
+       if (s.data < 0) {
+               s.data = _pmbus_read_word_data(client, s.page, 0xff, PMBUS_VOUT_MARGIN_LOW);
+               if (s.data < 0)
+                       return s.data;
+       }
+       low = pmbus_reg2data(data, &s);
+
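+       /* Upper bound: MFR_VOUT_MAX when available, else VOUT_MARGIN_HIGH */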
+       s.data = -1;
+       if (pmbus_check_word_register(client, s.page, PMBUS_MFR_VOUT_MAX))
+               s.data = _pmbus_read_word_data(client, s.page, 0xff, PMBUS_MFR_VOUT_MAX);
+       if (s.data < 0) {
+               s.data = _pmbus_read_word_data(client, s.page, 0xff, PMBUS_VOUT_MARGIN_HIGH);
+               if (s.data < 0)
+                       return s.data;
+       }
+       high = pmbus_reg2data(data, &s);
+
+       /* Make sure we are within margins */
+       if (low > val)
+               val = low;
+       if (high < val)
+               val = high;
+
+       val = pmbus_data2reg(data, &s, val);
+
+       return _pmbus_write_word_data(client, s.page, PMBUS_VOUT_COMMAND, (u16)val);
+}
+
 const struct regulator_ops pmbus_regulator_ops = {
        .enable = pmbus_regulator_enable,
        .disable = pmbus_regulator_disable,
        .is_enabled = pmbus_regulator_is_enabled,
        .get_error_flags = pmbus_regulator_get_error_flags,
+       .get_voltage = pmbus_regulator_get_voltage,
+       .set_voltage = pmbus_regulator_set_voltage,
 };
 EXPORT_SYMBOL_NS_GPL(pmbus_regulator_ops, PMBUS);
 
diff --git a/drivers/hwmon/pmbus/xdpe152c4.c b/drivers/hwmon/pmbus/xdpe152c4.c
new file mode 100644 (file)
index 0000000..b8a36ef
--- /dev/null
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Hardware monitoring driver for Infineon Multi-phase Digital VR Controllers
+ *
+ * Copyright (c) 2022 Infineon Technologies. All rights reserved.
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include "pmbus.h"
+
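+/* The supported controllers expose two PMBus pages, one per output rail */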
+#define XDPE152_PAGE_NUM 2
+
+static struct pmbus_driver_info xdpe152_info = {
+       .pages = XDPE152_PAGE_NUM,
+       .format[PSC_VOLTAGE_IN] = linear,
+       .format[PSC_VOLTAGE_OUT] = linear,
+       .format[PSC_TEMPERATURE] = linear,
+       .format[PSC_CURRENT_IN] = linear,
+       .format[PSC_CURRENT_OUT] = linear,
+       .format[PSC_POWER] = linear,
+       .func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
+               PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
+               PMBUS_HAVE_TEMP | PMBUS_HAVE_TEMP2 | PMBUS_HAVE_STATUS_TEMP |
+               PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT,
+       .func[1] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
+               PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
+               PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT,
+};
+
+static int xdpe152_probe(struct i2c_client *client)
+{
+       struct pmbus_driver_info *info;
+
+       info = devm_kmemdup(&client->dev, &xdpe152_info, sizeof(*info),
+                           GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       return pmbus_do_probe(client, info);
+}
+
+static const struct i2c_device_id xdpe152_id[] = {
+       {"xdpe152c4", 0},
+       {"xdpe15284", 0},
+       {}
+};
+
+MODULE_DEVICE_TABLE(i2c, xdpe152_id);
+
+static const struct of_device_id __maybe_unused xdpe152_of_match[] = {
+       {.compatible = "infineon,xdpe152c4"},
+       {.compatible = "infineon,xdpe15284"},
+       {}
+};
+MODULE_DEVICE_TABLE(of, xdpe152_of_match);
+
+static struct i2c_driver xdpe152_driver = {
+       .driver = {
+               .name = "xdpe152c4",
+               .of_match_table = of_match_ptr(xdpe152_of_match),
+       },
+       .probe_new = xdpe152_probe,
+       .id_table = xdpe152_id,
+};
+
+module_i2c_driver(xdpe152_driver);
+
+MODULE_AUTHOR("Greg Schwendimann <greg.schwendimann@infineon.com>");
+MODULE_DESCRIPTION("PMBus driver for Infineon XDPE152 family");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(PMBUS);
index f12b9a28a232d498a68dcfe025998342e2137783..6c08551d8d140e221cf9596e6200b6195a10a46a 100644 (file)
@@ -49,16 +49,6 @@ struct pwm_fan_ctx {
        struct hwmon_channel_info fan_channel;
 };
 
-static const u32 pwm_fan_channel_config_pwm[] = {
-       HWMON_PWM_INPUT,
-       0
-};
-
-static const struct hwmon_channel_info pwm_fan_channel_pwm = {
-       .type = hwmon_pwm,
-       .config = pwm_fan_channel_config_pwm,
-};
-
 /* This handler assumes self resetting edge triggered interrupt. */
 static irqreturn_t pulse_handler(int irq, void *dev_id)
 {
@@ -387,7 +377,7 @@ static int pwm_fan_probe(struct platform_device *pdev)
        if (!channels)
                return -ENOMEM;
 
-       channels[0] = &pwm_fan_channel_pwm;
+       channels[0] = HWMON_CHANNEL_INFO(pwm, HWMON_PWM_INPUT);
 
        for (i = 0; i < ctx->tach_count; i++) {
                struct pwm_fan_tach *tach = &ctx->tachs[i];
index e48f58ec5b9cf1730c3b2e47f3d34660b5bd6f81..9ce4899a81a55c4df18030ae16f57b41b6a3a449 100644 (file)
@@ -54,7 +54,7 @@ static int sl28cpld_hwmon_read(struct device *dev,
 
                /*
                 * The counter period is 1000ms and the sysfs specification
-                * says we should asssume 2 pulses per revolution.
+                * says we should assume 2 pulses per revolution.
                 */
                value *= 60 / 2;
 
@@ -67,18 +67,8 @@ static int sl28cpld_hwmon_read(struct device *dev,
        return 0;
 }
 
-static const u32 sl28cpld_hwmon_fan_config[] = {
-       HWMON_F_INPUT,
-       0
-};
-
-static const struct hwmon_channel_info sl28cpld_hwmon_fan = {
-       .type = hwmon_fan,
-       .config = sl28cpld_hwmon_fan_config,
-};
-
 static const struct hwmon_channel_info *sl28cpld_hwmon_info[] = {
-       &sl28cpld_hwmon_fan,
+       HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT),
        NULL
 };
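
This hunk and the pwm-fan one above are the same conversion: the open-coded config array plus hwmon_channel_info pair collapses into the HWMON_CHANNEL_INFO() helper from <linux/hwmon.h>, which builds the structure and its zero-terminated config array in place using compound literals. Roughly (the exact expansion may differ between kernel versions):

	#define HWMON_CHANNEL_INFO(stype, ...)			\
		(&(struct hwmon_channel_info) {			\
			.type = hwmon_##stype,			\
			.config = (u32 []) { __VA_ARGS__, 0 },	\
		})
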
 
index b86d9df7105d107878f3618b5c7122452b97b6da..cc0a1c219b1f962bfb16fa227d45e4e83673f305 100644 (file)
@@ -41,6 +41,8 @@ enum chips { tmp401, tmp411, tmp431, tmp432, tmp435 };
 #define TMP401_STATUS                          0x02
 #define TMP401_CONFIG                          0x03
 #define TMP401_CONVERSION_RATE                 0x04
+#define TMP4XX_N_FACTOR_REG                    0x18
+#define TMP43X_BETA_RANGE                      0x25
 #define TMP401_TEMP_CRIT_HYST                  0x21
 #define TMP401_MANUFACTURER_ID_REG             0xFE
 #define TMP401_DEVICE_ID_REG                   0xFF
@@ -543,6 +545,8 @@ static int tmp401_init_client(struct tmp401_data *data)
        struct regmap *regmap = data->regmap;
        u32 config, config_orig;
        int ret;
+       u32 val = 0;
+       s32 nfactor = 0;
 
        /* Set conversion rate to 2 Hz */
        ret = regmap_write(regmap, TMP401_CONVERSION_RATE, 5);
@@ -557,12 +561,50 @@ static int tmp401_init_client(struct tmp401_data *data)
        config_orig = config;
        config &= ~TMP401_CONFIG_SHUTDOWN;
 
+       if (of_property_read_bool(data->client->dev.of_node, "ti,extended-range-enable")) {
+               /* Enable measurement over extended temperature range */
+               config |= TMP401_CONFIG_RANGE;
+       }
+
        data->extended_range = !!(config & TMP401_CONFIG_RANGE);
 
-       if (config != config_orig)
+       if (config != config_orig) {
                ret = regmap_write(regmap, TMP401_CONFIG, config);
+               if (ret < 0)
+                       return ret;
+       }
 
-       return ret;
+       ret = of_property_read_u32(data->client->dev.of_node, "ti,n-factor", &nfactor);
+       if (!ret) {
+               if (data->kind == tmp401) {
+                       dev_err(&data->client->dev, "ti,tmp401 does not support n-factor correction\n");
+                       return -EINVAL;
+               }
+               if (nfactor < -128 || nfactor > 127) {
+                       dev_err(&data->client->dev, "n-factor is invalid (%d)\n", nfactor);
+                       return -EINVAL;
+               }
+               ret = regmap_write(regmap, TMP4XX_N_FACTOR_REG, (unsigned int)nfactor);
+               if (ret < 0)
+                       return ret;
+       }
+
+       ret = of_property_read_u32(data->client->dev.of_node, "ti,beta-compensation", &val);
+       if (!ret) {
+               if (data->kind == tmp401 || data->kind == tmp411) {
+                       dev_err(&data->client->dev, "ti,tmp401 or ti,tmp411 does not support beta compensation\n");
+                       return -EINVAL;
+               }
+               if (val > 15) {
+                       dev_err(&data->client->dev, "beta-compensation is invalid (%u)\n", val);
+                       return -EINVAL;
+               }
+               ret = regmap_write(regmap, TMP43X_BETA_RANGE, val);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
 }
 
 static int tmp401_detect(struct i2c_client *client,
@@ -708,10 +750,21 @@ static int tmp401_probe(struct i2c_client *client)
        return 0;
 }
 
+static const struct of_device_id __maybe_unused tmp4xx_of_match[] = {
+       { .compatible = "ti,tmp401", },
+       { .compatible = "ti,tmp411", },
+       { .compatible = "ti,tmp431", },
+       { .compatible = "ti,tmp432", },
+       { .compatible = "ti,tmp435", },
+       { },
+};
+MODULE_DEVICE_TABLE(of, tmp4xx_of_match);
+
 static struct i2c_driver tmp401_driver = {
        .class          = I2C_CLASS_HWMON,
        .driver = {
                .name   = "tmp401",
+               .of_match_table = of_match_ptr(tmp4xx_of_match),
        },
        .probe_new      = tmp401_probe,
        .id_table       = tmp401_id,
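
The __maybe_unused on tmp4xx_of_match pairs with the of_match_ptr() wrapper: when CONFIG_OF is disabled the macro evaluates to NULL, so the table would otherwise be flagged as defined but unused. From <linux/of.h>, approximately:

	#ifdef CONFIG_OF
	#define of_match_ptr(_ptr)	(_ptr)
	#else
	#define of_match_ptr(_ptr)	NULL
	#endif
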
index c0364314877ec6503e410b3e8dc01472c3404849..c16157ee8c52039171f886c5920e32827991526b 100644 (file)
@@ -82,6 +82,7 @@
 
 #define ISMT_DESC_ENTRIES      2       /* number of descriptor entries */
 #define ISMT_MAX_RETRIES       3       /* number of SMBus retries to attempt */
+#define ISMT_LOG_ENTRIES       3       /* number of interrupt cause log entries */
 
 /* Hardware Descriptor Constants - Control Field */
 #define ISMT_DESC_CWRL 0x01    /* Command/Write Length */
@@ -175,6 +176,8 @@ struct ismt_priv {
        u8 head;                                /* ring buffer head pointer */
        struct completion cmp;                  /* interrupt completion */
        u8 buffer[I2C_SMBUS_BLOCK_MAX + 16];    /* temp R/W data buffer */
+       dma_addr_t log_dma;
+       u32 *log;
 };
 
 static const struct pci_device_id ismt_ids[] = {
@@ -411,6 +414,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
        memset(desc, 0, sizeof(struct ismt_desc));
        desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write);
 
+       /* Always clear the log entries */
+       memset(priv->log, 0, ISMT_LOG_ENTRIES * sizeof(u32));
+
        /* Initialize common control bits */
        if (likely(pci_dev_msi_enabled(priv->pci_dev)))
                desc->control = ISMT_DESC_INT | ISMT_DESC_FAIR;
@@ -708,6 +714,8 @@ static void ismt_hw_init(struct ismt_priv *priv)
        /* initialize the Master Descriptor Base Address (MDBA) */
        writeq(priv->io_rng_dma, priv->smba + ISMT_MSTR_MDBA);
 
+       writeq(priv->log_dma, priv->smba + ISMT_GR_SMTICL);
+
        /* initialize the Master Control Register (MCTRL) */
        writel(ISMT_MCTRL_MEIE, priv->smba + ISMT_MSTR_MCTRL);
 
@@ -795,6 +803,12 @@ static int ismt_dev_init(struct ismt_priv *priv)
        priv->head = 0;
        init_completion(&priv->cmp);
 
+       priv->log = dmam_alloc_coherent(&priv->pci_dev->dev,
+                                       ISMT_LOG_ENTRIES * sizeof(u32),
+                                       &priv->log_dma, GFP_KERNEL);
+       if (!priv->log)
+               return -ENOMEM;
+
        return 0;
 }
 
index 45fe4a7fe0c039cbba187a46b2ce5d8a40a58541..901f0fb04fee4ba3d09f2ce9650d93f45f75d98f 100644 (file)
@@ -304,7 +304,8 @@ static int mtk_i2c_probe(struct platform_device *pdev)
 
        if (i2c->bus_freq == 0) {
                dev_warn(i2c->dev, "clock-frequency 0 not supported\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto err_disable_clk;
        }
 
        adap = &i2c->adap;
@@ -322,10 +323,15 @@ static int mtk_i2c_probe(struct platform_device *pdev)
 
        ret = i2c_add_adapter(adap);
        if (ret < 0)
-               return ret;
+               goto err_disable_clk;
 
        dev_info(&pdev->dev, "clock %u kHz\n", i2c->bus_freq / 1000);
 
+       return 0;
+
+err_disable_clk:
+       clk_disable_unprepare(i2c->clk);
+
        return ret;
 }
 
index 12c90aa0900e60b63e0a14215f3692c12876c9cf..a77cd86fe75ed7401bc041b27c651b9fedf67285 100644 (file)
@@ -213,6 +213,7 @@ static int thunder_i2c_probe_pci(struct pci_dev *pdev,
        i2c->adap.bus_recovery_info = &octeon_i2c_recovery_info;
        i2c->adap.dev.parent = dev;
        i2c->adap.dev.of_node = pdev->dev.of_node;
+       i2c->adap.dev.fwnode = dev->fwnode;
        snprintf(i2c->adap.name, sizeof(i2c->adap.name),
                 "Cavium ThunderX i2c adapter at %s", dev_name(dev));
        i2c_set_adapdata(&i2c->adap, i2c);
index 47551ab73ca8a03ab60448adb8fe0d8cee9aed5d..b9bb94bd0f67277571596166e5de722a453f5b8e 100644 (file)
@@ -764,6 +764,106 @@ static struct cpuidle_state icx_cstates[] __initdata = {
                .enter = NULL }
 };
 
+/*
+ * On AlderLake, C1 has to be disabled if C1E is enabled, and vice versa.
+ * C1E is enabled only if the "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
+ * But in this case there is effectively no C1, because C1 requests are
+ * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
+ * and C1E requests end up with C1, so there is effectively no C1E.
+ *
+ * By default we enable C1E and disable C1 by marking C1 with
+ * 'CPUIDLE_FLAG_UNUSABLE'.
+ */
+static struct cpuidle_state adl_cstates[] __initdata = {
+       {
+               .name = "C1",
+               .desc = "MWAIT 0x00",
+               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+               .exit_latency = 1,
+               .target_residency = 1,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .name = "C1E",
+               .desc = "MWAIT 0x01",
+               .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+               .exit_latency = 2,
+               .target_residency = 4,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .name = "C6",
+               .desc = "MWAIT 0x20",
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 220,
+               .target_residency = 600,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .name = "C8",
+               .desc = "MWAIT 0x40",
+               .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 280,
+               .target_residency = 800,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .name = "C10",
+               .desc = "MWAIT 0x60",
+               .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 680,
+               .target_residency = 2000,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .enter = NULL }
+};
+
+static struct cpuidle_state adl_l_cstates[] __initdata = {
+       {
+               .name = "C1",
+               .desc = "MWAIT 0x00",
+               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+               .exit_latency = 1,
+               .target_residency = 1,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .name = "C1E",
+               .desc = "MWAIT 0x01",
+               .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+               .exit_latency = 2,
+               .target_residency = 4,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .name = "C6",
+               .desc = "MWAIT 0x20",
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 170,
+               .target_residency = 500,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .name = "C8",
+               .desc = "MWAIT 0x40",
+               .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 200,
+               .target_residency = 600,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .name = "C10",
+               .desc = "MWAIT 0x60",
+               .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 230,
+               .target_residency = 700,
+               .enter = &intel_idle,
+               .enter_s2idle = intel_idle_s2idle, },
+       {
+               .enter = NULL }
+};
+
 /*
  * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
  * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
@@ -1147,6 +1247,14 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
        .use_acpi = true,
 };
 
+static const struct idle_cpu idle_cpu_adl __initconst = {
+       .state_table = adl_cstates,
+};
+
+static const struct idle_cpu idle_cpu_adl_l __initconst = {
+       .state_table = adl_l_cstates,
+};
+
 static const struct idle_cpu idle_cpu_spr __initconst = {
        .state_table = spr_cstates,
        .disable_promotion_to_c1e = true,
@@ -1215,6 +1323,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &idle_cpu_skx),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &idle_cpu_icx),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &idle_cpu_icx),
+       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &idle_cpu_adl),
+       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &idle_cpu_adl_l),
        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &idle_cpu_spr),
        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &idle_cpu_knl),
@@ -1573,6 +1683,25 @@ static void __init skx_idle_state_table_update(void)
        }
 }
 
+/**
+ * adl_idle_state_table_update - Adjust AlderLake idle states table.
+ */
+static void __init adl_idle_state_table_update(void)
+{
+       /* Check if user prefers C1 over C1E. */
+       if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
+               cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+               cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
+
+               /* Disable C1E by clearing the "C1E promotion" bit. */
+               c1e_promotion = C1E_PROMOTION_DISABLE;
+               return;
+       }
+
+       /* Make sure C1E is enabled by default */
+       c1e_promotion = C1E_PROMOTION_ENABLE;
+}
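
preferred_states_mask comes from the intel_idle.preferred_cstates module parameter introduced for Sapphire Rapids, where bit N corresponds to idle state N in the table (BIT(1) is C1 and BIT(2) is C1E here). Assuming those parameter semantics, booting with

	intel_idle.preferred_cstates=2

enables C1 and disables C1E on Alder Lake, while the default (or =4) keeps the C1E-only behaviour set up above.
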
+
 /**
  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
  */
@@ -1642,6 +1771,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
        case INTEL_FAM6_SAPPHIRERAPIDS_X:
                spr_idle_state_table_update();
                break;
+       case INTEL_FAM6_ALDERLAKE:
+       case INTEL_FAM6_ALDERLAKE_L:
+               adl_idle_state_table_update();
+               break;
        }
 
        for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
index 6c6aec848f989baf287829236889283651103466..d5209f32adb3e265acd51d56dda32e6d0be42e21 100644 (file)
@@ -677,6 +677,17 @@ u16 qcom_adc_tm5_temp_volt_scale(unsigned int prescale_ratio,
 }
 EXPORT_SYMBOL(qcom_adc_tm5_temp_volt_scale);
 
+u16 qcom_adc_tm5_gen2_temp_res_scale(int temp)
+{
+       int64_t resistance;
+
+       resistance = qcom_vadc_map_temp_voltage(adcmap7_100k,
+               ARRAY_SIZE(adcmap7_100k), temp);
+
+       return div64_s64(resistance * RATIO_MAX_ADC7, resistance + R_PU_100K);
+}
+EXPORT_SYMBOL(qcom_adc_tm5_gen2_temp_res_scale);
+
 int qcom_adc5_hw_scale(enum vadc_scale_fn_type scaletype,
                    unsigned int prescale_ratio,
                    const struct adc5_data *data,
index f60127bfe0f4fe82a3814a2c2d1088e1898bcfc3..1ac9f3f79271325ae0546a725594ba418466a136 100644 (file)
@@ -68,10 +68,7 @@ struct scd30_state {
        scd30_command_t command;
 };
 
-int scd30_suspend(struct device *dev);
-int scd30_resume(struct device *dev);
-
-static __maybe_unused SIMPLE_DEV_PM_OPS(scd30_pm_ops, scd30_suspend, scd30_resume);
+extern const struct dev_pm_ops scd30_pm_ops;
 
 int scd30_probe(struct device *dev, int irq, const char *name, void *priv, scd30_command_t command);
 
index 9fe6bbe9ee041b3908187bffe3c94c8b666613b4..682fca39d14d63fcbdff3c3c0b2a03e0e0950821 100644 (file)
@@ -517,7 +517,7 @@ static const struct iio_chan_spec scd30_channels[] = {
        IIO_CHAN_SOFT_TIMESTAMP(3),
 };
 
-int __maybe_unused scd30_suspend(struct device *dev)
+static int scd30_suspend(struct device *dev)
 {
        struct iio_dev *indio_dev = dev_get_drvdata(dev);
        struct scd30_state *state  = iio_priv(indio_dev);
@@ -529,9 +529,8 @@ int __maybe_unused scd30_suspend(struct device *dev)
 
        return regulator_disable(state->vdd);
 }
-EXPORT_SYMBOL(scd30_suspend);
 
-int __maybe_unused scd30_resume(struct device *dev)
+static int scd30_resume(struct device *dev)
 {
        struct iio_dev *indio_dev = dev_get_drvdata(dev);
        struct scd30_state *state = iio_priv(indio_dev);
@@ -543,7 +542,8 @@ int __maybe_unused scd30_resume(struct device *dev)
 
        return scd30_command_write(state, CMD_START_MEAS, state->pressure_comp);
 }
-EXPORT_SYMBOL(scd30_resume);
+
+EXPORT_NS_SIMPLE_DEV_PM_OPS(scd30_pm_ops, scd30_suspend, scd30_resume, IIO_SCD30);
 
 static void scd30_stop_meas(void *data)
 {
@@ -759,7 +759,7 @@ int scd30_probe(struct device *dev, int irq, const char *name, void *priv,
 
        return devm_iio_device_register(dev, indio_dev);
 }
-EXPORT_SYMBOL(scd30_probe);
+EXPORT_SYMBOL_NS(scd30_probe, IIO_SCD30);
 
 MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>");
 MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor core driver");
index 875892a070eec78d68a40eacc4ad07ca5b5634f1..bae479a4721f3b4977f72da16d7cd31086ba90bb 100644 (file)
@@ -128,7 +128,7 @@ static struct i2c_driver scd30_i2c_driver = {
        .driver = {
                .name = KBUILD_MODNAME,
                .of_match_table = scd30_i2c_of_match,
-               .pm = &scd30_pm_ops,
+               .pm = pm_sleep_ptr(&scd30_pm_ops),
        },
        .probe_new = scd30_i2c_probe,
 };
@@ -137,3 +137,4 @@ module_i2c_driver(scd30_i2c_driver);
 MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>");
 MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor i2c driver");
 MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(IIO_SCD30);
index 568b34486c44cf38fd32c1444838003bc1fc8c11..3c519103d30b54fc664b2751ceb0b90754bc374f 100644 (file)
@@ -252,7 +252,7 @@ static struct serdev_device_driver scd30_serdev_driver = {
        .driver = {
                .name = KBUILD_MODNAME,
                .of_match_table = scd30_serdev_of_match,
-               .pm = &scd30_pm_ops,
+               .pm = pm_sleep_ptr(&scd30_pm_ops),
        },
        .probe = scd30_serdev_probe,
 };
@@ -261,3 +261,4 @@ module_serdev_device_driver(scd30_serdev_driver);
 MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>");
 MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor serial driver");
 MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(IIO_SCD30);
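
Two related mechanisms run through this scd30 series: the core's symbols move into the IIO_SCD30 namespace (EXPORT_SYMBOL_NS(), EXPORT_NS_SIMPLE_DEV_PM_OPS()), so each transport module must declare MODULE_IMPORT_NS(IIO_SCD30); and the PM ops are referenced through pm_sleep_ptr(), which resolves to NULL when CONFIG_PM_SLEEP is off, so the callbacks no longer need __maybe_unused annotations. The helper is roughly:

	#define pm_sleep_ptr(_ptr) \
		(IS_ENABLED(CONFIG_PM_SLEEP) ? (_ptr) : NULL)
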
index 2bd407d86bae529648d6e142da48f7343adcbab8..e9bd36adbe47dab23913aa2cc5cc9f5f315ec6de 100644 (file)
@@ -756,15 +756,12 @@ static int ili251x_firmware_reset(struct i2c_client *client)
        return ili251x_firmware_busy(client);
 }
 
-static void ili251x_hardware_reset(struct device *dev)
+static void ili210x_hardware_reset(struct gpio_desc *reset_gpio)
 {
-       struct i2c_client *client = to_i2c_client(dev);
-       struct ili210x *priv = i2c_get_clientdata(client);
-
        /* Reset the controller */
-       gpiod_set_value_cansleep(priv->reset_gpio, 1);
-       usleep_range(10000, 15000);
-       gpiod_set_value_cansleep(priv->reset_gpio, 0);
+       gpiod_set_value_cansleep(reset_gpio, 1);
+       usleep_range(12000, 15000);
+       gpiod_set_value_cansleep(reset_gpio, 0);
        msleep(300);
 }
 
@@ -773,6 +770,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev,
                                             const char *buf, size_t count)
 {
        struct i2c_client *client = to_i2c_client(dev);
+       struct ili210x *priv = i2c_get_clientdata(client);
        const char *fwname = ILI251X_FW_FILENAME;
        const struct firmware *fw;
        u16 ac_end, df_end;
@@ -803,7 +801,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev,
 
        dev_dbg(dev, "Firmware update started, firmware=%s\n", fwname);
 
-       ili251x_hardware_reset(dev);
+       ili210x_hardware_reset(priv->reset_gpio);
 
        error = ili251x_firmware_reset(client);
        if (error)
@@ -858,7 +856,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev,
        error = count;
 
 exit:
-       ili251x_hardware_reset(dev);
+       ili210x_hardware_reset(priv->reset_gpio);
        dev_dbg(dev, "Firmware update ended, error=%i\n", error);
        enable_irq(client->irq);
        kfree(fwbuf);
@@ -951,9 +949,7 @@ static int ili210x_i2c_probe(struct i2c_client *client,
                if (error)
                        return error;
 
-               usleep_range(50, 100);
-               gpiod_set_value_cansleep(reset_gpio, 0);
-               msleep(100);
+               ili210x_hardware_reset(reset_gpio);
        }
 
        priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
index 9050ca1f4285cf0a9f9ff9190396a83a35d13f5d..808f6e7a80482796b23f05899147c788bf91a814 100644 (file)
@@ -1087,9 +1087,15 @@ static int of_count_icc_providers(struct device_node *np)
 {
        struct device_node *child;
        int count = 0;
+       const struct of_device_id __maybe_unused ignore_list[] = {
+               { .compatible = "qcom,sc7180-ipa-virt" },
+               { .compatible = "qcom,sdx55-ipa-virt" },
+               {}
+       };
 
        for_each_available_child_of_node(np, child) {
-               if (of_property_read_bool(child, "#interconnect-cells"))
+               if (of_property_read_bool(child, "#interconnect-cells") &&
+                   likely(!of_match_node(ignore_list, child)))
                        count++;
                count += of_count_icc_providers(child);
        }
index 15edb9a6fcae0f59024fd38daa9eed827862f017..135c156673a733a7ff713e1a15e11191f40f51b4 100644 (file)
@@ -257,6 +257,18 @@ config ST_IRQCHIP
        help
          Enables SysCfg Controlled IRQs on STi based platforms.
 
+config SUN4I_INTC
+       bool
+
+config SUN6I_R_INTC
+       bool
+       select IRQ_DOMAIN_HIERARCHY
+       select IRQ_FASTEOI_HIERARCHY_HANDLERS
+
+config SUNXI_NMI_INTC
+       bool
+       select GENERIC_IRQ_CHIP
+
 config TB10X_IRQC
        bool
        select IRQ_DOMAIN
index 160a1d8ceaa96e6f255c4db8a6332f4aa37481b9..9b1ffb0f98cc090135cfa54b58b129e64ed5656d 100644 (file)
@@ -23,9 +23,9 @@ obj-$(CONFIG_OMPIC)                   += irq-ompic.o
 obj-$(CONFIG_OR1K_PIC)                 += irq-or1k-pic.o
 obj-$(CONFIG_ORION_IRQCHIP)            += irq-orion.o
 obj-$(CONFIG_OMAP_IRQCHIP)             += irq-omap-intc.o
-obj-$(CONFIG_ARCH_SUNXI)               += irq-sun4i.o
-obj-$(CONFIG_ARCH_SUNXI)               += irq-sun6i-r.o
-obj-$(CONFIG_ARCH_SUNXI)               += irq-sunxi-nmi.o
+obj-$(CONFIG_SUN4I_INTC)               += irq-sun4i.o
+obj-$(CONFIG_SUN6I_R_INTC)             += irq-sun6i-r.o
+obj-$(CONFIG_SUNXI_NMI_INTC)           += irq-sunxi-nmi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)            += spear-shirq.o
 obj-$(CONFIG_ARM_GIC)                  += irq-gic.o irq-gic-common.o
 obj-$(CONFIG_ARM_GIC_PM)               += irq-gic-pm.o
index 5b8d571c041dccfe80fbad1756ebfd77cf7fb7ed..ee18eb3e72b72ff64e116b86c7d97e7dbb53de59 100644 (file)
@@ -209,15 +209,29 @@ static struct msi_domain_info armada_370_xp_msi_domain_info = {
 
 static void armada_370_xp_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 {
+       unsigned int cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
+
        msg->address_lo = lower_32_bits(msi_doorbell_addr);
        msg->address_hi = upper_32_bits(msi_doorbell_addr);
-       msg->data = 0xf00 | (data->hwirq + PCI_MSI_DOORBELL_START);
+       msg->data = BIT(cpu + 8) | (data->hwirq + PCI_MSI_DOORBELL_START);
 }
 
 static int armada_370_xp_msi_set_affinity(struct irq_data *irq_data,
                                          const struct cpumask *mask, bool force)
 {
-        return -EINVAL;
+       unsigned int cpu;
+
+       if (!force)
+               cpu = cpumask_any_and(mask, cpu_online_mask);
+       else
+               cpu = cpumask_first(mask);
+
+       if (cpu >= nr_cpu_ids)
+               return -EINVAL;
+
+       irq_data_update_effective_affinity(irq_data, cpumask_of(cpu));
+
+       return IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip armada_370_xp_msi_bottom_irq_chip = {
@@ -264,11 +278,21 @@ static const struct irq_domain_ops armada_370_xp_msi_domain_ops = {
        .free   = armada_370_xp_msi_free,
 };
 
-static int armada_370_xp_msi_init(struct device_node *node,
-                                 phys_addr_t main_int_phys_base)
+static void armada_370_xp_msi_reenable_percpu(void)
 {
        u32 reg;
 
+       /* Enable MSI doorbell mask and combined cpu local interrupt */
+       reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS)
+               | PCI_MSI_DOORBELL_MASK;
+       writel(reg, per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS);
+       /* Unmask local doorbell interrupt */
+       writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
+}
+
+static int armada_370_xp_msi_init(struct device_node *node,
+                                 phys_addr_t main_int_phys_base)
+{
        msi_doorbell_addr = main_int_phys_base +
                ARMADA_370_XP_SW_TRIG_INT_OFFS;
 
@@ -287,18 +311,13 @@ static int armada_370_xp_msi_init(struct device_node *node,
                return -ENOMEM;
        }
 
-       reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS)
-               | PCI_MSI_DOORBELL_MASK;
-
-       writel(reg, per_cpu_int_base +
-              ARMADA_370_XP_IN_DRBEL_MSK_OFFS);
-
-       /* Unmask IPI interrupt */
-       writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
+       armada_370_xp_msi_reenable_percpu();
 
        return 0;
 }
 #else
+static void armada_370_xp_msi_reenable_percpu(void) {}
+
 static inline int armada_370_xp_msi_init(struct device_node *node,
                                         phys_addr_t main_int_phys_base)
 {
@@ -308,7 +327,16 @@ static inline int armada_370_xp_msi_init(struct device_node *node,
 
 static void armada_xp_mpic_perf_init(void)
 {
-       unsigned long cpuid = cpu_logical_map(smp_processor_id());
+       unsigned long cpuid;
+
+       /*
+        * This Performance Counter Overflow interrupt is specific to
+        * Armada 370 and XP. It is not available on Armada 375, 38x and 39x.
+        */
+       if (!of_machine_is_compatible("marvell,armada-370-xp"))
+               return;
+
+       cpuid = cpu_logical_map(smp_processor_id());
 
        /* Enable Performance Counter Overflow interrupts */
        writel(ARMADA_370_XP_INT_CAUSE_PERF(cpuid),
@@ -501,6 +529,8 @@ static void armada_xp_mpic_reenable_percpu(void)
        }
 
        ipi_resume();
+
+       armada_370_xp_msi_reenable_percpu();
 }
 
 static int armada_xp_mpic_starting_cpu(unsigned int cpu)
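
The per-CPU doorbell rework above changes what MSI affinity means here: each CPU owns one summary bit, and the composed message encodes the target CPU next to the hwirq (BIT(cpu + 8)). The set_affinity callback itself only records the effective affinity; once it returns IRQ_SET_MASK_OK, the MSI core recomposes and rewrites the message, which is what actually retargets the interrupt in hardware. Sketch of that core-side sequence (simplified from msi_domain_set_affinity() in kernel/irq/msi.c):

	ret = parent->chip->irq_set_affinity(parent, mask, force);
	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
		irq_chip_compose_msi_msg(irq_data, msg);
		irq_chip_write_msi_msg(irq_data, msg);
	}
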
index a47db16ff9603e1ab18575521d768c0d64198447..9c9fc3e2967ede2e3be21895af3d20229ee4ab8e 100644 (file)
@@ -77,8 +77,8 @@ static int __init aspeed_i2c_ic_of_init(struct device_node *node,
        }
 
        i2c_ic->parent_irq = irq_of_parse_and_map(node, 0);
-       if (i2c_ic->parent_irq < 0) {
-               ret = i2c_ic->parent_irq;
+       if (!i2c_ic->parent_irq) {
+               ret = -EINVAL;
                goto err_iounmap;
        }
 
index 18b77c3e6db4ba939b79152b5df8b79316c60e86..279e92cf0b16bcddd40088cbc3cc8de9da820617 100644 (file)
@@ -157,8 +157,8 @@ static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic,
        }
 
        irq = irq_of_parse_and_map(node, 0);
-       if (irq < 0) {
-               rc = irq;
+       if (!irq) {
+               rc = -EINVAL;
                goto err;
        }
 
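
This fix and the aspeed-i2c-ic one above are the same bug: irq_of_parse_and_map() returns an unsigned int that is 0 on failure, so a negative-value check can never fire. The prototype:

	unsigned int irq_of_parse_and_map(struct device_node *node, int index);
	/* returns the mapped virq number, or 0 on failure */
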
index fd079215c17fd366b8bd420395a2dbd7e9c10e85..142a7431745f940cc2ce327d8afc5ab70f7e6908 100644 (file)
@@ -315,7 +315,7 @@ static int __init bcm6345_l1_of_init(struct device_node *dn,
                        cpumask_set_cpu(idx, &intc->cpumask);
        }
 
-       if (!cpumask_weight(&intc->cpumask)) {
+       if (cpumask_empty(&intc->cpumask)) {
                ret = -ENODEV;
                goto out_free;
        }
index d36f536506ba48472bed47c93832aac7234a17f7..42d8a2438ebc2910f79bb852146a1ba338c4cbe9 100644 (file)
@@ -136,11 +136,11 @@ static inline bool handle_irq_perbit(struct pt_regs *regs, u32 hwirq,
                                     u32 irq_base)
 {
        if (hwirq == 0)
-               return 0;
+               return false;
 
        generic_handle_domain_irq(root_domain, irq_base + __fls(hwirq));
 
-       return 1;
+       return true;
 }
 
 /* gx6605s 64 irqs interrupt controller */
index a0fc764ec9dc6462478b747484175bae76964d07..5ff09de6c48fcbec5a9b954fa284c02617204de4 100644 (file)
@@ -1624,7 +1624,7 @@ static int its_select_cpu(struct irq_data *d,
 
                cpu = cpumask_pick_least_loaded(d, tmpmask);
        } else {
-               cpumask_and(tmpmask, irq_data_get_affinity_mask(d), cpu_online_mask);
+               cpumask_copy(tmpmask, aff_mask);
 
                /* If we cannot cross sockets, limit the search to that node */
                if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) &&
index b252d5534547c006b757dd2fc8db02ff92469b4e..2be8dea6b6b00149efa613ed541260723239e61e 100644 (file)
@@ -352,28 +352,27 @@ static int gic_peek_irq(struct irq_data *d, u32 offset)
 
 static void gic_poke_irq(struct irq_data *d, u32 offset)
 {
-       void (*rwp_wait)(void);
        void __iomem *base;
        u32 index, mask;
 
        offset = convert_offset_index(d, offset, &index);
        mask = 1 << (index % 32);
 
-       if (gic_irq_in_rdist(d)) {
+       if (gic_irq_in_rdist(d))
                base = gic_data_rdist_sgi_base();
-               rwp_wait = gic_redist_wait_for_rwp;
-       } else {
+       else
                base = gic_data.dist_base;
-               rwp_wait = gic_dist_wait_for_rwp;
-       }
 
        writel_relaxed(mask, base + offset + (index / 32) * 4);
-       rwp_wait();
 }
 
 static void gic_mask_irq(struct irq_data *d)
 {
        gic_poke_irq(d, GICD_ICENABLER);
+       if (gic_irq_in_rdist(d))
+               gic_redist_wait_for_rwp();
+       else
+               gic_dist_wait_for_rwp();
 }
 
 static void gic_eoimode1_mask_irq(struct irq_data *d)
@@ -420,7 +419,11 @@ static int gic_irq_set_irqchip_state(struct irq_data *d,
                break;
 
        case IRQCHIP_STATE_MASKED:
-               reg = val ? GICD_ICENABLER : GICD_ISENABLER;
+               if (val) {
+                       gic_mask_irq(d);
+                       return 0;
+               }
+               reg = GICD_ISENABLER;
                break;
 
        default:
@@ -556,7 +559,8 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
 
 static void gic_eoi_irq(struct irq_data *d)
 {
-       gic_write_eoir(gic_irq(d));
+       write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
+       isb();
 }
 
 static void gic_eoimode1_eoi_irq(struct irq_data *d)
@@ -574,7 +578,6 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 {
        enum gic_intid_range range;
        unsigned int irq = gic_irq(d);
-       void (*rwp_wait)(void);
        void __iomem *base;
        u32 offset, index;
        int ret;
@@ -590,17 +593,14 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
            type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
                return -EINVAL;
 
-       if (gic_irq_in_rdist(d)) {
+       if (gic_irq_in_rdist(d))
                base = gic_data_rdist_sgi_base();
-               rwp_wait = gic_redist_wait_for_rwp;
-       } else {
+       else
                base = gic_data.dist_base;
-               rwp_wait = gic_dist_wait_for_rwp;
-       }
 
        offset = convert_offset_index(d, GICD_ICFGR, &index);
 
-       ret = gic_configure_irq(index, type, base + offset, rwp_wait);
+       ret = gic_configure_irq(index, type, base + offset, NULL);
        if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) {
                /* Misconfigured PPIs are usually not fatal */
                pr_warn("GIC: PPI INTID%d is secure or misconfigured\n", irq);
@@ -640,82 +640,101 @@ static void gic_deactivate_unhandled(u32 irqnr)
                if (irqnr < 8192)
                        gic_write_dir(irqnr);
        } else {
-               gic_write_eoir(irqnr);
+               write_gicreg(irqnr, ICC_EOIR1_EL1);
+               isb();
        }
 }
 
-static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
+/*
+ * Follow a read of the IAR with any HW maintenance that needs to happen prior
+ * to invoking the relevant IRQ handler. We must do two things:
+ *
+ * (1) Ensure instruction ordering between a read of IAR and subsequent
+ *     instructions in the IRQ handler using an ISB.
+ *
+ *     It is possible for the IAR to report an IRQ which was signalled *after*
+ *     the CPU took an IRQ exception as multiple interrupts can race to be
+ *     recognized by the GIC, earlier interrupts could be withdrawn, and/or
+ *     later interrupts could be prioritized by the GIC.
+ *
+ *     For devices which are tightly coupled to the CPU, such as PMUs, a
+ *     context synchronization event is necessary to ensure that system
+ *     register state is not stale, as these may have been indirectly written
+ *     *after* exception entry.
+ *
+ * (2) Deactivate the interrupt when EOI mode 1 is in use.
+ */
+static inline void gic_complete_ack(u32 irqnr)
 {
-       bool irqs_enabled = interrupts_enabled(regs);
-       int err;
-
-       if (irqs_enabled)
-               nmi_enter();
-
        if (static_branch_likely(&supports_deactivate_key))
-               gic_write_eoir(irqnr);
-       /*
-        * Leave the PSR.I bit set to prevent other NMIs to be
-        * received while handling this one.
-        * PSR.I will be restored when we ERET to the
-        * interrupted context.
-        */
-       err = generic_handle_domain_nmi(gic_data.domain, irqnr);
-       if (err)
-               gic_deactivate_unhandled(irqnr);
+               write_gicreg(irqnr, ICC_EOIR1_EL1);
 
-       if (irqs_enabled)
-               nmi_exit();
+       isb();
 }
 
-static u32 do_read_iar(struct pt_regs *regs)
+static bool gic_rpr_is_nmi_prio(void)
 {
-       u32 iar;
+       if (!gic_supports_nmi())
+               return false;
 
-       if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) {
-               u64 pmr;
+       return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI));
+}
 
-               /*
-                * We were in a context with IRQs disabled. However, the
-                * entry code has set PMR to a value that allows any
-                * interrupt to be acknowledged, and not just NMIs. This can
-                * lead to surprising effects if the NMI has been retired in
-                * the meantime, and that there is an IRQ pending. The IRQ
-                * would then be taken in NMI context, something that nobody
-                * wants to debug twice.
-                *
-                * Until we sort this, drop PMR again to a level that will
-                * actually only allow NMIs before reading IAR, and then
-                * restore it to what it was.
-                */
-               pmr = gic_read_pmr();
-               gic_pmr_mask_irqs();
-               isb();
+static bool gic_irqnr_is_special(u32 irqnr)
+{
+       return irqnr >= 1020 && irqnr <= 1023;
+}
 
-               iar = gic_read_iar();
+static void __gic_handle_irq(u32 irqnr, struct pt_regs *regs)
+{
+       if (gic_irqnr_is_special(irqnr))
+               return;
 
-               gic_write_pmr(pmr);
-       } else {
-               iar = gic_read_iar();
+       gic_complete_ack(irqnr);
+
+       if (generic_handle_domain_irq(gic_data.domain, irqnr)) {
+               WARN_ONCE(true, "Unexpected interrupt (irqnr %u)\n", irqnr);
+               gic_deactivate_unhandled(irqnr);
        }
+}
+
+static void __gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
+{
+       if (gic_irqnr_is_special(irqnr))
+               return;
+
+       gic_complete_ack(irqnr);
 
-       return iar;
+       if (generic_handle_domain_nmi(gic_data.domain, irqnr)) {
+               WARN_ONCE(true, "Unexpected pseudo-NMI (irqnr %u)\n", irqnr);
+               gic_deactivate_unhandled(irqnr);
+       }
 }
 
-static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+/*
+ * An exception has been taken from a context with IRQs enabled, and this could
+ * be an IRQ or an NMI.
+ *
+ * The entry code called us with DAIF.IF set to keep NMIs masked. We must clear
+ * DAIF.IF (and update ICC_PMR_EL1 to mask regular IRQs) prior to returning,
+ * after handling any NMI but before handling any IRQ.
+ *
+ * The entry code has performed IRQ entry, and if an NMI is detected we must
+ * perform NMI entry/exit around invoking the handler.
+ */
+static void __gic_handle_irq_from_irqson(struct pt_regs *regs)
 {
+       bool is_nmi;
        u32 irqnr;
 
-       irqnr = do_read_iar(regs);
+       irqnr = gic_read_iar();
 
-       /* Check for special IDs first */
-       if ((irqnr >= 1020 && irqnr <= 1023))
-               return;
+       is_nmi = gic_rpr_is_nmi_prio();
 
-       if (gic_supports_nmi() &&
-           unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI))) {
-               gic_handle_nmi(irqnr, regs);
-               return;
+       if (is_nmi) {
+               nmi_enter();
+               __gic_handle_nmi(irqnr, regs);
+               nmi_exit();
        }
 
        if (gic_prio_masking_enabled()) {
@@ -723,15 +742,52 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
                gic_arch_enable_irqs();
        }
 
-       if (static_branch_likely(&supports_deactivate_key))
-               gic_write_eoir(irqnr);
-       else
-               isb();
+       if (!is_nmi)
+               __gic_handle_irq(irqnr, regs);
+}
 
-       if (generic_handle_domain_irq(gic_data.domain, irqnr)) {
-               WARN_ONCE(true, "Unexpected interrupt received!\n");
-               gic_deactivate_unhandled(irqnr);
-       }
+/*
+ * An exception has been taken from a context with IRQs disabled, which can only
+ * be an NMI.
+ *
+ * The entry code called us with DAIF.IF set to keep NMIs masked. We must leave
+ * DAIF.IF (and ICC_PMR_EL1) unchanged.
+ *
+ * The entry code has performed NMI entry.
+ */
+static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs)
+{
+       u64 pmr;
+       u32 irqnr;
+
+       /*
+        * We were in a context with IRQs disabled. However, the
+        * entry code has set PMR to a value that allows any
+        * interrupt to be acknowledged, and not just NMIs. This can
+        * lead to surprising effects if the NMI has been retired in
+        * the meantime, and that there is an IRQ pending. The IRQ
+        * would then be taken in NMI context, something that nobody
+        * wants to debug twice.
+        *
+        * Until we sort this, drop PMR again to a level that will
+        * actually only allow NMIs before reading IAR, and then
+        * restore it to what it was.
+        */
+       pmr = gic_read_pmr();
+       gic_pmr_mask_irqs();
+       isb();
+       irqnr = gic_read_iar();
+       gic_write_pmr(pmr);
+
+       __gic_handle_nmi(irqnr, regs);
+}
+
+static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+{
+       if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs)))
+               __gic_handle_irq_from_irqsoff(regs);
+       else
+               __gic_handle_irq_from_irqson(regs);
 }
 
 static u32 gic_get_pribits(void)
@@ -807,8 +863,8 @@ static void __init gic_dist_init(void)
        for (i = 0; i < GIC_ESPI_NR; i += 4)
                writel_relaxed(GICD_INT_DEF_PRI_X4, base + GICD_IPRIORITYRnE + i);
 
-       /* Now do the common stuff, and wait for the distributor to drain */
-       gic_dist_config(base, GIC_LINE_NR, gic_dist_wait_for_rwp);
+       /* Now do the common stuff */
+       gic_dist_config(base, GIC_LINE_NR, NULL);
 
        val = GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1;
        if (gic_data.rdists.gicd_typer2 & GICD_TYPER2_nASSGIcap) {
@@ -816,8 +872,9 @@ static void __init gic_dist_init(void)
                val |= GICD_CTLR_nASSGIreq;
        }
 
-       /* Enable distributor with ARE, Group1 */
+       /* Enable distributor with ARE, Group1, and wait for it to drain */
        writel_relaxed(val, base + GICD_CTLR);
+       gic_dist_wait_for_rwp();
 
        /*
         * Set all global interrupts to the boot CPU only. ARE must be
@@ -919,6 +976,7 @@ static int __gic_update_rdist_properties(struct redist_region *region,
                                         void __iomem *ptr)
 {
        u64 typer = gic_read_typer(ptr + GICR_TYPER);
+       u32 ctlr = readl_relaxed(ptr + GICR_CTLR);
 
        /* Boot-time cleanup */
        if ((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)) {
@@ -938,9 +996,18 @@ static int __gic_update_rdist_properties(struct redist_region *region,
 
        gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS);
 
-       /* RVPEID implies some form of DirectLPI, no matter what the doc says... :-/ */
+       /*
+        * TYPER.RVPEID implies some form of DirectLPI, no matter what the
+        * doc says... :-/ And CTLR.IR implies another subset of DirectLPI
+        * that the ITS driver can make use of for LPIs (and not VLPIs).
+        *
+        * These are 3 different ways to express the same thing, depending
+        * on the revision of the architecture and its relaxations over
+        * time. Just group them under the 'direct_lpi' banner.
+        */
        gic_data.rdists.has_rvpeid &= !!(typer & GICR_TYPER_RVPEID);
        gic_data.rdists.has_direct_lpi &= (!!(typer & GICR_TYPER_DirectLPIS) |
+                                          !!(ctlr & GICR_CTLR_IR) |
                                           gic_data.rdists.has_rvpeid);
        gic_data.rdists.has_vpend_valid_dirty &= !!(typer & GICR_TYPER_DIRTY);
 
@@ -962,7 +1029,11 @@ static void gic_update_rdist_properties(void)
        gic_iterate_rdists(__gic_update_rdist_properties);
        if (WARN_ON(gic_data.ppi_nr == UINT_MAX))
                gic_data.ppi_nr = 0;
-       pr_info("%d PPIs implemented\n", gic_data.ppi_nr);
+       pr_info("GICv3 features: %d PPIs%s%s\n",
+               gic_data.ppi_nr,
+               gic_data.has_rss ? ", RSS" : "",
+               gic_data.rdists.has_direct_lpi ? ", DirectLPI" : "");
+
        if (gic_data.rdists.has_vlpis)
                pr_info("GICv4 features: %s%s%s\n",
                        gic_data.rdists.has_direct_lpi ? "DirectLPI " : "",
@@ -1284,8 +1355,6 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
         */
        if (enabled)
                gic_unmask_irq(d);
-       else
-               gic_dist_wait_for_rwp();
 
        irq_data_update_effective_affinity(d, cpumask_of(cpu));
 
@@ -1803,8 +1872,6 @@ static int __init gic_init_bases(void __iomem *dist_base,
        irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED);
 
        gic_data.has_rss = !!(typer & GICD_TYPER_RSS);
-       pr_info("Distributor has %sRange Selector support\n",
-               gic_data.has_rss ? "" : "no ");
 
        if (typer & GICD_TYPER_MBIS) {
                err = mbi_init(handle, gic_data.domain);
@@ -1980,10 +2047,10 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
        u32 nr_redist_regions;
        int err, i;
 
-       dist_base = of_iomap(node, 0);
-       if (!dist_base) {
+       dist_base = of_io_request_and_map(node, 0, "GICD");
+       if (IS_ERR(dist_base)) {
                pr_err("%pOF: unable to map gic dist registers\n", node);
-               return -ENXIO;
+               return PTR_ERR(dist_base);
        }
 
        err = gic_validate_dist_version(dist_base);
@@ -2007,8 +2074,8 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
                int ret;
 
                ret = of_address_to_resource(node, 1 + i, &res);
-               rdist_regs[i].redist_base = of_iomap(node, 1 + i);
-               if (ret || !rdist_regs[i].redist_base) {
+               rdist_regs[i].redist_base = of_io_request_and_map(node, 1 + i, "GICR");
+               if (ret || IS_ERR(rdist_regs[i].redist_base)) {
                        pr_err("%pOF: couldn't map region %d\n", node, i);
                        err = -ENODEV;
                        goto out_unmap_rdist;
@@ -2034,7 +2101,7 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
 
 out_unmap_rdist:
        for (i = 0; i < nr_redist_regions; i++)
-               if (rdist_regs[i].redist_base)
+               if (rdist_regs[i].redist_base && !IS_ERR(rdist_regs[i].redist_base))
                        iounmap(rdist_regs[i].redist_base);
        kfree(rdist_regs);
 out_unmap_dist:
@@ -2081,6 +2148,7 @@ gic_acpi_parse_madt_redist(union acpi_subtable_headers *header,
                pr_err("Couldn't map GICR region @%llx\n", redist->base_address);
                return -ENOMEM;
        }
+       request_mem_region(redist->base_address, redist->length, "GICR");
 
        gic_acpi_register_redist(redist->base_address, redist_base);
        return 0;
@@ -2103,6 +2171,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
        redist_base = ioremap(gicc->gicr_base_address, size);
        if (!redist_base)
                return -ENOMEM;
+       request_mem_region(gicc->gicr_base_address, size, "GICR");
 
        gic_acpi_register_redist(gicc->gicr_base_address, redist_base);
        return 0;
@@ -2304,6 +2373,7 @@ gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end)
                pr_err("Unable to map GICD registers\n");
                return -ENOMEM;
        }
+       request_mem_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD");
 
        err = gic_validate_dist_version(acpi_data.dist_base);
        if (err) {
index 09c710ecc387de31ebd52b1d24be719c3206cfd2..820404cb56bc7396a5c5aa51ef8c76c3ccb782a9 100644 (file)
@@ -1115,7 +1115,8 @@ static int gic_irq_domain_translate(struct irq_domain *d,
                *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
 
                /* Make it clear that broken DTs are... broken */
-               WARN_ON(*type == IRQ_TYPE_NONE);
+               WARN(*type == IRQ_TYPE_NONE,
+                    "HW irq %ld has invalid type\n", *hwirq);
                return 0;
        }
 
@@ -1132,7 +1133,8 @@ static int gic_irq_domain_translate(struct irq_domain *d,
                *hwirq = fwspec->param[0];
                *type = fwspec->param[1];
 
-               WARN_ON(*type == IRQ_TYPE_NONE);
+               WARN(*type == IRQ_TYPE_NONE,
+                    "HW irq %ld has invalid type\n", *hwirq);
                return 0;
        }
 
index 8d91a02593fc2755d63375c9a4c24655aac831f3..96230a04ec23803771235045e4b555499f6ca7c2 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
 #include <linux/spinlock.h>
 
 #define CTRL_STRIDE_OFF(_t, _r)        (_t * 4 * _r)
@@ -70,7 +71,7 @@ static void imx_irqsteer_irq_mask(struct irq_data *d)
        raw_spin_unlock_irqrestore(&data->lock, flags);
 }
 
-static struct irq_chip imx_irqsteer_irq_chip = {
+static const struct irq_chip imx_irqsteer_irq_chip = {
        .name           = "irqsteer",
        .irq_mask       = imx_irqsteer_irq_mask,
        .irq_unmask     = imx_irqsteer_irq_unmask,
@@ -175,7 +176,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
        data->irq_count = DIV_ROUND_UP(irqs_num, 64);
        data->reg_num = irqs_num / 32;
 
-       if (IS_ENABLED(CONFIG_PM_SLEEP)) {
+       if (IS_ENABLED(CONFIG_PM)) {
                data->saved_reg = devm_kzalloc(&pdev->dev,
                                        sizeof(u32) * data->reg_num,
                                        GFP_KERNEL);
@@ -199,6 +200,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
                ret = -ENOMEM;
                goto out;
        }
+       irq_domain_set_pm_device(data->domain, &pdev->dev);
 
        if (!data->irq_count || data->irq_count > CHAN_MAX_OUTPUT_INT) {
                ret = -EINVAL;
@@ -219,6 +221,9 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, data);
 
+       pm_runtime_set_active(&pdev->dev);
+       pm_runtime_enable(&pdev->dev);
+
        return 0;
 out:
        clk_disable_unprepare(data->ipg_clk);
@@ -241,7 +246,7 @@ static int imx_irqsteer_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_PM
 static void imx_irqsteer_save_regs(struct irqsteer_data *data)
 {
        int i;
@@ -288,7 +293,10 @@ static int imx_irqsteer_resume(struct device *dev)
 #endif
 
 static const struct dev_pm_ops imx_irqsteer_pm_ops = {
-       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_irqsteer_suspend, imx_irqsteer_resume)
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                                     pm_runtime_force_resume)
+       SET_RUNTIME_PM_OPS(imx_irqsteer_suspend,
+                          imx_irqsteer_resume, NULL)
 };
 
 static const struct of_device_id imx_irqsteer_dt_ids[] = {
index abd011fcecf4a30d020c0c1e48fb44fd92acca8b..c7db617e1a2f62a9b9526cb43eb8dac93cf7fc59 100644 (file)
@@ -37,11 +37,26 @@ struct exiu_irq_data {
        u32             spi_base;
 };
 
-static void exiu_irq_eoi(struct irq_data *d)
+static void exiu_irq_ack(struct irq_data *d)
 {
        struct exiu_irq_data *data = irq_data_get_irq_chip_data(d);
 
        writel(BIT(d->hwirq), data->base + EIREQCLR);
+}
+
+static void exiu_irq_eoi(struct irq_data *d)
+{
+       struct exiu_irq_data *data = irq_data_get_irq_chip_data(d);
+
+       /*
+        * Level triggered interrupts are latched and must be cleared during
+        * EOI or the interrupt will be jammed on. Of course if a level
+        * triggered interrupt is still asserted then the write will not clear
+        * the interrupt.
+        */
+       if (irqd_is_level_type(d))
+               writel(BIT(d->hwirq), data->base + EIREQCLR);
+
        irq_chip_eoi_parent(d);
 }
 
@@ -91,10 +106,13 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type)
        writel_relaxed(val, data->base + EILVL);
 
        val = readl_relaxed(data->base + EIEDG);
-       if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH)
+       if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) {
                val &= ~BIT(d->hwirq);
-       else
+               irq_set_handler_locked(d, handle_fasteoi_irq);
+       } else {
                val |= BIT(d->hwirq);
+               irq_set_handler_locked(d, handle_fasteoi_ack_irq);
+       }
        writel_relaxed(val, data->base + EIEDG);
 
        writel_relaxed(BIT(d->hwirq), data->base + EIREQCLR);
@@ -104,6 +122,7 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type)
 
 static struct irq_chip exiu_irq_chip = {
        .name                   = "EXIU",
+       .irq_ack                = exiu_irq_ack,
        .irq_eoi                = exiu_irq_eoi,
        .irq_enable             = exiu_irq_enable,
        .irq_mask               = exiu_irq_mask,
index 4cd3e533740bf7669f38c1e66a57f6f89b3b9f07..a01e440494154ecbf93e07a0fcb5f052ecb478a9 100644 (file)
@@ -249,11 +249,13 @@ static int sun6i_r_intc_domain_alloc(struct irq_domain *domain,
        for (i = 0; i < nr_irqs; ++i, ++hwirq, ++virq) {
                if (hwirq == nmi_hwirq) {
                        irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-                                                     &sun6i_r_intc_nmi_chip, 0);
+                                                     &sun6i_r_intc_nmi_chip,
+                                                     NULL);
                        irq_set_handler(virq, handle_fasteoi_ack_irq);
                } else {
                        irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-                                                     &sun6i_r_intc_wakeup_chip, 0);
+                                                     &sun6i_r_intc_wakeup_chip,
+                                                     NULL);
                }
        }
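
The 0 -> NULL change is a type cleanup rather than a behaviour change: the last parameter of irq_domain_set_hwirq_and_chip() is a void *chip_data, so NULL is the idiomatic way to pass no chip data. The signature, approximately as of this kernel:

	int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
					  unsigned int virq,
					  irq_hw_number_t hwirq,
					  struct irq_chip *chip,
					  void *chip_data);
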
 
index 27933338f7b363d8d0061b86acbe260e688a4ce6..8c581c985aa7ddb0e9c96008807fb945eb1361ea 100644 (file)
@@ -151,14 +151,25 @@ static struct irq_chip xtensa_mx_irq_chip = {
        .irq_set_affinity = xtensa_mx_irq_set_affinity,
 };
 
+static void __init xtensa_mx_init_common(struct irq_domain *root_domain)
+{
+       unsigned int i;
+
+       irq_set_default_host(root_domain);
+       secondary_init_irq();
+
+       /* Initialize default IRQ routing to CPU 0 */
+       for (i = 0; i < XCHAL_NUM_EXTINTERRUPTS; ++i)
+               set_er(1, MIROUT(i));
+}
+
 int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent)
 {
        struct irq_domain *root_domain =
                irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
                                &xtensa_mx_irq_domain_ops,
                                &xtensa_mx_irq_chip);
-       irq_set_default_host(root_domain);
-       secondary_init_irq();
+       xtensa_mx_init_common(root_domain);
        return 0;
 }
 
@@ -168,8 +179,7 @@ static int __init xtensa_mx_init(struct device_node *np,
        struct irq_domain *root_domain =
                irq_domain_add_linear(np, NR_IRQS, &xtensa_mx_irq_domain_ops,
                                &xtensa_mx_irq_chip);
-       irq_set_default_host(root_domain);
-       secondary_init_irq();
+       xtensa_mx_init_common(root_domain);
        return 0;
 }
 IRQCHIP_DECLARE(xtensa_mx_irq_chip, "cdns,xtensa-mx", xtensa_mx_init);
index 097577ae3c47177a6ec0706e106aa14e5a0b66e2..ce13c272c3872366eedd8a7d94894a81ea78d98c 100644 (file)
@@ -336,7 +336,7 @@ static int bch_allocator_thread(void *arg)
                                mutex_unlock(&ca->set->bucket_lock);
                                blkdev_issue_discard(ca->bdev,
                                        bucket_to_sector(ca->set, bucket),
-                                       ca->sb.bucket_size, GFP_KERNEL, 0);
+                                       ca->sb.bucket_size, GFP_KERNEL);
                                mutex_lock(&ca->set->bucket_lock);
                        }
 
index 6230dfdd9286ee1fe780a6989fab2783dac4ccba..7510d1c983a5edff59299e16641d60c66860b8de 100644 (file)
@@ -107,15 +107,16 @@ void bch_btree_verify(struct btree *b)
 
 void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 {
+       unsigned int nr_segs = bio_segments(bio);
        struct bio *check;
        struct bio_vec bv, cbv;
        struct bvec_iter iter, citer = { 0 };
 
-       check = bio_kmalloc(GFP_NOIO, bio_segments(bio));
+       check = bio_kmalloc(nr_segs, GFP_NOIO);
        if (!check)
                return;
-       bio_set_dev(check, bio->bi_bdev);
-       check->bi_opf = REQ_OP_READ;
+       bio_init(check, bio->bi_bdev, check->bi_inline_vecs, nr_segs,
+                REQ_OP_READ);
        check->bi_iter.bi_sector = bio->bi_iter.bi_sector;
        check->bi_iter.bi_size = bio->bi_iter.bi_size;
 
@@ -146,7 +147,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 
        bio_free_pages(check);
 out_put:
-       bio_put(check);
+       bio_uninit(check);
+       kfree(check);
 }
 
 #endif
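
This bcache hunk and the dm-bufio one further down follow the 5.19 bio_kmalloc() convention visible in the diff: the function now only allocates, the caller initializes with bio_init() against the inline vecs, and teardown is bio_uninit() plus kfree() instead of bio_put(). The resulting pattern, with bdev and nr_vecs as placeholders:

	struct bio *bio = bio_kmalloc(nr_vecs, GFP_NOIO);

	if (!bio)
		return;
	bio_init(bio, bdev, bio->bi_inline_vecs, nr_vecs, REQ_OP_READ);
	/* ... add pages, submit, wait for completion ... */
	bio_uninit(bio);
	kfree(bio);
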
index 320fcdfef48efae88642c7728b23455160a7a2d3..9c5dde73da88e9ea498125941fff86182bd2310c 100644 (file)
@@ -1005,7 +1005,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
                bio_get(s->iop.bio);
 
                if (bio_op(bio) == REQ_OP_DISCARD &&
-                   !blk_queue_discard(bdev_get_queue(dc->bdev)))
+                   !bdev_max_discard_sectors(dc->bdev))
                        goto insert_data;
 
                /* I/O request sent to backing device */
@@ -1115,7 +1115,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
        bio->bi_private = ddip;
 
        if ((bio_op(bio) == REQ_OP_DISCARD) &&
-           !blk_queue_discard(bdev_get_queue(dc->bdev)))
+           !bdev_max_discard_sectors(dc->bdev))
                bio->bi_end_io(bio);
        else
                submit_bio_noacct(bio);
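
These request-path checks reflect the removal of QUEUE_FLAG_DISCARD from the
tree: whether a device supports discard is now read straight from the queue
limits, so a non-zero bdev_max_discard_sectors() is the whole test. The
idiom, with the helper name below assumed for illustration:

    /* Old form: blk_queue_discard(bdev_get_queue(bdev)) */
    static inline bool dev_supports_discard(struct block_device *bdev)
    {
            return bdev_max_discard_sectors(bdev) != 0;
    }
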
index bf3de149d3c9f8ff6695877ffd786df24bb04ff5..2f49e31142f6231c593fd0cc2ccd4a32a280e87b 100644 (file)
@@ -973,7 +973,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 
        blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
 
        blk_queue_write_cache(q, true, true);
 
@@ -2350,7 +2349,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
        ca->bdev->bd_holder = ca;
        ca->sb_disk = sb_disk;
 
-       if (blk_queue_discard(bdev_get_queue(bdev)))
+       if (bdev_max_discard_sectors(bdev))
                ca->discard = CACHE_DISCARD(&ca->sb);
 
        ret = cache_alloc(ca);
index d1029d71ff3bc26d64518addbb20b39a8d3fc6ed..c6f677059214d33b4f1177a0e4ec4b506194d20b 100644 (file)
@@ -1151,7 +1151,7 @@ STORE(__bch_cache)
        if (attr == &sysfs_discard) {
                bool v = strtoul_or_return(buf);
 
-               if (blk_queue_discard(bdev_get_queue(ca->bdev)))
+               if (bdev_max_discard_sectors(ca->bdev))
                        ca->discard = v;
 
                if (v != CACHE_DISCARD(&ca->sb)) {
index e9cbc70d5a0eec5d5b65571d3ede45943f3263c1..5ffa1dcf84cfc8f441398eacaaac1b16425ef544 100644 (file)
@@ -611,7 +611,8 @@ static void bio_complete(struct bio *bio)
 {
        struct dm_buffer *b = bio->bi_private;
        blk_status_t status = bio->bi_status;
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
        b->end_io(b, status);
 }
 
@@ -626,16 +627,14 @@ static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
        if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
                vec_size += 2;
 
-       bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
+       bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
        if (!bio) {
 dmio:
                use_dmio(b, rw, sector, n_sectors, offset);
                return;
        }
-
+       bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, rw);
        bio->bi_iter.bi_sector = sector;
-       bio_set_dev(bio, b->c->bdev);
-       bio_set_op_attrs(bio, rw, 0);
        bio->bi_end_io = bio_complete;
        bio->bi_private = b;
 
index 780a61bc6cc03912f9142334d3636846689c8bc3..28c5de8eca4a0fbd82aa692046d50e476663e736 100644 (file)
@@ -3329,13 +3329,6 @@ static int cache_iterate_devices(struct dm_target *ti,
        return r;
 }
 
-static bool origin_dev_supports_discard(struct block_device *origin_bdev)
-{
-       struct request_queue *q = bdev_get_queue(origin_bdev);
-
-       return blk_queue_discard(q);
-}
-
 /*
  * If discard_passdown was enabled verify that the origin device
  * supports discards.  Disable discard_passdown if not.
@@ -3349,7 +3342,7 @@ static void disable_passdown_if_not_supported(struct cache *cache)
        if (!cache->features.discard_passdown)
                return;
 
-       if (!origin_dev_supports_discard(origin_bdev))
+       if (!bdev_max_discard_sectors(origin_bdev))
                reason = "discard unsupported";
 
        else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
index 128316a73d0163a8513d86e30bcfcc306c4dc4af..811b0a5379d03d5487fd79b05fa4a1e71a437a23 100644 (file)
@@ -2016,13 +2016,6 @@ static void clone_resume(struct dm_target *ti)
        do_waker(&clone->waker.work);
 }
 
-static bool bdev_supports_discards(struct block_device *bdev)
-{
-       struct request_queue *q = bdev_get_queue(bdev);
-
-       return (q && blk_queue_discard(q));
-}
-
 /*
  * If discard_passdown was enabled verify that the destination device supports
  * discards. Disable discard_passdown if not.
@@ -2036,7 +2029,7 @@ static void disable_passdown_if_not_supported(struct clone *clone)
        if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
                return;
 
-       if (!bdev_supports_discards(dest_dev))
+       if (!bdev_max_discard_sectors(dest_dev))
                reason = "discard unsupported";
        else if (dest_limits->max_discard_sectors < clone->region_size)
                reason = "max discard sectors smaller than a region";
index 5762366333a27406bb293c39c1297b17a3f8028d..e4b95eaeec8c75514d4cb1b994cb863dd0906f48 100644 (file)
@@ -311,7 +311,7 @@ static void do_region(int op, int op_flags, unsigned region,
         * Reject unsupported discard and write same requests.
         */
        if (op == REQ_OP_DISCARD)
-               special_cmd_max_sectors = q->limits.max_discard_sectors;
+               special_cmd_max_sectors = bdev_max_discard_sectors(where->bdev);
        else if (op == REQ_OP_WRITE_ZEROES)
                special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
        if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) &&
index c9d036d6bb2ee60d23985070eb3fad3382e13bfd..e194226c89e54082e9c010a395ff9b624aa365bb 100644 (file)
@@ -866,9 +866,8 @@ static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv,
 static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
        struct log_writes_c *lc = ti->private;
-       struct request_queue *q = bdev_get_queue(lc->dev->bdev);
 
-       if (!q || !blk_queue_discard(q)) {
+       if (!bdev_max_discard_sectors(lc->dev->bdev)) {
                lc->device_supports_discard = false;
                limits->discard_granularity = lc->sectorsize;
                limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
index 2b26435a6946e8e7f774bf387d0f724ff51a3b96..9526ccbedafbac9ad74c413b2968a3bdc5da776d 100644 (file)
@@ -2963,13 +2963,8 @@ static void configure_discard_support(struct raid_set *rs)
        raid456 = rs_is_raid456(rs);
 
        for (i = 0; i < rs->raid_disks; i++) {
-               struct request_queue *q;
-
-               if (!rs->dev[i].rdev.bdev)
-                       continue;
-
-               q = bdev_get_queue(rs->dev[i].rdev.bdev);
-               if (!q || !blk_queue_discard(q))
+               if (!rs->dev[i].rdev.bdev ||
+                   !bdev_max_discard_sectors(rs->dev[i].rdev.bdev))
                        return;
 
                if (raid456) {
index 03541cfc2317cb0e17780fa8970ab8959d47b171..e7d42f6335a2af2c869b88e1a52cd48d04c886c8 100644 (file)
@@ -1820,9 +1820,7 @@ static int device_dax_write_cache_enabled(struct dm_target *ti,
 static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
                                sector_t start, sector_t len, void *data)
 {
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-
-       return !blk_queue_nonrot(q);
+       return !bdev_nonrot(dev->bdev);
 }
 
 static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
@@ -1890,9 +1888,7 @@ static bool dm_table_supports_nowait(struct dm_table *t)
 static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
                                      sector_t start, sector_t len, void *data)
 {
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-
-       return !blk_queue_discard(q);
+       return !bdev_max_discard_sectors(dev->bdev);
 }
 
 static bool dm_table_supports_discards(struct dm_table *t)
@@ -1924,9 +1920,7 @@ static int device_not_secure_erase_capable(struct dm_target *ti,
                                           struct dm_dev *dev, sector_t start,
                                           sector_t len, void *data)
 {
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-
-       return !blk_queue_secure_erase(q);
+       return !bdev_max_secure_erase_sectors(dev->bdev);
 }
 
 static bool dm_table_supports_secure_erase(struct dm_table *t)
@@ -1952,9 +1946,7 @@ static int device_requires_stable_pages(struct dm_target *ti,
                                        struct dm_dev *dev, sector_t start,
                                        sector_t len, void *data)
 {
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-
-       return blk_queue_stable_writes(q);
+       return bdev_stable_writes(dev->bdev);
 }
 
 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
@@ -1974,18 +1966,15 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
                blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
 
        if (!dm_table_supports_discards(t)) {
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
-               /* Must also clear discard limits... */
                q->limits.max_discard_sectors = 0;
                q->limits.max_hw_discard_sectors = 0;
                q->limits.discard_granularity = 0;
                q->limits.discard_alignment = 0;
                q->limits.discard_misaligned = 0;
-       } else
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+       }
 
-       if (dm_table_supports_secure_erase(t))
-               blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
+       if (!dm_table_supports_secure_erase(t))
+               q->limits.max_secure_erase_sectors = 0;
 
        if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
                wc = true;
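
With the flag gone, dm-table advertises or withdraws discard and secure-erase
support purely through queue_limits, as the hunk above shows: unsupported
features are disabled by zeroing their limits, and nothing needs to be set in
the supported case because limit stacking already handles it. Schematically,
with supports_discard/supports_secure_erase as assumed booleans:

    if (!supports_discard) {
            q->limits.max_discard_sectors = 0;
            q->limits.max_hw_discard_sectors = 0;
            q->limits.discard_granularity = 0;
    }
    if (!supports_secure_erase)
            q->limits.max_secure_erase_sectors = 0;
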
index 4d25d0e270313a9c1a6b6628c040f7a3b16d9b07..84c083f766736f37e6b514e273045a72e3fe8bd8 100644 (file)
@@ -398,8 +398,8 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da
        sector_t s = block_to_sectors(tc->pool, data_b);
        sector_t len = block_to_sectors(tc->pool, data_e - data_b);
 
-       return __blkdev_issue_discard(tc->pool_dev->bdev, s, len,
-                                     GFP_NOWAIT, 0, &op->bio);
+       return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT,
+                                     &op->bio);
 }
 
 static void end_discard(struct discard_op *op, int r)
@@ -2802,13 +2802,6 @@ static void requeue_bios(struct pool *pool)
 /*----------------------------------------------------------------
  * Binding of control targets to a pool object
  *--------------------------------------------------------------*/
-static bool data_dev_supports_discard(struct pool_c *pt)
-{
-       struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
-
-       return blk_queue_discard(q);
-}
-
 static bool is_factor(sector_t block_size, uint32_t n)
 {
        return !sector_div(block_size, n);
@@ -2828,7 +2821,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
        if (!pt->adjusted_pf.discard_passdown)
                return;
 
-       if (!data_dev_supports_discard(pt))
+       if (!bdev_max_discard_sectors(pt->data_dev->bdev))
                reason = "discard unsupported";
 
        else if (data_limits->max_discard_sectors < pool->sectors_per_block)
@@ -4057,8 +4050,6 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
                /*
                 * Must explicitly disallow stacking discard limits otherwise the
                 * block layer will stack them if pool's data device has support.
-                * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
-                * user to see that, so make sure to set all discard limits to 0.
                 */
                limits->discard_granularity = 0;
                return;
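
issue_discard() above picks up the narrowed __blkdev_issue_discard()
prototype, which lost its flags argument now that secure erase has its own
helper (blkdev_issue_secure_erase()). A sketch of the asynchronous pattern,
roughly what the synchronous blkdev_issue_discard() wrapper does internally:

    struct bio *bio = NULL;
    int ret;

    ret = __blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL, &bio);
    if (!ret && bio) {
            ret = submit_bio_wait(bio);
            bio_put(bio);
    }

dm-thin instead chains the returned bio onto op->bio and completes the whole
chain in end_discard().
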
index cac295cc8840efe5d78d60438f3276359c3b2c1d..0ec5d8b9b1a4e8040ddf93ecad27d377cc680dc6 100644 (file)
@@ -1001,7 +1001,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
        blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
        blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);
 
-       limits->discard_alignment = DMZ_BLOCK_SIZE;
+       limits->discard_alignment = 0;
        limits->discard_granularity = DMZ_BLOCK_SIZE;
        limits->max_discard_sectors = chunk_sectors;
        limits->max_hw_discard_sectors = chunk_sectors;
index 82957bd460e894556fed5eed0b991c14f3c0713b..39081338ca6162e4e6c337ed827f90ffde389ba2 100644 (file)
@@ -955,7 +955,6 @@ void disable_discard(struct mapped_device *md)
 
        /* device doesn't really support DISCARD, disable it */
        limits->max_discard_sectors = 0;
-       blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
 }
 
 void disable_write_zeroes(struct mapped_device *md)
@@ -982,7 +981,7 @@ static void clone_endio(struct bio *bio)
 
        if (unlikely(error == BLK_STS_TARGET)) {
                if (bio_op(bio) == REQ_OP_DISCARD &&
-                   !q->limits.max_discard_sectors)
+                   !bdev_max_discard_sectors(bio->bi_bdev))
                        disable_discard(md);
                else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
                         !q->limits.max_write_zeroes_sectors)
index bfd6026d78099b9cfe07bb2e509e077c091760ce..d87f674ab7622d46d7c20c6cd8ace4707d9015a2 100644 (file)
@@ -639,14 +639,6 @@ re_read:
        daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
        write_behind = le32_to_cpu(sb->write_behind);
        sectors_reserved = le32_to_cpu(sb->sectors_reserved);
-       /* Setup nodes/clustername only if bitmap version is
-        * cluster-compatible
-        */
-       if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
-               nodes = le32_to_cpu(sb->nodes);
-               strlcpy(bitmap->mddev->bitmap_info.cluster_name,
-                               sb->cluster_name, 64);
-       }
 
        /* verify that the bitmap-specific fields are valid */
        if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -668,6 +660,16 @@ re_read:
                goto out;
        }
 
+       /*
+        * Setup nodes/clustername only if bitmap version is
+        * cluster-compatible
+        */
+       if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
+               nodes = le32_to_cpu(sb->nodes);
+               strscpy(bitmap->mddev->bitmap_info.cluster_name,
+                               sb->cluster_name, 64);
+       }
+
        /* keep the array size field of the bitmap superblock up to date */
        sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
 
@@ -695,14 +697,13 @@ re_read:
        if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
                set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
        bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
-       strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
        err = 0;
 
 out:
        kunmap_atomic(sb);
-       /* Assigning chunksize is required for "re_read" */
-       bitmap->mddev->bitmap_info.chunksize = chunksize;
        if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
+               /* Assigning chunksize is required for "re_read" */
+               bitmap->mddev->bitmap_info.chunksize = chunksize;
                err = md_setup_cluster(bitmap->mddev, nodes);
                if (err) {
                        pr_warn("%s: Could not setup cluster service (%d)\n",
@@ -713,18 +714,18 @@ out:
                goto re_read;
        }
 
-
 out_no_sb:
-       if (test_bit(BITMAP_STALE, &bitmap->flags))
-               bitmap->events_cleared = bitmap->mddev->events;
-       bitmap->mddev->bitmap_info.chunksize = chunksize;
-       bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
-       bitmap->mddev->bitmap_info.max_write_behind = write_behind;
-       bitmap->mddev->bitmap_info.nodes = nodes;
-       if (bitmap->mddev->bitmap_info.space == 0 ||
-           bitmap->mddev->bitmap_info.space > sectors_reserved)
-               bitmap->mddev->bitmap_info.space = sectors_reserved;
-       if (err) {
+       if (err == 0) {
+               if (test_bit(BITMAP_STALE, &bitmap->flags))
+                       bitmap->events_cleared = bitmap->mddev->events;
+               bitmap->mddev->bitmap_info.chunksize = chunksize;
+               bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+               bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+               bitmap->mddev->bitmap_info.nodes = nodes;
+               if (bitmap->mddev->bitmap_info.space == 0 ||
+                       bitmap->mddev->bitmap_info.space > sectors_reserved)
+                       bitmap->mddev->bitmap_info.space = sectors_reserved;
+       } else {
                md_bitmap_print_sb(bitmap);
                if (bitmap->cluster_slot < 0)
                        md_cluster_stop(bitmap->mddev);
index 1c8a06b77c853b0be249b37c70926dc0323dc627..37cbcce3cc66bcb30c0629733e276b4f19f9420e 100644 (file)
@@ -201,7 +201,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
                pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
                goto out_err;
        }
-       strlcpy(res->name, name, namelen + 1);
+       strscpy(res->name, name, namelen + 1);
        if (with_lvb) {
                res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
                if (!res->lksb.sb_lvbptr) {
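
The strlcpy() to strscpy() conversions in this series trade strlcpy()'s
"length of source" return value, which silently hides truncation and can read
past the copied portion of an oversized source, for a return of the number of
bytes copied or -E2BIG on truncation, with the destination always
NUL-terminated. Truncation handling then looks like this sketch (buffer names
assumed):

    char name[DISK_NAME_LEN];
    ssize_t ret;

    ret = strscpy(name, src, sizeof(name));
    if (ret == -E2BIG)
            pr_warn("source truncated\n");  /* name is still NUL-terminated */
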
index 0f55b079371b136abf1fb5c8b338c91f4e7eba6f..138a3b25c5c82ce6a591e4a4b90b5a89628d260e 100644 (file)
@@ -64,7 +64,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
        struct linear_conf *conf;
        struct md_rdev *rdev;
        int i, cnt;
-       bool discard_supported = false;
 
        conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
        if (!conf)
@@ -96,9 +95,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 
                conf->array_sectors += rdev->sectors;
                cnt++;
-
-               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-                       discard_supported = true;
        }
        if (cnt != raid_disks) {
                pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
@@ -106,11 +102,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
                goto out;
        }
 
-       if (!discard_supported)
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
-       else
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
-
        /*
         * Here we calculate the device offsets.
         */
@@ -252,7 +243,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
                start_sector + data_offset;
 
        if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-                    !blk_queue_discard(bio->bi_bdev->bd_disk->queue))) {
+                    !bdev_max_discard_sectors(bio->bi_bdev))) {
                /* Just ignore it */
                bio_endio(bio);
        } else {
index 309b3af906ad39c7e19608b54203c3465494d6cc..707e802d0082a1ea8a1fd08f6cf259e8cc495ecd 100644 (file)
@@ -2627,14 +2627,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)
 
 static bool does_sb_need_changing(struct mddev *mddev)
 {
-       struct md_rdev *rdev;
+       struct md_rdev *rdev = NULL, *iter;
        struct mdp_superblock_1 *sb;
        int role;
 
        /* Find a good rdev */
-       rdev_for_each(rdev, mddev)
-               if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
+       rdev_for_each(iter, mddev)
+               if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
+                       rdev = iter;
                        break;
+               }
 
        /* No good device found. */
        if (!rdev)
@@ -2645,11 +2647,11 @@ static bool does_sb_need_changing(struct mddev *mddev)
        rdev_for_each(rdev, mddev) {
                role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
                /* Device activated? */
-               if (role == 0xffff && rdev->raid_disk >=0 &&
+               if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
                    !test_bit(Faulty, &rdev->flags))
                        return true;
                /* Device turned faulty? */
-               if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
+               if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
                        return true;
        }
 
@@ -2984,10 +2986,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 
        if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
                md_error(rdev->mddev, rdev);
-               if (test_bit(Faulty, &rdev->flags))
-                       err = 0;
-               else
+
+               if (test_bit(MD_BROKEN, &rdev->mddev->flags))
                        err = -EBUSY;
+               else
+                       err = 0;
        } else if (cmd_match(buf, "remove")) {
                if (rdev->mddev->pers) {
                        clear_bit(Blocked, &rdev->flags);
@@ -4028,7 +4031,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
        oldpriv = mddev->private;
        mddev->pers = pers;
        mddev->private = priv;
-       strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+       strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
        mddev->level = mddev->new_level;
        mddev->layout = mddev->new_layout;
        mddev->chunk_sectors = mddev->new_chunk_sectors;
@@ -4353,10 +4356,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
  *     like active, but no writes have been seen for a while (100msec).
  *
  * broken
- *     RAID0/LINEAR-only: same as clean, but array is missing a member.
- *     It's useful because RAID0/LINEAR mounted-arrays aren't stopped
- *     when a member is gone, so this state will at least alert the
- *     user that something is wrong.
+ *     Array is failed. It's useful because mounted arrays aren't stopped
+ *     when an array fails, so this state will at least alert the user that
+ *     something is wrong.
  */
 enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
                   write_pending, active_idle, broken, bad_word};
@@ -5763,7 +5765,7 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
                len--;
        if (len >= DISK_NAME_LEN)
                return -E2BIG;
-       strlcpy(buf, val, len+1);
+       strscpy(buf, val, len+1);
        if (strncmp(buf, "md_", 3) == 0)
                return md_alloc(0, buf);
        if (strncmp(buf, "md", 2) == 0 &&
@@ -5896,7 +5898,7 @@ int md_run(struct mddev *mddev)
                mddev->level = pers->level;
                mddev->new_level = pers->level;
        }
-       strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+       strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
        if (mddev->reshape_position != MaxSector &&
            pers->start_reshape == NULL) {
@@ -5991,8 +5993,7 @@ int md_run(struct mddev *mddev)
                bool nonrot = true;
 
                rdev_for_each(rdev, mddev) {
-                       if (rdev->raid_disk >= 0 &&
-                           !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
+                       if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
                                nonrot = false;
                                break;
                        }
@@ -7444,7 +7445,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
                err =  -ENODEV;
        else {
                md_error(mddev, rdev);
-               if (!test_bit(Faulty, &rdev->flags))
+               if (test_bit(MD_BROKEN, &mddev->flags))
                        err = -EBUSY;
        }
        rcu_read_unlock();
@@ -7985,13 +7986,16 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
 
        if (!mddev->pers || !mddev->pers->error_handler)
                return;
-       mddev->pers->error_handler(mddev,rdev);
-       if (mddev->degraded)
+       mddev->pers->error_handler(mddev, rdev);
+
+       if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
                set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
        sysfs_notify_dirent_safe(rdev->sysfs_state);
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-       md_wakeup_thread(mddev->thread);
+       if (!test_bit(MD_BROKEN, &mddev->flags)) {
+               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+               md_wakeup_thread(mddev->thread);
+       }
        if (mddev->event_work.func)
                queue_work(md_misc_wq, &mddev->event_work);
        md_new_event();
@@ -8585,7 +8589,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 {
        struct bio *discard_bio = NULL;
 
-       if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0,
+       if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO,
                        &discard_bio) || !discard_bio)
                return;
 
@@ -9671,7 +9675,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
 
                if (test_bit(Candidate, &rdev2->flags)) {
-                       if (role == 0xfffe) {
+                       if (role == MD_DISK_ROLE_FAULTY) {
                                pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
                                md_kick_rdev_from_array(rdev2);
                                continue;
@@ -9684,7 +9688,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                        /*
                         * got activated except reshape is happening.
                         */
-                       if (rdev2->raid_disk == -1 && role != 0xffff &&
+                       if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
                            !(le32_to_cpu(sb->feature_map) &
                              MD_FEATURE_RESHAPE_ACTIVE)) {
                                rdev2->saved_raid_disk = role;
@@ -9701,7 +9705,8 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                         * as faulty. The recovery is performed by the
                         * one who initiated the error.
                         */
-                       if ((role == 0xfffe) || (role == 0xfffd)) {
+                       if (role == MD_DISK_ROLE_FAULTY ||
+                           role == MD_DISK_ROLE_JOURNAL) {
                                md_error(mddev, rdev2);
                                clear_bit(Blocked, &rdev2->flags);
                        }
@@ -9791,16 +9796,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
 
 void md_reload_sb(struct mddev *mddev, int nr)
 {
-       struct md_rdev *rdev;
+       struct md_rdev *rdev = NULL, *iter;
        int err;
 
        /* Find the rdev */
-       rdev_for_each_rcu(rdev, mddev) {
-               if (rdev->desc_nr == nr)
+       rdev_for_each_rcu(iter, mddev) {
+               if (iter->desc_nr == nr) {
+                       rdev = iter;
                        break;
+               }
        }
 
-       if (!rdev || rdev->desc_nr != nr) {
+       if (!rdev) {
                pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
                return;
        }
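
does_sb_need_changing() and md_reload_sb() above both adopt the same pattern:
iterate with a dedicated cursor, copy it to the result pointer only on a
match, and test that pointer afterwards. This avoids relying on the value of
the rdev_for_each() cursor after the loop ends, where it no longer points at
a valid entry. The shape of the fix:

    struct md_rdev *rdev = NULL, *iter;

    rdev_for_each(iter, mddev)
            if (iter->desc_nr == nr) {
                    rdev = iter;
                    break;
            }

    if (!rdev)
            return;     /* not found; the cursor must not be used here */
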
index 6ac28386453368aaadaaf295bbeedae7dad9b5f0..cf2cbb17acbd423ccebf7d1a008fc47ae07ed6a5 100644 (file)
@@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                                int is_new);
 struct md_cluster_info;
 
-/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
+/**
+ * enum mddev_flags - md device flags.
+ * @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
+ * @MD_CLOSING: If set, we are closing the array, do not open it then.
+ * @MD_JOURNAL_CLEAN: A raid with journal is already clean.
+ * @MD_HAS_JOURNAL: The raid array has journal feature set.
+ * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only; means the node has already
+ *                            taken the resync lock and must release it.
+ * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
+ *                         calls to md_error() will never cause the array to
+ *                         become failed.
+ * @MD_HAS_PPL:  The raid array has PPL feature set.
+ * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
+ * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
+ *                      without taking reconfig_mutex.
+ * @MD_UPDATING_SB: md_check_recovery is updating the metadata without
+ *                  explicitly holding reconfig_mutex.
+ * @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report
+ *                that the array is ready yet.
+ * @MD_BROKEN: This is used to stop writes and mark the array as failed.
+ *
+ * Change UNSUPPORTED_MDDEV_FLAGS for each array type if a new flag is added.
+ */
 enum mddev_flags {
-       MD_ARRAY_FIRST_USE,     /* First use of array, needs initialization */
-       MD_CLOSING,             /* If set, we are closing the array, do not open
-                                * it then */
-       MD_JOURNAL_CLEAN,       /* A raid with journal is already clean */
-       MD_HAS_JOURNAL,         /* The raid array has journal feature set */
-       MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
-                                  * already took resync lock, need to
-                                  * release the lock */
-       MD_FAILFAST_SUPPORTED,  /* Using MD_FAILFAST on metadata writes is
-                                * supported as calls to md_error() will
-                                * never cause the array to become failed.
-                                */
-       MD_HAS_PPL,             /* The raid array has PPL feature set */
-       MD_HAS_MULTIPLE_PPLS,   /* The raid array has multiple PPLs feature set */
-       MD_ALLOW_SB_UPDATE,     /* md_check_recovery is allowed to update
-                                * the metadata without taking reconfig_mutex.
-                                */
-       MD_UPDATING_SB,         /* md_check_recovery is updating the metadata
-                                * without explicitly holding reconfig_mutex.
-                                */
-       MD_NOT_READY,           /* do_md_run() is active, so 'array_state'
-                                * must not report that array is ready yet
-                                */
-       MD_BROKEN,              /* This is used in RAID-0/LINEAR only, to stop
-                                * I/O in case an array member is gone/failed.
-                                */
+       MD_ARRAY_FIRST_USE,
+       MD_CLOSING,
+       MD_JOURNAL_CLEAN,
+       MD_HAS_JOURNAL,
+       MD_CLUSTER_RESYNC_LOCKED,
+       MD_FAILFAST_SUPPORTED,
+       MD_HAS_PPL,
+       MD_HAS_MULTIPLE_PPLS,
+       MD_ALLOW_SB_UPDATE,
+       MD_UPDATING_SB,
+       MD_NOT_READY,
+       MD_BROKEN,
 };
 
 enum mddev_sb_flags {
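
MD_BROKEN, formerly a RAID0/LINEAR-only marker, now means "array is failed"
for every personality: the per-level error handlers set it once the array can
no longer operate, md_error() declines to schedule recovery while it is set,
and callers report failure from it, as in this pattern taken from the md.c
hunks above:

    md_error(mddev, rdev);
    if (test_bit(MD_BROKEN, &mddev->flags))
            err = -EBUSY;   /* the array transitioned to failed */
    else
            err = 0;
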
index b21e101183f444054ef24a32e6d3ec783392c776..e11701e394ca0b40ea520996468506d4b4d4452e 100644 (file)
@@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
        pr_debug("md/raid0:%s: FINAL %d zones\n",
                 mdname(mddev), conf->nr_strip_zones);
 
-       if (conf->nr_strip_zones == 1) {
-               conf->layout = RAID0_ORIG_LAYOUT;
-       } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
-                  mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
-               conf->layout = mddev->layout;
-       } else if (default_layout == RAID0_ORIG_LAYOUT ||
-                  default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
-               conf->layout = default_layout;
-       } else {
-               pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
-                      mdname(mddev));
-               pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
-               err = -ENOTSUPP;
-               goto abort;
-       }
        /*
         * now since we have the hard sector sizes, we can make sure
         * chunk size is a multiple of that sector size
@@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
                         (unsigned long long)smallest->sectors);
        }
 
+       if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
+               conf->layout = RAID0_ORIG_LAYOUT;
+       } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+                  mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+               conf->layout = mddev->layout;
+       } else if (default_layout == RAID0_ORIG_LAYOUT ||
+                  default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+               conf->layout = default_layout;
+       } else {
+               pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
+                      mdname(mddev));
+               pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
+               err = -EOPNOTSUPP;
+               goto abort;
+       }
+
        pr_debug("md/raid0:%s: done.\n", mdname(mddev));
        *private_conf = conf;
 
@@ -399,7 +400,6 @@ static int raid0_run(struct mddev *mddev)
        conf = mddev->private;
        if (mddev->queue) {
                struct md_rdev *rdev;
-               bool discard_supported = false;
 
                blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
                blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
@@ -412,13 +412,7 @@ static int raid0_run(struct mddev *mddev)
                rdev_for_each(rdev, mddev) {
                        disk_stack_limits(mddev->gendisk, rdev->bdev,
                                          rdev->data_offset << 9);
-                       if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-                               discard_supported = true;
                }
-               if (!discard_supported)
-                       blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
-               else
-                       blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
        }
 
        /* calculate array device size */
index 99d5464a51f810dd7f479df6d8ab61c123926a87..99d5af1362d7675b5b67bee69d5150ab9395a93c 100644 (file)
@@ -165,9 +165,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
         * Allocate bios : 1 for reading, n-1 for writing
         */
        for (j = pi->raid_disks ; j-- ; ) {
-               bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+               bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
                if (!bio)
                        goto out_free_bio;
+               bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
                r1_bio->bios[j] = bio;
        }
        /*
@@ -206,8 +207,10 @@ out_free_pages:
                resync_free_pages(&rps[j]);
 
 out_free_bio:
-       while (++j < pi->raid_disks)
-               bio_put(r1_bio->bios[j]);
+       while (++j < pi->raid_disks) {
+               bio_uninit(r1_bio->bios[j]);
+               kfree(r1_bio->bios[j]);
+       }
        kfree(rps);
 
 out_free_r1bio:
@@ -225,7 +228,8 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
        for (i = pi->raid_disks; i--; ) {
                rp = get_resync_pages(r1bio->bios[i]);
                resync_free_pages(rp);
-               bio_put(r1bio->bios[i]);
+               bio_uninit(r1bio->bios[i]);
+               kfree(r1bio->bios[i]);
        }
 
        /* resync pages array stored in the 1st bio's .bi_private */
@@ -704,7 +708,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                        /* At least two disks to choose from so failfast is OK */
                        set_bit(R1BIO_FailFast, &r1_bio->state);
 
-               nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+               nonrot = bdev_nonrot(rdev->bdev);
                has_nonrot_disk |= nonrot;
                pending = atomic_read(&rdev->nr_pending);
                dist = abs(this_sector - conf->mirrors[disk].head_position);
@@ -802,7 +806,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
                if (test_bit(Faulty, &rdev->flags)) {
                        bio_io_error(bio);
                } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-                                   !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+                                   !bdev_max_discard_sectors(bio->bi_bdev)))
                        /* Just ignore it */
                        bio_endio(bio);
                else
@@ -1637,30 +1641,39 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
        seq_printf(seq, "]");
 }
 
+/**
+ * raid1_error() - RAID1 error handler.
+ * @mddev: affected md device.
+ * @rdev: member device to fail.
+ *
+ * The routine acknowledges the failure of &rdev and determines the new
+ * @mddev state. If the array became failed, then:
+ *     - &MD_BROKEN flag is set in &mddev->flags.
+ *     - recovery is disabled.
+ * Otherwise, the array is degraded:
+ *     - recovery is interrupted.
+ *     - &mddev->degraded is bumped.
+ *
+ * @rdev is marked as &Faulty, except when the array is failed and
+ * &mddev->fail_last_dev is off.
+ */
 static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r1conf *conf = mddev->private;
        unsigned long flags;
 
-       /*
-        * If it is not operational, then we have already marked it as dead
-        * else if it is the last working disks with "fail_last_dev == false",
-        * ignore the error, let the next level up know.
-        * else mark the drive as failed
-        */
        spin_lock_irqsave(&conf->device_lock, flags);
-       if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
-           && (conf->raid_disks - mddev->degraded) == 1) {
-               /*
-                * Don't fail the drive, act as though we were just a
-                * normal single drive.
-                * However don't try a recovery from this drive as
-                * it is very likely to fail.
-                */
-               conf->recovery_disabled = mddev->recovery_disabled;
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               return;
+
+       if (test_bit(In_sync, &rdev->flags) &&
+           (conf->raid_disks - mddev->degraded) == 1) {
+               set_bit(MD_BROKEN, &mddev->flags);
+
+               if (!mddev->fail_last_dev) {
+                       conf->recovery_disabled = mddev->recovery_disabled;
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
+                       return;
+               }
        }
        set_bit(Blocked, &rdev->flags);
        if (test_and_clear_bit(In_sync, &rdev->flags))
@@ -1826,8 +1839,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                        break;
                }
        }
-       if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
        print_conf(conf);
        return err;
 }
@@ -3106,7 +3117,6 @@ static int raid1_run(struct mddev *mddev)
        int i;
        struct md_rdev *rdev;
        int ret;
-       bool discard_supported = false;
 
        if (mddev->level != 1) {
                pr_warn("md/raid1:%s: raid level not set to mirroring (%d)\n",
@@ -3141,8 +3151,6 @@ static int raid1_run(struct mddev *mddev)
                        continue;
                disk_stack_limits(mddev->gendisk, rdev->bdev,
                                  rdev->data_offset << 9);
-               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-                       discard_supported = true;
        }
 
        mddev->degraded = 0;
@@ -3179,15 +3187,6 @@ static int raid1_run(struct mddev *mddev)
 
        md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
 
-       if (mddev->queue) {
-               if (discard_supported)
-                       blk_queue_flag_set(QUEUE_FLAG_DISCARD,
-                                               mddev->queue);
-               else
-                       blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
-                                                 mddev->queue);
-       }
-
        ret = md_integrity_register(mddev);
        if (ret) {
                md_unregister_thread(&mddev->thread);
index dfe7d62d3fbdd1b5b2e1a387ad68de279097886f..dfa576cdf11cd7c382aad044c49c2422125e9fd6 100644 (file)
@@ -145,15 +145,17 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
         * Allocate bios.
         */
        for (j = nalloc ; j-- ; ) {
-               bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+               bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
                if (!bio)
                        goto out_free_bio;
+               bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
                r10_bio->devs[j].bio = bio;
                if (!conf->have_replacement)
                        continue;
-               bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+               bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
                if (!bio)
                        goto out_free_bio;
+               bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
                r10_bio->devs[j].repl_bio = bio;
        }
        /*
@@ -197,9 +199,11 @@ out_free_pages:
 out_free_bio:
        for ( ; j < nalloc; j++) {
                if (r10_bio->devs[j].bio)
-                       bio_put(r10_bio->devs[j].bio);
+                       bio_uninit(r10_bio->devs[j].bio);
+               kfree(r10_bio->devs[j].bio);
                if (r10_bio->devs[j].repl_bio)
-                       bio_put(r10_bio->devs[j].repl_bio);
+                       bio_uninit(r10_bio->devs[j].repl_bio);
+               kfree(r10_bio->devs[j].repl_bio);
        }
        kfree(rps);
 out_free_r10bio:
@@ -220,12 +224,15 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
                if (bio) {
                        rp = get_resync_pages(bio);
                        resync_free_pages(rp);
-                       bio_put(bio);
+                       bio_uninit(bio);
+                       kfree(bio);
                }
 
                bio = r10bio->devs[j].repl_bio;
-               if (bio)
-                       bio_put(bio);
+               if (bio) {
+                       bio_uninit(bio);
+                       kfree(bio);
+               }
        }
 
        /* resync pages array stored in the 1st bio's .bi_private */
@@ -796,7 +803,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
                if (!do_balance)
                        break;
 
-               nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+               nonrot = bdev_nonrot(rdev->bdev);
                has_nonrot_disk |= nonrot;
                pending = atomic_read(&rdev->nr_pending);
                if (min_pending > pending && nonrot) {
@@ -888,7 +895,7 @@ static void flush_pending_writes(struct r10conf *conf)
                        if (test_bit(Faulty, &rdev->flags)) {
                                bio_io_error(bio);
                        } else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
-                                           !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+                                           !bdev_max_discard_sectors(bio->bi_bdev)))
                                /* Just ignore it */
                                bio_endio(bio);
                        else
@@ -1083,7 +1090,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
                if (test_bit(Faulty, &rdev->flags)) {
                        bio_io_error(bio);
                } else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
-                                   !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+                                   !bdev_max_discard_sectors(bio->bi_bdev)))
                        /* Just ignore it */
                        bio_endio(bio);
                else
@@ -1963,32 +1970,40 @@ static int enough(struct r10conf *conf, int ignore)
                _enough(conf, 1, ignore);
 }
 
+/**
+ * raid10_error() - RAID10 error handler.
+ * @mddev: affected md device.
+ * @rdev: member device to fail.
+ *
+ * The routine acknowledges the failure of &rdev and determines the new
+ * @mddev state. If the array became failed, then:
+ *     - &MD_BROKEN flag is set in &mddev->flags.
+ * Otherwise, the array is degraded:
+ *     - recovery is interrupted.
+ *     - &mddev->degraded is bumped.
+ *
+ * @rdev is marked as &Faulty, except when the array is failed and
+ * &mddev->fail_last_dev is off.
+ */
 static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r10conf *conf = mddev->private;
        unsigned long flags;
 
-       /*
-        * If it is not operational, then we have already marked it as dead
-        * else if it is the last working disks with "fail_last_dev == false",
-        * ignore the error, let the next level up know.
-        * else mark the drive as failed
-        */
        spin_lock_irqsave(&conf->device_lock, flags);
-       if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
-           && !enough(conf, rdev->raid_disk)) {
-               /*
-                * Don't fail the drive, just return an IO error.
-                */
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               return;
+
+       if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
+               set_bit(MD_BROKEN, &mddev->flags);
+
+               if (!mddev->fail_last_dev) {
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
+                       return;
+               }
        }
        if (test_and_clear_bit(In_sync, &rdev->flags))
                mddev->degraded++;
-       /*
-        * If recovery is running, make sure it aborts.
-        */
+
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        set_bit(Blocked, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
@@ -2144,8 +2159,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                rcu_assign_pointer(p->rdev, rdev);
                break;
        }
-       if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
 
        print_conf(conf);
        return err;
@@ -4069,7 +4082,6 @@ static int raid10_run(struct mddev *mddev)
        sector_t size;
        sector_t min_offset_diff = 0;
        int first = 1;
-       bool discard_supported = false;
 
        if (mddev_init_writes_pending(mddev) < 0)
                return -ENOMEM;
@@ -4140,20 +4152,9 @@ static int raid10_run(struct mddev *mddev)
                                          rdev->data_offset << 9);
 
                disk->head_position = 0;
-
-               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-                       discard_supported = true;
                first = 0;
        }
 
-       if (mddev->queue) {
-               if (discard_supported)
-                       blk_queue_flag_set(QUEUE_FLAG_DISCARD,
-                                               mddev->queue);
-               else
-                       blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
-                                                 mddev->queue);
-       }
        /* need to check that every block has at least one working mirror */
        if (!enough(conf, -1)) {
                pr_err("md/raid10:%s: not enough operational mirrors.\n",
index a7d50ff9020a82140d28af088703d120cbf84e7a..094a4042589eb5cfdb53393a2cf2660d60196295 100644 (file)
@@ -1318,7 +1318,7 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 
        r5l_write_super(log, end);
 
-       if (!blk_queue_discard(bdev_get_queue(bdev)))
+       if (!bdev_max_discard_sectors(bdev))
                return;
 
        mddev = log->rdev->mddev;
@@ -1344,14 +1344,14 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
        if (log->last_checkpoint < end) {
                blkdev_issue_discard(bdev,
                                log->last_checkpoint + log->rdev->data_offset,
-                               end - log->last_checkpoint, GFP_NOIO, 0);
+                               end - log->last_checkpoint, GFP_NOIO);
        } else {
                blkdev_issue_discard(bdev,
                                log->last_checkpoint + log->rdev->data_offset,
                                log->device_size - log->last_checkpoint,
-                               GFP_NOIO, 0);
+                               GFP_NOIO);
                blkdev_issue_discard(bdev, log->rdev->data_offset, end,
-                               GFP_NOIO, 0);
+                               GFP_NOIO);
        }
 }
 
index d3962d92df18a02b2531340901227a7d9a14ef04..55d065a87b8940409b6445fb5c7a3dd28a15ef85 100644 (file)
@@ -883,7 +883,9 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
                                 (unsigned long long)r_sector, dd_idx,
                                 (unsigned long long)sector);
 
-                       rdev = conf->disks[dd_idx].rdev;
+                       /* Array has not started so rcu dereference is safe */
+                       rdev = rcu_dereference_protected(
+                                       conf->disks[dd_idx].rdev, 1);
                        if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
                                      sector >= rdev->recovery_offset)) {
                                pr_debug("%s:%*s data member disk %d missing\n",
@@ -934,7 +936,10 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
                parity_sector = raid5_compute_sector(conf, r_sector_first + i,
                                0, &disk, &sh);
                BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
-               parity_rdev = conf->disks[sh.pd_idx].rdev;
+
+               /* Array has not started so rcu dereference is safe */
+               parity_rdev = rcu_dereference_protected(
+                                       conf->disks[sh.pd_idx].rdev, 1);
 
                BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
                pr_debug("%s:%*s write parity at sector %llu, disk %s\n",
@@ -1404,7 +1409,9 @@ int ppl_init_log(struct r5conf *conf)
 
        for (i = 0; i < ppl_conf->count; i++) {
                struct ppl_log *log = &ppl_conf->child_logs[i];
-               struct md_rdev *rdev = conf->disks[i].rdev;
+               /* Array has not started so rcu dereference is safe */
+               struct md_rdev *rdev =
+                       rcu_dereference_protected(conf->disks[i].rdev, 1);
 
                mutex_init(&log->io_mutex);
                spin_lock_init(&log->io_list_lock);
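
The PPL code only touches conf->disks[i].rdev before the array is started, so
no concurrent updater can exist; passing a constant-true condition to
rcu_dereference_protected() documents exactly that while keeping sparse quiet
about the __rcu annotation:

    /* Safe: the array has not started, so nothing else can update rdev. */
    struct md_rdev *rdev = rcu_dereference_protected(conf->disks[i].rdev, 1);
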
index 351d341a1ffa4cf2e9a6641541a39ed48b1a4609..39038fa8b1c8047756d080b5a6232badd58ea790 100644 (file)
@@ -79,18 +79,21 @@ static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
 }
 
 static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
+       __acquires(&conf->device_lock)
 {
        spin_lock_irq(conf->hash_locks + hash);
        spin_lock(&conf->device_lock);
 }
 
 static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
+       __releases(&conf->device_lock)
 {
        spin_unlock(&conf->device_lock);
        spin_unlock_irq(conf->hash_locks + hash);
 }
 
 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
+       __acquires(&conf->device_lock)
 {
        int i;
        spin_lock_irq(conf->hash_locks);
@@ -100,6 +103,7 @@ static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
 }
 
 static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
+       __releases(&conf->device_lock)
 {
        int i;
        spin_unlock(&conf->device_lock);
@@ -164,6 +168,7 @@ static bool stripe_is_lowprio(struct stripe_head *sh)
 }
 
 static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
+       __must_hold(&sh->raid_conf->device_lock)
 {
        struct r5conf *conf = sh->raid_conf;
        struct r5worker_group *group;
@@ -211,6 +216,7 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
 
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
                              struct list_head *temp_inactive_list)
+       __must_hold(&conf->device_lock)
 {
        int i;
        int injournal = 0;      /* number of date pages with R5_InJournal */
@@ -296,6 +302,7 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
 
 static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
                             struct list_head *temp_inactive_list)
+       __must_hold(&conf->device_lock)
 {
        if (atomic_dec_and_test(&sh->count))
                do_release_stripe(conf, sh, temp_inactive_list);
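
The __acquires()/__releases()/__must_hold() markers being added throughout
raid5.c are sparse context annotations: they compile away in a normal build
but let `make C=1` check that a function takes, drops, or requires the named
lock. A minimal sketch of the three forms (function names assumed):

    static void demo_lock(struct r5conf *conf)
            __acquires(&conf->device_lock)
    {
            spin_lock_irq(&conf->device_lock);
    }

    static void demo_unlock(struct r5conf *conf)
            __releases(&conf->device_lock)
    {
            spin_unlock_irq(&conf->device_lock);
    }

    static void demo_peek(struct r5conf *conf)
            __must_hold(&conf->device_lock)
    {
            /* caller must already hold conf->device_lock */
    }
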
@@ -350,9 +357,9 @@ static void release_inactive_stripe_list(struct r5conf *conf,
        }
 }
 
-/* should hold conf->device_lock already */
 static int release_stripe_list(struct r5conf *conf,
                               struct list_head *temp_inactive_list)
+       __must_hold(&conf->device_lock)
 {
        struct stripe_head *sh, *t;
        int count = 0;
@@ -629,6 +636,10 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
  * This is because some failed devices may only affect one
  * of the two sections, and some non-in_sync devices may
  * be insync in the section most affected by failed devices.
+ *
+ * Most calls to this function hold &conf->device_lock. Calls
+ * in raid5_run() do not require the lock as no other threads
+ * have been started yet.
  */
 int raid5_calc_degraded(struct r5conf *conf)
 {
@@ -686,17 +697,17 @@ int raid5_calc_degraded(struct r5conf *conf)
        return degraded;
 }
 
-static int has_failed(struct r5conf *conf)
+static bool has_failed(struct r5conf *conf)
 {
-       int degraded;
+       int degraded = conf->mddev->degraded;
 
-       if (conf->mddev->reshape_position == MaxSector)
-               return conf->mddev->degraded > conf->max_degraded;
+       if (test_bit(MD_BROKEN, &conf->mddev->flags))
+               return true;
 
-       degraded = raid5_calc_degraded(conf);
-       if (degraded > conf->max_degraded)
-               return 1;
-       return 0;
+       if (conf->mddev->reshape_position != MaxSector)
+               degraded = raid5_calc_degraded(conf);
+
+       return degraded > conf->max_degraded;
 }
 
 struct stripe_head *
@@ -2648,6 +2659,28 @@ static void shrink_stripes(struct r5conf *conf)
        conf->slab_cache = NULL;
 }
 
+/*
+ * This helper wraps rcu_dereference_protected() and can be used when
+ * it is known that the nr_pending of the rdev is elevated.
+ */
+static struct md_rdev *rdev_pend_deref(struct md_rdev __rcu *rdev)
+{
+       return rcu_dereference_protected(rdev,
+                       atomic_read(&rcu_access_pointer(rdev)->nr_pending));
+}
+
+/*
+ * This helper wraps rcu_dereference_protected() and should be used
+ * when it is known that the mddev_lock() is held. This is safe
+ * because raid5_remove_disk() holds the same lock.
+ */
+static struct md_rdev *rdev_mdlock_deref(struct mddev *mddev,
+                                        struct md_rdev __rcu *rdev)
+{
+       return rcu_dereference_protected(rdev,
+                       lockdep_is_held(&mddev->reconfig_mutex));
+}
+
 static void raid5_end_read_request(struct bio * bi)
 {
        struct stripe_head *sh = bi->bi_private;
@@ -2674,9 +2707,9 @@ static void raid5_end_read_request(struct bio * bi)
                 * In that case it moved down to 'rdev'.
                 * rdev is not removed until all requests are finished.
                 */
-               rdev = conf->disks[i].replacement;
+               rdev = rdev_pend_deref(conf->disks[i].replacement);
        if (!rdev)
-               rdev = conf->disks[i].rdev;
+               rdev = rdev_pend_deref(conf->disks[i].rdev);
 
        if (use_new_offset(conf, sh))
                s = sh->sector + rdev->new_data_offset;
@@ -2790,11 +2823,11 @@ static void raid5_end_write_request(struct bio *bi)
 
        for (i = 0 ; i < disks; i++) {
                if (bi == &sh->dev[i].req) {
-                       rdev = conf->disks[i].rdev;
+                       rdev = rdev_pend_deref(conf->disks[i].rdev);
                        break;
                }
                if (bi == &sh->dev[i].rreq) {
-                       rdev = conf->disks[i].replacement;
+                       rdev = rdev_pend_deref(conf->disks[i].replacement);
                        if (rdev)
                                replacement = 1;
                        else
@@ -2802,7 +2835,7 @@ static void raid5_end_write_request(struct bio *bi)
                                 * replaced it.  rdev is not removed
                                 * until all requests are finished.
                                 */
-                               rdev = conf->disks[i].rdev;
+                               rdev = rdev_pend_deref(conf->disks[i].rdev);
                        break;
                }
        }
@@ -2863,34 +2896,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
        unsigned long flags;
        pr_debug("raid456: error called\n");
 
+       pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
+               mdname(mddev), bdevname(rdev->bdev, b));
+
        spin_lock_irqsave(&conf->device_lock, flags);
+       set_bit(Faulty, &rdev->flags);
+       clear_bit(In_sync, &rdev->flags);
+       mddev->degraded = raid5_calc_degraded(conf);
 
-       if (test_bit(In_sync, &rdev->flags) &&
-           mddev->degraded == conf->max_degraded) {
-               /*
-                * Don't allow to achieve failed state
-                * Don't try to recover this device
-                */
+       if (has_failed(conf)) {
+               set_bit(MD_BROKEN, &conf->mddev->flags);
                conf->recovery_disabled = mddev->recovery_disabled;
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               return;
+
+               pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
+                       mdname(mddev), mddev->degraded, conf->raid_disks);
+       } else {
+               pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
+                       mdname(mddev), conf->raid_disks - mddev->degraded);
        }
 
-       set_bit(Faulty, &rdev->flags);
-       clear_bit(In_sync, &rdev->flags);
-       mddev->degraded = raid5_calc_degraded(conf);
        spin_unlock_irqrestore(&conf->device_lock, flags);
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
        set_bit(Blocked, &rdev->flags);
        set_mask_bits(&mddev->sb_flags, 0,
                      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
-       pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
-               "md/raid:%s: Operation continuing on %d devices.\n",
-               mdname(mddev),
-               bdevname(rdev->bdev, b),
-               mdname(mddev),
-               conf->raid_disks - mddev->degraded);
        r5c_update_on_rdev_error(mddev, rdev);
 }
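
The set_mask_bits() call above uses a mask of 0, so it degenerates to an atomic OR of the two superblock-change bits. Its shape, sketched after include/linux/bitops.h (illustrative, not the verbatim macro):

    #define sketch_set_mask_bits(ptr, mask, bits)			\
    ({								\
    	typeof(*(ptr)) old__, new__;				\
    	do {							\
    		old__ = READ_ONCE(*(ptr));			\
    		new__ = (old__ & ~(mask)) | (bits);		\
    	} while (cmpxchg(ptr, old__, new__) != old__);		\
    	new__;							\
    })
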
 
@@ -5213,23 +5243,23 @@ finish:
                        struct r5dev *dev = &sh->dev[i];
                        if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
                                /* We own a safe reference to the rdev */
-                               rdev = conf->disks[i].rdev;
+                               rdev = rdev_pend_deref(conf->disks[i].rdev);
                                if (!rdev_set_badblocks(rdev, sh->sector,
                                                        RAID5_STRIPE_SECTORS(conf), 0))
                                        md_error(conf->mddev, rdev);
                                rdev_dec_pending(rdev, conf->mddev);
                        }
                        if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
-                               rdev = conf->disks[i].rdev;
+                               rdev = rdev_pend_deref(conf->disks[i].rdev);
                                rdev_clear_badblocks(rdev, sh->sector,
                                                     RAID5_STRIPE_SECTORS(conf), 0);
                                rdev_dec_pending(rdev, conf->mddev);
                        }
                        if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
-                               rdev = conf->disks[i].replacement;
+                               rdev = rdev_pend_deref(conf->disks[i].replacement);
                                if (!rdev)
                                        /* rdev have been moved down */
-                                       rdev = conf->disks[i].rdev;
+                                       rdev = rdev_pend_deref(conf->disks[i].rdev);
                                rdev_clear_badblocks(rdev, sh->sector,
                                                     RAID5_STRIPE_SECTORS(conf), 0);
                                rdev_dec_pending(rdev, conf->mddev);
@@ -5256,6 +5286,7 @@ finish:
 }
 
 static void raid5_activate_delayed(struct r5conf *conf)
+       __must_hold(&conf->device_lock)
 {
        if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
                while (!list_empty(&conf->delayed_list)) {
@@ -5273,9 +5304,9 @@ static void raid5_activate_delayed(struct r5conf *conf)
 }
 
 static void activate_bit_delay(struct r5conf *conf,
-       struct list_head *temp_inactive_list)
+               struct list_head *temp_inactive_list)
+       __must_hold(&conf->device_lock)
 {
-       /* device_lock is held */
        struct list_head head;
        list_add(&head, &conf->bitmap_list);
        list_del_init(&conf->bitmap_list);
@@ -5500,6 +5531,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
  * handle_list.
  */
 static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
+       __must_hold(&conf->device_lock)
 {
        struct stripe_head *sh, *tmp;
        struct list_head *handle_list = NULL;
@@ -6288,7 +6320,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
         */
        rcu_read_lock();
        for (i = 0; i < conf->raid_disks; i++) {
-               struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
+               struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
 
                if (rdev == NULL || test_bit(Faulty, &rdev->flags))
                        still_degraded = 1;
@@ -6371,8 +6403,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
 static int handle_active_stripes(struct r5conf *conf, int group,
                                 struct r5worker *worker,
                                 struct list_head *temp_inactive_list)
-               __releases(&conf->device_lock)
-               __acquires(&conf->device_lock)
+               __must_hold(&conf->device_lock)
 {
        struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
        int i, batch_size = 0, hash;
@@ -7166,7 +7197,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        int i;
        int group_cnt;
        struct r5worker_group *new_group;
-       int ret;
+       int ret = -ENOMEM;
 
        if (mddev->new_level != 5
            && mddev->new_level != 4
@@ -7225,6 +7256,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        spin_lock_init(&conf->device_lock);
        seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
        mutex_init(&conf->cache_size_mutex);
+
        init_waitqueue_head(&conf->wait_for_quiescent);
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
@@ -7242,7 +7274,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        rdev_for_each(rdev, mddev) {
                if (test_bit(Journal, &rdev->flags))
                        continue;
-               if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
+               if (bdev_nonrot(rdev->bdev)) {
                        conf->batch_bio_dispatch = false;
                        break;
                }
@@ -7302,11 +7334,13 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 
        conf->level = mddev->new_level;
        conf->chunk_sectors = mddev->new_chunk_sectors;
-       if (raid5_alloc_percpu(conf) != 0)
+       ret = raid5_alloc_percpu(conf);
+       if (ret)
                goto abort;
 
        pr_debug("raid456: run(%s) called.\n", mdname(mddev));
 
+       ret = -EIO;
        rdev_for_each(rdev, mddev) {
                raid_disk = rdev->raid_disk;
                if (raid_disk >= max_disks
@@ -7317,11 +7351,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                if (test_bit(Replacement, &rdev->flags)) {
                        if (disk->replacement)
                                goto abort;
-                       disk->replacement = rdev;
+                       RCU_INIT_POINTER(disk->replacement, rdev);
                } else {
                        if (disk->rdev)
                                goto abort;
-                       disk->rdev = rdev;
+                       RCU_INIT_POINTER(disk->rdev, rdev);
                }
 
                if (test_bit(In_sync, &rdev->flags)) {
@@ -7370,6 +7404,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        if (grow_stripes(conf, conf->min_nr_stripes)) {
                pr_warn("md/raid:%s: couldn't allocate %dkB for buffers\n",
                        mdname(mddev), memory);
+               ret = -ENOMEM;
                goto abort;
        } else
                pr_debug("md/raid:%s: allocated %dkB\n", mdname(mddev), memory);
@@ -7383,7 +7418,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        conf->shrinker.count_objects = raid5_cache_count;
        conf->shrinker.batch = 128;
        conf->shrinker.flags = 0;
-       if (register_shrinker(&conf->shrinker)) {
+       ret = register_shrinker(&conf->shrinker);
+       if (ret) {
                pr_warn("md/raid:%s: couldn't register shrinker.\n",
                        mdname(mddev));
                goto abort;
@@ -7394,17 +7430,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        if (!conf->thread) {
                pr_warn("md/raid:%s: couldn't allocate thread.\n",
                        mdname(mddev));
+               ret = -ENOMEM;
                goto abort;
        }
 
        return conf;
 
  abort:
-       if (conf) {
+       if (conf)
                free_conf(conf);
-               return ERR_PTR(-EIO);
-       } else
-               return ERR_PTR(-ENOMEM);
+       return ERR_PTR(ret);
 }
 
 static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
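
With every failure path now funnelled into a single ERR_PTR(ret) return, the caller can recover the precise errno instead of a blanket -EIO or -ENOMEM. raid5_run() consumes it in essentially this way (condensed sketch):

    struct r5conf *conf = setup_conf(mddev);

    if (IS_ERR(conf))
    	return PTR_ERR(conf);	/* -ENOMEM, -EINVAL, -EIO, ... */
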
@@ -7621,17 +7656,18 @@ static int raid5_run(struct mddev *mddev)
 
        for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
             i++) {
-               rdev = conf->disks[i].rdev;
+               rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
                if (!rdev && conf->disks[i].replacement) {
                        /* The replacement is all we have yet */
-                       rdev = conf->disks[i].replacement;
+                       rdev = rdev_mdlock_deref(mddev,
+                                                conf->disks[i].replacement);
                        conf->disks[i].replacement = NULL;
                        clear_bit(Replacement, &rdev->flags);
-                       conf->disks[i].rdev = rdev;
+                       rcu_assign_pointer(conf->disks[i].rdev, rdev);
                }
                if (!rdev)
                        continue;
-               if (conf->disks[i].replacement &&
+               if (rcu_access_pointer(conf->disks[i].replacement) &&
                    conf->reshape_progress != MaxSector) {
                        /* replacements and reshape simply do not mix. */
                        pr_warn("md: cannot handle concurrent replacement and reshape.\n");
@@ -7749,7 +7785,6 @@ static int raid5_run(struct mddev *mddev)
                 */
                stripe = stripe * PAGE_SIZE;
                stripe = roundup_pow_of_two(stripe);
-               mddev->queue->limits.discard_alignment = stripe;
                mddev->queue->limits.discard_granularity = stripe;
 
                blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
@@ -7776,14 +7811,10 @@ static int raid5_run(struct mddev *mddev)
                 * A better idea might be to turn DISCARD into WRITE_ZEROES
                 * requests, as that is required to be safe.
                 */
-               if (devices_handle_discard_safely &&
-                   mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
-                   mddev->queue->limits.discard_granularity >= stripe)
-                       blk_queue_flag_set(QUEUE_FLAG_DISCARD,
-                                               mddev->queue);
-               else
-                       blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
-                                               mddev->queue);
+               if (!devices_handle_discard_safely ||
+                   mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
+                   mddev->queue->limits.discard_granularity < stripe)
+                       blk_queue_max_discard_sectors(mddev->queue, 0);
 
                blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
        }
@@ -7832,8 +7863,8 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
 
 static void print_raid5_conf (struct r5conf *conf)
 {
+       struct md_rdev *rdev;
        int i;
-       struct disk_info *tmp;
 
        pr_debug("RAID conf printout:\n");
        if (!conf) {
@@ -7844,50 +7875,54 @@ static void print_raid5_conf (struct r5conf *conf)
               conf->raid_disks,
               conf->raid_disks - conf->mddev->degraded);
 
+       rcu_read_lock();
        for (i = 0; i < conf->raid_disks; i++) {
                char b[BDEVNAME_SIZE];
-               tmp = conf->disks + i;
-               if (tmp->rdev)
+               rdev = rcu_dereference(conf->disks[i].rdev);
+               if (rdev)
                        pr_debug(" disk %d, o:%d, dev:%s\n",
-                              i, !test_bit(Faulty, &tmp->rdev->flags),
-                              bdevname(tmp->rdev->bdev, b));
+                              i, !test_bit(Faulty, &rdev->flags),
+                              bdevname(rdev->bdev, b));
        }
+       rcu_read_unlock();
 }
 
 static int raid5_spare_active(struct mddev *mddev)
 {
        int i;
        struct r5conf *conf = mddev->private;
-       struct disk_info *tmp;
+       struct md_rdev *rdev, *replacement;
        int count = 0;
        unsigned long flags;
 
        for (i = 0; i < conf->raid_disks; i++) {
-               tmp = conf->disks + i;
-               if (tmp->replacement
-                   && tmp->replacement->recovery_offset == MaxSector
-                   && !test_bit(Faulty, &tmp->replacement->flags)
-                   && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
+               rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
+               replacement = rdev_mdlock_deref(mddev,
+                                               conf->disks[i].replacement);
+               if (replacement
+                   && replacement->recovery_offset == MaxSector
+                   && !test_bit(Faulty, &replacement->flags)
+                   && !test_and_set_bit(In_sync, &replacement->flags)) {
                        /* Replacement has just become active. */
-                       if (!tmp->rdev
-                           || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
+                       if (!rdev
+                           || !test_and_clear_bit(In_sync, &rdev->flags))
                                count++;
-                       if (tmp->rdev) {
+                       if (rdev) {
                                /* Replaced device not technically faulty,
                                 * but we need to be sure it gets removed
                                 * and never re-added.
                                 */
-                               set_bit(Faulty, &tmp->rdev->flags);
+                               set_bit(Faulty, &rdev->flags);
                                sysfs_notify_dirent_safe(
-                                       tmp->rdev->sysfs_state);
+                                       rdev->sysfs_state);
                        }
-                       sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
-               } else if (tmp->rdev
-                   && tmp->rdev->recovery_offset == MaxSector
-                   && !test_bit(Faulty, &tmp->rdev->flags)
-                   && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+                       sysfs_notify_dirent_safe(replacement->sysfs_state);
+               } else if (rdev
+                   && rdev->recovery_offset == MaxSector
+                   && !test_bit(Faulty, &rdev->flags)
+                   && !test_and_set_bit(In_sync, &rdev->flags)) {
                        count++;
-                       sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
+                       sysfs_notify_dirent_safe(rdev->sysfs_state);
                }
        }
        spin_lock_irqsave(&conf->device_lock, flags);
@@ -7902,8 +7937,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
        struct r5conf *conf = mddev->private;
        int err = 0;
        int number = rdev->raid_disk;
-       struct md_rdev **rdevp;
+       struct md_rdev __rcu **rdevp;
        struct disk_info *p = conf->disks + number;
+       struct md_rdev *tmp;
 
        print_raid5_conf(conf);
        if (test_bit(Journal, &rdev->flags) && conf->log) {
@@ -7921,9 +7957,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                log_exit(conf);
                return 0;
        }
-       if (rdev == p->rdev)
+       if (rdev == rcu_access_pointer(p->rdev))
                rdevp = &p->rdev;
-       else if (rdev == p->replacement)
+       else if (rdev == rcu_access_pointer(p->replacement))
                rdevp = &p->replacement;
        else
                return 0;
@@ -7943,18 +7979,20 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
        if (!test_bit(Faulty, &rdev->flags) &&
            mddev->recovery_disabled != conf->recovery_disabled &&
            !has_failed(conf) &&
-           (!p->replacement || p->replacement == rdev) &&
+           (!rcu_access_pointer(p->replacement) ||
+            rcu_access_pointer(p->replacement) == rdev) &&
            number < conf->raid_disks) {
                err = -EBUSY;
                goto abort;
        }
        *rdevp = NULL;
        if (!test_bit(RemoveSynchronized, &rdev->flags)) {
+               lockdep_assert_held(&mddev->reconfig_mutex);
                synchronize_rcu();
                if (atomic_read(&rdev->nr_pending)) {
                        /* lost the race, try later */
                        err = -EBUSY;
-                       *rdevp = rdev;
+                       rcu_assign_pointer(*rdevp, rdev);
                }
        }
        if (!err) {
@@ -7962,17 +8000,19 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                if (err)
                        goto abort;
        }
-       if (p->replacement) {
+
+       tmp = rcu_access_pointer(p->replacement);
+       if (tmp) {
                /* We must have just cleared 'rdev' */
-               p->rdev = p->replacement;
-               clear_bit(Replacement, &p->replacement->flags);
+               rcu_assign_pointer(p->rdev, tmp);
+               clear_bit(Replacement, &tmp->flags);
                smp_mb(); /* Make sure other CPUs may see both as identical
                           * but will never see neither - if they are careful
                           */
-               p->replacement = NULL;
+               rcu_assign_pointer(p->replacement, NULL);
 
                if (!err)
-                       err = log_modify(conf, p->rdev, true);
+                       err = log_modify(conf, tmp, true);
        }
 
        clear_bit(WantReplacement, &rdev->flags);
@@ -7988,6 +8028,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
        int ret, err = -EEXIST;
        int disk;
        struct disk_info *p;
+       struct md_rdev *tmp;
        int first = 0;
        int last = conf->raid_disks - 1;
 
@@ -8045,7 +8086,8 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
        }
        for (disk = first; disk <= last; disk++) {
                p = conf->disks + disk;
-               if (test_bit(WantReplacement, &p->rdev->flags) &&
+               tmp = rdev_mdlock_deref(mddev, p->rdev);
+               if (test_bit(WantReplacement, &tmp->flags) &&
                    p->replacement == NULL) {
                        clear_bit(In_sync, &rdev->flags);
                        set_bit(Replacement, &rdev->flags);
@@ -8336,6 +8378,7 @@ static void end_reshape(struct r5conf *conf)
 static void raid5_finish_reshape(struct mddev *mddev)
 {
        struct r5conf *conf = mddev->private;
+       struct md_rdev *rdev;
 
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 
@@ -8347,10 +8390,12 @@ static void raid5_finish_reshape(struct mddev *mddev)
                        for (d = conf->raid_disks ;
                             d < conf->raid_disks - mddev->delta_disks;
                             d++) {
-                               struct md_rdev *rdev = conf->disks[d].rdev;
+                               rdev = rdev_mdlock_deref(mddev,
+                                                        conf->disks[d].rdev);
                                if (rdev)
                                        clear_bit(In_sync, &rdev->flags);
-                               rdev = conf->disks[d].replacement;
+                               rdev = rdev_mdlock_deref(mddev,
+                                               conf->disks[d].replacement);
                                if (rdev)
                                        clear_bit(In_sync, &rdev->flags);
                        }
index 9e8486a9e4451df367b9ececb15ff34476d535b8..638d29863503b836899d72dcf54be1af1b88b6b3 100644 (file)
@@ -473,7 +473,8 @@ enum {
  */
 
 struct disk_info {
-       struct md_rdev  *rdev, *replacement;
+       struct md_rdev  __rcu *rdev;
+       struct md_rdev  __rcu *replacement;
        struct page     *extra_page; /* extra page to use in prexor */
 };
 
@@ -560,6 +561,16 @@ struct r5pending_data {
        struct bio_list bios;
 };
 
+struct raid5_percpu {
+       struct page     *spare_page; /* Used when checking P/Q in raid6 */
+       void            *scribble;  /* space for constructing buffer
+                                    * lists and performing address
+                                    * conversions
+                                    */
+       int             scribble_obj_size;
+       local_lock_t    lock;
+};
+
 struct r5conf {
        struct hlist_head       *stripe_hashtbl;
        /* only protect corresponding hash list and inactive_list */
@@ -635,15 +646,7 @@ struct r5conf {
                                            */
        int                     recovery_disabled;
        /* per cpu variables */
-       struct raid5_percpu {
-               struct page     *spare_page; /* Used when checking P/Q in raid6 */
-               void            *scribble;  /* space for constructing buffer
-                                            * lists and performing address
-                                            * conversions
-                                            */
-               int             scribble_obj_size;
-               local_lock_t    lock;
-       } __percpu *percpu;
+       struct raid5_percpu __percpu *percpu;
        int scribble_disks;
        int scribble_sectors;
        struct hlist_node node;
index 00db21ff115e4f28e2bf8017a440aefeb7d8ebcc..82369c6f889e24f8cfb9d55c56d4e98578f49180 100644 (file)
 #include "lkdtm.h"
 #include <linux/stackleak.h>
 
-void lkdtm_STACKLEAK_ERASING(void)
+#if defined(CONFIG_GCC_PLUGIN_STACKLEAK)
+/*
+ * Check that stackleak tracks the lowest stack pointer and erases the stack
+ * below this as expected.
+ *
+ * To prevent the lowest stack pointer changing during the test, IRQs are
+ * masked and instrumentation of this function is disabled. We assume that the
+ * compiler will create a fixed-size stack frame for this function.
+ *
+ * Any non-inlined function may make further use of the stack, altering the
+ * lowest stack pointer and/or clobbering poison values. To avoid spurious
+ * failures we must avoid printing until the end of the test or have already
+ * encountered a failure condition.
+ */
+static void noinstr check_stackleak_irqoff(void)
 {
-       unsigned long *sp, left, found, i;
-       const unsigned long check_depth =
-                       STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+       const unsigned long task_stack_base = (unsigned long)task_stack_page(current);
+       const unsigned long task_stack_low = stackleak_task_low_bound(current);
+       const unsigned long task_stack_high = stackleak_task_high_bound(current);
+       const unsigned long current_sp = current_stack_pointer;
+       const unsigned long lowest_sp = current->lowest_stack;
+       unsigned long untracked_high;
+       unsigned long poison_high, poison_low;
        bool test_failed = false;
 
        /*
-        * For the details about the alignment of the poison values, see
-        * the comment in stackleak_track_stack().
+        * Check that the current and lowest recorded stack pointer values fall
+        * within the expected task stack boundaries. These tests should never
+        * fail unless the boundaries are incorrect or we're clobbering the
+        * STACK_END_MAGIC, and in either case something is seriously wrong.
         */
-       sp = PTR_ALIGN(&i, sizeof(unsigned long));
-
-       left = ((unsigned long)sp & (THREAD_SIZE - 1)) / sizeof(unsigned long);
-       sp--;
+       if (current_sp < task_stack_low || current_sp >= task_stack_high) {
+               pr_err("FAIL: current_stack_pointer (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n",
+                      current_sp, task_stack_low, task_stack_high - 1);
+               test_failed = true;
+               goto out;
+       }
+       if (lowest_sp < task_stack_low || lowest_sp >= task_stack_high) {
+               pr_err("FAIL: current->lowest_stack (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n",
+                      lowest_sp, task_stack_low, task_stack_high - 1);
+               test_failed = true;
+               goto out;
+       }
 
        /*
-        * One 'long int' at the bottom of the thread stack is reserved
-        * and not poisoned.
+        * Depending on what has run prior to this test, the lowest recorded
+        * stack pointer could be above or below the current stack pointer.
+        * Start from the lower of the two.
+        *
+        * Poison values are naturally-aligned unsigned longs. As the current
+        * stack pointer might not be sufficiently aligned, we must align
+        * downwards to find the lowest known stack pointer value. This is the
+        * high boundary for a portion of the stack which may have been used
+        * without being tracked, and has to be scanned for poison.
         */
-       if (left > 1) {
-               left--;
-       } else {
-               pr_err("FAIL: not enough stack space for the test\n");
-               test_failed = true;
-               goto end;
-       }
+       untracked_high = min(current_sp, lowest_sp);
+       untracked_high = ALIGN_DOWN(untracked_high, sizeof(unsigned long));
 
-       pr_info("checking unused part of the thread stack (%lu bytes)...\n",
-                                       left * sizeof(unsigned long));
+       /*
+        * Find the top of the poison in the same way as the erasing code.
+        */
+       poison_high = stackleak_find_top_of_poison(task_stack_low, untracked_high);
 
        /*
-        * Search for 'check_depth' poison values in a row (just like
-        * stackleak_erase() does).
+        * Check whether the poisoned portion of the stack (if any) consists
+        * entirely of poison. This verifies the entries that
+        * stackleak_find_top_of_poison() should have checked.
         */
-       for (i = 0, found = 0; i < left && found <= check_depth; i++) {
-               if (*(sp - i) == STACKLEAK_POISON)
-                       found++;
-               else
-                       found = 0;
-       }
+       poison_low = poison_high;
+       while (poison_low > task_stack_low) {
+               poison_low -= sizeof(unsigned long);
+
+               if (*(unsigned long *)poison_low == STACKLEAK_POISON)
+                       continue;
 
-       if (found <= check_depth) {
-               pr_err("FAIL: the erased part is not found (checked %lu bytes)\n",
-                                               i * sizeof(unsigned long));
+               pr_err("FAIL: non-poison value %lu bytes below poison boundary: 0x%lx\n",
+                      poison_high - poison_low, *(unsigned long *)poison_low);
                test_failed = true;
-               goto end;
        }
 
-       pr_info("the erased part begins after %lu not poisoned bytes\n",
-                               (i - found) * sizeof(unsigned long));
-
-       /* The rest of thread stack should be erased */
-       for (; i < left; i++) {
-               if (*(sp - i) != STACKLEAK_POISON) {
-                       pr_err("FAIL: bad value number %lu in the erased part: 0x%lx\n",
-                                                               i, *(sp - i));
-                       test_failed = true;
-               }
-       }
+       pr_info("stackleak stack usage:\n"
+               "  high offset: %lu bytes\n"
+               "  current:     %lu bytes\n"
+               "  lowest:      %lu bytes\n"
+               "  tracked:     %lu bytes\n"
+               "  untracked:   %lu bytes\n"
+               "  poisoned:    %lu bytes\n"
+               "  low offset:  %lu bytes\n",
+               task_stack_base + THREAD_SIZE - task_stack_high,
+               task_stack_high - current_sp,
+               task_stack_high - lowest_sp,
+               task_stack_high - untracked_high,
+               untracked_high - poison_high,
+               poison_high - task_stack_low,
+               task_stack_low - task_stack_base);
 
-end:
+out:
        if (test_failed) {
                pr_err("FAIL: the thread stack is NOT properly erased!\n");
-               pr_expected_config(CONFIG_GCC_PLUGIN_STACKLEAK);
        } else {
                pr_info("OK: the rest of the thread stack is properly erased\n");
        }
 }
+
+void lkdtm_STACKLEAK_ERASING(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       check_stackleak_irqoff();
+       local_irq_restore(flags);
+}
+#else /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */
+void lkdtm_STACKLEAK_ERASING(void)
+{
+       if (IS_ENABLED(CONFIG_HAVE_ARCH_STACKLEAK)) {
+               pr_err("XFAIL: stackleak is not enabled (CONFIG_GCC_PLUGIN_STACKLEAK=n)\n");
+       } else {
+               pr_err("XFAIL: stackleak is not supported on this arch (HAVE_ARCH_STACKLEAK=n)\n");
+       }
+}
+#endif /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */
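
The verification loop above walks from poison_high down to task_stack_low and demands poison in every slot. A user-space model of that check (STACKLEAK_POISON really is -0xBEEF; everything else here is illustrative):

    #include <stdbool.h>

    #define POISON ((unsigned long)-0xBEEF)	/* STACKLEAK_POISON */

    /* 'low' and 'high' model task_stack_low and poison_high. */
    static bool poison_region_intact(const unsigned long *low,
    				 const unsigned long *high)
    {
    	for (const unsigned long *p = low; p < high; p++)
    		if (*p != POISON)
    			return false;	/* leaked data survived erasing */
    	return true;
    }
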
index 506dc900f5c7c391ef8c04dd0158754c0feba705..1259ca22d62500336e0833800d0bc34294ffa013 100644 (file)
@@ -126,6 +126,7 @@ struct mmc_blk_data {
 #define MMC_BLK_DISCARD                BIT(2)
 #define MMC_BLK_SECDISCARD     BIT(3)
 #define MMC_BLK_CQE_RECOVERY   BIT(4)
+#define MMC_BLK_TRIM           BIT(5)
 
        /*
         * Only set in main mmc_blk_data associated
@@ -330,7 +331,7 @@ static struct attribute *mmc_disk_attrs[] = {
 static umode_t mmc_disk_attrs_is_visible(struct kobject *kobj,
                struct attribute *a, int n)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
+       struct device *dev = kobj_to_dev(kobj);
        struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
        umode_t mode = a->mode;
 
@@ -609,11 +610,11 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
 
        if (idata->rpmb || (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) {
                /*
-                * Ensure RPMB/R1B command has completed by polling CMD13
-                * "Send Status".
+                * Ensure RPMB/R1B command has completed by polling CMD13 "Send Status". Here
+                * we allow the default timeout to be overridden if a custom timeout is specified.
                 */
-               err = mmc_poll_for_busy(card, MMC_BLK_TIMEOUT_MS, false,
-                                       MMC_BUSY_IO);
+               err = mmc_poll_for_busy(card, idata->ic.cmd_timeout_ms ? : MMC_BLK_TIMEOUT_MS,
+                                       false, MMC_BUSY_IO);
        }
 
        return err;
@@ -676,8 +677,9 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
        struct mmc_ioc_cmd __user *cmds = user->cmds;
        struct mmc_card *card;
        struct mmc_queue *mq;
-       int i, err = 0, ioc_err = 0;
+       int err = 0, ioc_err = 0;
        __u64 num_of_cmds;
+       unsigned int i, n;
        struct request *req;
 
        if (copy_from_user(&num_of_cmds, &user->num_of_cmds,
@@ -690,15 +692,16 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
        if (num_of_cmds > MMC_IOC_MAX_CMDS)
                return -EINVAL;
 
-       idata = kcalloc(num_of_cmds, sizeof(*idata), GFP_KERNEL);
+       n = num_of_cmds;
+       idata = kcalloc(n, sizeof(*idata), GFP_KERNEL);
        if (!idata)
                return -ENOMEM;
 
-       for (i = 0; i < num_of_cmds; i++) {
+       for (i = 0; i < n; i++) {
                idata[i] = mmc_blk_ioctl_copy_from_user(&cmds[i]);
                if (IS_ERR(idata[i])) {
                        err = PTR_ERR(idata[i]);
-                       num_of_cmds = i;
+                       n = i;
                        goto cmd_err;
                }
                /* This will be NULL on non-RPMB ioctl():s */
@@ -725,18 +728,18 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
        req_to_mmc_queue_req(req)->drv_op =
                rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
        req_to_mmc_queue_req(req)->drv_op_data = idata;
-       req_to_mmc_queue_req(req)->ioc_count = num_of_cmds;
+       req_to_mmc_queue_req(req)->ioc_count = n;
        blk_execute_rq(req, false);
        ioc_err = req_to_mmc_queue_req(req)->drv_op_result;
 
        /* copy to user if data and response */
-       for (i = 0; i < num_of_cmds && !err; i++)
+       for (i = 0; i < n && !err; i++)
                err = mmc_blk_ioctl_copy_to_user(&cmds[i], idata[i]);
 
        blk_mq_free_request(req);
 
 cmd_err:
-       for (i = 0; i < num_of_cmds; i++) {
+       for (i = 0; i < n; i++) {
                kfree(idata[i]->buf);
                kfree(idata[i]);
        }
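
The switch from a signed index plus num_of_cmds to an unsigned n also tidies the partial-failure cleanup: on the first copy failure the loop bound is shrunk to i, so the cmd_err path frees exactly the entries that were created. The pattern in isolation (create() and destroy() are hypothetical stand-ins for mmc_blk_ioctl_copy_from_user() and the kfree() pair):

    static int setup_all(void *item[], unsigned int total)
    {
    	unsigned int i, n = total;
    	int err = 0;

    	for (i = 0; i < n; i++) {
    		item[i] = create(i);
    		if (IS_ERR(item[i])) {
    			err = PTR_ERR(item[i]);
    			n = i;		/* only item[0..i-1] were created */
    			break;
    		}
    	}

    	if (err)
    		for (i = 0; i < n; i++)
    			destroy(item[i]);
    	return err;
    }
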
@@ -1090,12 +1093,13 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req)
        blk_mq_end_request(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
-static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
+static void mmc_blk_issue_erase_rq(struct mmc_queue *mq, struct request *req,
+                                  int type, unsigned int erase_arg)
 {
        struct mmc_blk_data *md = mq->blkdata;
        struct mmc_card *card = md->queue.card;
        unsigned int from, nr;
-       int err = 0, type = MMC_BLK_DISCARD;
+       int err = 0;
        blk_status_t status = BLK_STS_OK;
 
        if (!mmc_can_erase(card)) {
@@ -1111,13 +1115,13 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
                if (card->quirks & MMC_QUIRK_INAND_CMD38) {
                        err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
                                         INAND_CMD38_ARG_EXT_CSD,
-                                        card->erase_arg == MMC_TRIM_ARG ?
+                                        erase_arg == MMC_TRIM_ARG ?
                                         INAND_CMD38_ARG_TRIM :
                                         INAND_CMD38_ARG_ERASE,
                                         card->ext_csd.generic_cmd6_time);
                }
                if (!err)
-                       err = mmc_erase(card, from, nr, card->erase_arg);
+                       err = mmc_erase(card, from, nr, erase_arg);
        } while (err == -EIO && !mmc_blk_reset(md, card->host, type));
        if (err)
                status = BLK_STS_IOERR;
@@ -1127,6 +1131,19 @@ fail:
        blk_mq_end_request(req, status);
 }
 
+static void mmc_blk_issue_trim_rq(struct mmc_queue *mq, struct request *req)
+{
+       mmc_blk_issue_erase_rq(mq, req, MMC_BLK_TRIM, MMC_TRIM_ARG);
+}
+
+static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
+{
+       struct mmc_blk_data *md = mq->blkdata;
+       struct mmc_card *card = md->queue.card;
+
+       mmc_blk_issue_erase_rq(mq, req, MMC_BLK_DISCARD, card->erase_arg);
+}
+
 static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
                                       struct request *req)
 {
@@ -2327,6 +2344,9 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req)
                case REQ_OP_SECURE_ERASE:
                        mmc_blk_issue_secdiscard_rq(mq, req);
                        break;
+               case REQ_OP_WRITE_ZEROES:
+                       mmc_blk_issue_trim_rq(mq, req);
+                       break;
                case REQ_OP_FLUSH:
                        mmc_blk_issue_flush(mq, req);
                        break;
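
Taken together, these block.c hunks route three erase-style operations through the shared helper. REQ_OP_WRITE_ZEROES may safely become a TRIM because mmc_queue_setup_discard() only advertises it when card->erased_byte is zero. A condensed view (illustrative; the full switch lives in mmc_blk_mq_issue_rq()):

    case REQ_OP_DISCARD:
    	mmc_blk_issue_discard_rq(mq, req);	/* card->erase_arg */
    	break;
    case REQ_OP_SECURE_ERASE:
    	mmc_blk_issue_secdiscard_rq(mq, req);
    	break;
    case REQ_OP_WRITE_ZEROES:
    	mmc_blk_issue_trim_rq(mq, req);		/* always MMC_TRIM_ARG */
    	break;
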
index c6ae16d40766804357ef51dc01ec50b7a9716a21..4b70cbfc6d5de0cb81d13d16a879f80b0d617a2b 100644 (file)
@@ -1988,9 +1988,9 @@ static void mmc_hw_reset_for_init(struct mmc_host *host)
 {
        mmc_pwrseq_reset(host);
 
-       if (!(host->caps & MMC_CAP_HW_RESET) || !host->ops->hw_reset)
+       if (!(host->caps & MMC_CAP_HW_RESET) || !host->ops->card_hw_reset)
                return;
-       host->ops->hw_reset(host);
+       host->ops->card_hw_reset(host);
 }
 
 /**
@@ -2017,8 +2017,9 @@ int mmc_hw_reset(struct mmc_card *card)
 }
 EXPORT_SYMBOL(mmc_hw_reset);
 
-int mmc_sw_reset(struct mmc_host *host)
+int mmc_sw_reset(struct mmc_card *card)
 {
+       struct mmc_host *host = card->host;
        int ret;
 
        if (!host->bus_ops->sw_reset)
index efa95dc4fc4eee88d91ee3792fffa16201554ddf..89cd48fcec79f2aea73f61a951b907722abd78e7 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/pm_runtime.h>
+#include <linux/random.h>
 #include <linux/sysfs.h>
 
 #include <linux/mmc/host.h>
@@ -71,6 +72,12 @@ static int mmc_decode_cid(struct mmc_card *card)
 {
        u32 *resp = card->raw_cid;
 
+       /*
+        * Add the raw card ID (cid) data to the entropy pool. It doesn't
+        * matter that not all of it is unique; it's just bonus entropy.
+        */
+       add_device_randomness(&card->raw_cid, sizeof(card->raw_cid));
+
        /*
         * The selection of the format here is based upon published
         * specs from sandisk and from what people have reported.
@@ -2240,11 +2247,11 @@ static int _mmc_hw_reset(struct mmc_host *host)
         */
        _mmc_flush_cache(host);
 
-       if ((host->caps & MMC_CAP_HW_RESET) && host->ops->hw_reset &&
+       if ((host->caps & MMC_CAP_HW_RESET) && host->ops->card_hw_reset &&
             mmc_can_reset(card)) {
                /* If the card accept RST_n signal, send it. */
                mmc_set_clock(host, host->f_init);
-               host->ops->hw_reset(host);
+               host->ops->card_hw_reset(host);
                /* Set initial state and call mmc_set_ios */
                mmc_set_initial_state(host);
        } else {
index 180d7e9d3400a5305b3d5d9b416b9f0942212762..81c55bfd6e0c263e5762c07e814874b016cb7b86 100644 (file)
@@ -21,7 +21,7 @@
 
 #define MMC_BKOPS_TIMEOUT_MS           (120 * 1000) /* 120s */
 #define MMC_SANITIZE_TIMEOUT_MS                (240 * 1000) /* 240s */
-#define MMC_OP_COND_PERIOD_US          (1 * 1000) /* 1ms */
+#define MMC_OP_COND_PERIOD_US          (4 * 1000) /* 4ms */
 #define MMC_OP_COND_TIMEOUT_MS         1000 /* 1s */
 
 static const u8 tuning_blk_pattern_4bit[] = {
index c69b2d9df6f16df48f5ded2e9df6a49cf0ff9787..fa5324ceeebe479b0546a0eace0d91fb107b37c9 100644 (file)
@@ -183,14 +183,15 @@ static void mmc_queue_setup_discard(struct request_queue *q,
        if (!max_discard)
                return;
 
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
        blk_queue_max_discard_sectors(q, max_discard);
        q->limits.discard_granularity = card->pref_erase << 9;
        /* granularity must not be greater than max. discard */
        if (card->pref_erase > max_discard)
                q->limits.discard_granularity = SECTOR_SIZE;
        if (mmc_can_secure_erase_trim(card))
-               blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
+               blk_queue_max_secure_erase_sectors(q, max_discard);
+       if (mmc_can_trim(card) && card->erased_byte == 0)
+               blk_queue_max_write_zeroes_sectors(q, max_discard);
 }
 
 static unsigned short mmc_get_max_segments(struct mmc_host *host)
index 68df6b2f49cc7a3dd6f913620c1aef8ebb902554..c5f1df6ce4c0a0a0364461d290d81dac2c8d40df 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/pm_runtime.h>
+#include <linux/random.h>
 #include <linux/scatterlist.h>
 #include <linux/sysfs.h>
 
@@ -83,6 +84,12 @@ void mmc_decode_cid(struct mmc_card *card)
 {
        u32 *resp = card->raw_cid;
 
+       /*
+        * Add the raw card ID (cid) data to the entropy pool. It doesn't
+        * matter that not all of it is unique; it's just bonus entropy.
+        */
+       add_device_randomness(&card->raw_cid, sizeof(card->raw_cid));
+
        /*
         * SD doesn't currently have a version field so we will
         * have to assume we can parse this.
index 807177c953f3de217b2732337e02604fca75a5cd..91d52ba7a39fc30f9e78d28753299a2e194dec31 100644 (file)
@@ -1122,13 +1122,12 @@ atmci_prepare_data_dma(struct atmel_mci *host, struct mmc_data *data)
        }
 
        /* If we don't have a channel, we can't do DMA */
-       chan = host->dma.chan;
-       if (chan)
-               host->data_chan = chan;
-
-       if (!chan)
+       if (!host->dma.chan)
                return -ENODEV;
 
+       chan = host->dma.chan;
+       host->data_chan = chan;
+
        if (data->flags & MMC_DATA_READ) {
                host->dma_conf.direction = slave_dirn = DMA_DEV_TO_MEM;
                maxburst = atmci_convert_chksize(host,
index 463b707d9e9973440eac556d8b9c6812696cb9f9..641ab4f42125bf6e2bbe39656109b548d2eeb83b 100644 (file)
@@ -1259,7 +1259,7 @@ static void bcm2835_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 static const struct mmc_host_ops bcm2835_ops = {
        .request = bcm2835_request,
        .set_ios = bcm2835_set_ios,
-       .hw_reset = bcm2835_reset,
+       .card_hw_reset = bcm2835_reset,
 };
 
 static int bcm2835_add_host(struct bcm2835_host *host)
index 06dc56cbada879783d411cf047a97eaa3254b172..581614196a8413317c651e9d405a891e2c102add 100644 (file)
@@ -1812,7 +1812,7 @@ static const struct mmc_host_ops dw_mci_ops = {
        .set_ios                = dw_mci_set_ios,
        .get_ro                 = dw_mci_get_ro,
        .get_cd                 = dw_mci_get_cd,
-       .hw_reset               = dw_mci_hw_reset,
+       .card_hw_reset          = dw_mci_hw_reset,
        .enable_sdio_irq        = dw_mci_enable_sdio_irq,
        .ack_sdio_irq           = dw_mci_ack_sdio_irq,
        .execute_tuning         = dw_mci_execute_tuning,
index 7ab1b38a7be504167b9b1f0e3ef6102ed86a8289..b1d563b2ed1b01a10f2da7608a1102e95d240fd1 100644 (file)
@@ -247,6 +247,26 @@ static int jz4740_mmc_acquire_dma_channels(struct jz4740_mmc_host *host)
                return PTR_ERR(host->dma_rx);
        }
 
+       /*
+        * Limit the maximum segment size in any SG entry according to
+        * the parameters of the DMA engine device.
+        */
+       if (host->dma_tx) {
+               struct device *dev = host->dma_tx->device->dev;
+               unsigned int max_seg_size = dma_get_max_seg_size(dev);
+
+               if (max_seg_size < host->mmc->max_seg_size)
+                       host->mmc->max_seg_size = max_seg_size;
+       }
+
+       if (host->dma_rx) {
+               struct device *dev = host->dma_rx->device->dev;
+               unsigned int max_seg_size = dma_get_max_seg_size(dev);
+
+               if (max_seg_size < host->mmc->max_seg_size)
+                       host->mmc->max_seg_size = max_seg_size;
+       }
+
        return 0;
 }
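
The same clamp, expressed as a small helper for clarity (dma_get_max_seg_size() is the real dmaengine accessor; the helper itself is illustrative):

    static void clamp_seg_size(struct mmc_host *mmc, struct dma_chan *chan)
    {
    	unsigned int max_seg_size = dma_get_max_seg_size(chan->device->dev);

    	mmc->max_seg_size = min(mmc->max_seg_size, max_seg_size);
    }
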
 
index 58ab9d90bc8b9f3ba63518dbd2d954e38277e599..2f08d442e5577a9731601909aee7fcf239bb4917 100644 (file)
@@ -1271,8 +1271,8 @@ static int meson_mmc_probe(struct platform_device *pdev)
                /* data bounce buffer */
                host->bounce_buf_size = mmc->max_req_size;
                host->bounce_buf =
-                       dma_alloc_coherent(host->dev, host->bounce_buf_size,
-                                          &host->bounce_dma_addr, GFP_KERNEL);
+                       dmam_alloc_coherent(host->dev, host->bounce_buf_size,
+                                           &host->bounce_dma_addr, GFP_KERNEL);
                if (host->bounce_buf == NULL) {
                        dev_err(host->dev, "Unable to map allocate DMA bounce buffer.\n");
                        ret = -ENOMEM;
@@ -1280,12 +1280,12 @@ static int meson_mmc_probe(struct platform_device *pdev)
                }
        }
 
-       host->descs = dma_alloc_coherent(host->dev, SD_EMMC_DESC_BUF_LEN,
-                     &host->descs_dma_addr, GFP_KERNEL);
+       host->descs = dmam_alloc_coherent(host->dev, SD_EMMC_DESC_BUF_LEN,
+                                         &host->descs_dma_addr, GFP_KERNEL);
        if (!host->descs) {
                dev_err(host->dev, "Allocating descriptor DMA buffer failed\n");
                ret = -ENOMEM;
-               goto err_bounce_buf;
+               goto err_free_irq;
        }
 
        mmc->ops = &meson_mmc_ops;
@@ -1293,10 +1293,6 @@ static int meson_mmc_probe(struct platform_device *pdev)
 
        return 0;
 
-err_bounce_buf:
-       if (!host->dram_access_quirk)
-               dma_free_coherent(host->dev, host->bounce_buf_size,
-                                 host->bounce_buf, host->bounce_dma_addr);
 err_free_irq:
        free_irq(host->irq, host);
 err_init_clk:
@@ -1318,13 +1314,6 @@ static int meson_mmc_remove(struct platform_device *pdev)
        writel(0, host->regs + SD_EMMC_IRQ_EN);
        free_irq(host->irq, host);
 
-       dma_free_coherent(host->dev, SD_EMMC_DESC_BUF_LEN,
-                         host->descs, host->descs_dma_addr);
-
-       if (!host->dram_access_quirk)
-               dma_free_coherent(host->dev, host->bounce_buf_size,
-                                 host->bounce_buf, host->bounce_dma_addr);
-
        clk_disable_unprepare(host->mmc_clk);
        clk_disable_unprepare(host->core_clk);
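
dmam_alloc_coherent() is the device-managed (devres) form of dma_alloc_coherent(): the buffer is released automatically when probe fails or the device is unbound, which is why the explicit dma_free_coherent() calls in the error and remove paths could simply be deleted. Sketch:

    buf = dmam_alloc_coherent(dev, size, &dma_handle, GFP_KERNEL);
    if (!buf)
    	return -ENOMEM;	/* no matching free needed anywhere */
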
 
index 28aa78aa08f3f2752d3da3b2db2a51e98ab80e6a..e92e63cb5641cf30beb9d076d8cff3609373cd9f 100644 (file)
@@ -511,7 +511,7 @@ static int meson_mx_sdhc_execute_tuning(struct mmc_host *mmc, u32 opcode)
 }
 
 static const struct mmc_host_ops meson_mx_sdhc_ops = {
-       .hw_reset                       = meson_mx_sdhc_hw_reset,
+       .card_hw_reset                  = meson_mx_sdhc_hw_reset,
        .request                        = meson_mx_sdhc_request,
        .set_ios                        = meson_mx_sdhc_set_ios,
        .card_busy                      = meson_mx_sdhc_card_busy,
index 45b8608c935cd70235953122d21b02dec63d8de7..01159eaf8694e9fcedee43d1304fd8241f0ce18c 100644 (file)
@@ -1619,6 +1619,8 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
 
        do {
                status = readl(host->base + MMCISTATUS);
+               if (!status)
+                       break;
 
                if (host->singleirq) {
                        if (status & host->mask1_reg)
@@ -1746,10 +1748,6 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
        unsigned long flags;
        int ret;
 
-       if (host->plat->ios_handler &&
-               host->plat->ios_handler(mmc_dev(mmc), ios))
-                       dev_err(mmc_dev(mmc), "platform ios_handler failed\n");
-
        switch (ios->power_mode) {
        case MMC_POWER_OFF:
                if (!IS_ERR(mmc->supply.vmmc))
index 4566d7fc9055af49e4f4b98bb4edf65856c9d4c4..60bca78a72b19f7ccea58c09e87c10c909eac0a6 100644 (file)
@@ -43,6 +43,9 @@ struct sdmmc_lli_desc {
 struct sdmmc_idma {
        dma_addr_t sg_dma;
        void *sg_cpu;
+       dma_addr_t bounce_dma_addr;
+       void *bounce_buf;
+       bool use_bounce_buffer;
 };
 
 struct sdmmc_dlyb {
@@ -54,6 +57,8 @@ struct sdmmc_dlyb {
 static int sdmmc_idma_validate_data(struct mmci_host *host,
                                    struct mmc_data *data)
 {
+       struct sdmmc_idma *idma = host->dma_priv;
+       struct device *dev = mmc_dev(host->mmc);
        struct scatterlist *sg;
        int i;
 
@@ -61,41 +66,69 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
         * idma has constraints on idmabase & idmasize for each element
         * except for the last element, which has no constraint on idmasize
         */
+       idma->use_bounce_buffer = false;
        for_each_sg(data->sg, sg, data->sg_len - 1, i) {
                if (!IS_ALIGNED(sg->offset, sizeof(u32)) ||
                    !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) {
-                       dev_err(mmc_dev(host->mmc),
+                       dev_dbg(mmc_dev(host->mmc),
                                "unaligned scatterlist: ofst:%x length:%d\n",
                                data->sg->offset, data->sg->length);
-                       return -EINVAL;
+                       goto use_bounce_buffer;
                }
        }
 
        if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
-               dev_err(mmc_dev(host->mmc),
+               dev_dbg(mmc_dev(host->mmc),
                        "unaligned last scatterlist: ofst:%x length:%d\n",
                        data->sg->offset, data->sg->length);
-               return -EINVAL;
+               goto use_bounce_buffer;
        }
 
+       return 0;
+
+use_bounce_buffer:
+       if (!idma->bounce_buf) {
+               idma->bounce_buf = dmam_alloc_coherent(dev,
+                                                      host->mmc->max_req_size,
+                                                      &idma->bounce_dma_addr,
+                                                      GFP_KERNEL);
+               if (!idma->bounce_buf) {
+                       dev_err(dev, "Unable to allocate DMA bounce buffer.\n");
+                       return -ENOMEM;
+               }
+       }
+
+       idma->use_bounce_buffer = true;
+
        return 0;
 }
 
 static int _sdmmc_idma_prep_data(struct mmci_host *host,
                                 struct mmc_data *data)
 {
-       int n_elem;
+       struct sdmmc_idma *idma = host->dma_priv;
 
-       n_elem = dma_map_sg(mmc_dev(host->mmc),
-                           data->sg,
-                           data->sg_len,
-                           mmc_get_dma_dir(data));
+       if (idma->use_bounce_buffer) {
+               if (data->flags & MMC_DATA_WRITE) {
+                       unsigned int xfer_bytes = data->blksz * data->blocks;
 
-       if (!n_elem) {
-               dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n");
-               return -EINVAL;
-       }
+                       sg_copy_to_buffer(data->sg, data->sg_len,
+                                         idma->bounce_buf, xfer_bytes);
+                       dma_wmb();
+               }
+       } else {
+               int n_elem;
+
+               n_elem = dma_map_sg(mmc_dev(host->mmc),
+                                   data->sg,
+                                   data->sg_len,
+                                   mmc_get_dma_dir(data));
 
+               if (!n_elem) {
+                       dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n");
+                       return -EINVAL;
+               }
+       }
        return 0;
 }
 
@@ -112,8 +145,19 @@ static int sdmmc_idma_prep_data(struct mmci_host *host,
 static void sdmmc_idma_unprep_data(struct mmci_host *host,
                                   struct mmc_data *data, int err)
 {
-       dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-                    mmc_get_dma_dir(data));
+       struct sdmmc_idma *idma = host->dma_priv;
+
+       if (idma->use_bounce_buffer) {
+               if (data->flags & MMC_DATA_READ) {
+                       unsigned int xfer_bytes = data->blksz * data->blocks;
+
+                       sg_copy_from_buffer(data->sg, data->sg_len,
+                                           idma->bounce_buf, xfer_bytes);
+               }
+       } else {
+               dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+                            mmc_get_dma_dir(data));
+       }
 }
 
 static int sdmmc_idma_setup(struct mmci_host *host)
@@ -137,6 +181,8 @@ static int sdmmc_idma_setup(struct mmci_host *host)
                host->mmc->max_segs = SDMMC_LLI_BUF_LEN /
                        sizeof(struct sdmmc_lli_desc);
                host->mmc->max_seg_size = host->variant->stm32_idmabsize_mask;
+
+               host->mmc->max_req_size = SZ_1M;
        } else {
                host->mmc->max_segs = 1;
                host->mmc->max_seg_size = host->mmc->max_req_size;
@@ -154,8 +200,16 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
        struct scatterlist *sg;
        int i;
 
-       if (!host->variant->dma_lli || data->sg_len == 1) {
-               writel_relaxed(sg_dma_address(data->sg),
+       if (!host->variant->dma_lli || data->sg_len == 1 ||
+           idma->use_bounce_buffer) {
+               u32 dma_addr;
+
+               if (idma->use_bounce_buffer)
+                       dma_addr = idma->bounce_dma_addr;
+               else
+                       dma_addr = sg_dma_address(data->sg);
+
+               writel_relaxed(dma_addr,
                               host->base + MMCI_STM32_IDMABASE0R);
                writel_relaxed(MMCI_STM32_IDMAEN,
                               host->base + MMCI_STM32_IDMACTRLR);
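
The bounce-buffer discipline above reduces to two copies around the DMA, sketched here in isolation (sg_copy_to_buffer() and sg_copy_from_buffer() are the real lib/scatterlist.c helpers; the wrapper functions are hypothetical):

    /* Before starting a write: stage the payload into the bounce buffer. */
    static void bounce_prep_write(struct mmc_data *data, void *bounce)
    {
    	sg_copy_to_buffer(data->sg, data->sg_len, bounce,
    			  data->blksz * data->blocks);
    	dma_wmb();	/* make the copy visible before the DMA starts */
    }

    /* After a read completes: copy the DMA'd data back to the sglist. */
    static void bounce_finish_read(struct mmc_data *data, void *bounce)
    {
    	sg_copy_from_buffer(data->sg, data->sg_len, bounce,
    			    data->blksz * data->blocks);
    }
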
index e61b0b98065a250866097168e4a3fcb5a43073d7..195dc897188b9cb20d083b446ef4c579fd164891 100644 (file)
@@ -2458,7 +2458,7 @@ static const struct mmc_host_ops mt_msdc_ops = {
        .execute_tuning = msdc_execute_tuning,
        .prepare_hs400_tuning = msdc_prepare_hs400_tuning,
        .execute_hs400_tuning = msdc_execute_hs400_tuning,
-       .hw_reset = msdc_hw_reset,
+       .card_hw_reset = msdc_hw_reset,
 };
 
 static const struct cqhci_host_ops msdc_cmdq_ops = {
index 3629550528b6164e1e73c0efe52382e3e0d04cdb..bf54776fb26cea35f811300633b4e1af70e4c1e2 100644 (file)
@@ -70,6 +70,10 @@ struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi)
        } else {
                oms->pdata.caps |= MMC_CAP_NEEDS_POLL;
        }
+       if (device_property_read_bool(dev, "cap-sd-highspeed"))
+               oms->pdata.caps |= MMC_CAP_SD_HIGHSPEED;
+       if (device_property_read_bool(dev, "cap-mmc-highspeed"))
+               oms->pdata.caps |= MMC_CAP_MMC_HIGHSPEED;
 
        dev->platform_data = &oms->pdata;
        return dev->platform_data;
index 5e5af34090f1be8edc352e91bd0da823c961236b..57d39283924da3c847c2f9523c0d19b8552b0c60 100644 (file)
@@ -1374,7 +1374,7 @@ static int mmc_omap_probe(struct platform_device *pdev)
        host->iclk = clk_get(&pdev->dev, "ick");
        if (IS_ERR(host->iclk))
                return PTR_ERR(host->iclk);
-       clk_enable(host->iclk);
+       clk_prepare_enable(host->iclk);
 
        host->fclk = clk_get(&pdev->dev, "fck");
        if (IS_ERR(host->fclk)) {
@@ -1382,16 +1382,18 @@ static int mmc_omap_probe(struct platform_device *pdev)
                goto err_free_iclk;
        }
 
+       ret = clk_prepare(host->fclk);
+       if (ret)
+               goto err_put_fclk;
+
        host->dma_tx_burst = -1;
        host->dma_rx_burst = -1;
 
        host->dma_tx = dma_request_chan(&pdev->dev, "tx");
        if (IS_ERR(host->dma_tx)) {
                ret = PTR_ERR(host->dma_tx);
-               if (ret == -EPROBE_DEFER) {
-                       clk_put(host->fclk);
-                       goto err_free_iclk;
-               }
+               if (ret == -EPROBE_DEFER)
+                       goto err_free_fclk;
 
                host->dma_tx = NULL;
                dev_warn(host->dev, "TX DMA channel request failed\n");
@@ -1403,8 +1405,7 @@ static int mmc_omap_probe(struct platform_device *pdev)
                if (ret == -EPROBE_DEFER) {
                        if (host->dma_tx)
                                dma_release_channel(host->dma_tx);
-                       clk_put(host->fclk);
-                       goto err_free_iclk;
+                       goto err_free_fclk;
                }
 
                host->dma_rx = NULL;
@@ -1454,9 +1455,12 @@ err_free_dma:
                dma_release_channel(host->dma_tx);
        if (host->dma_rx)
                dma_release_channel(host->dma_rx);
+err_free_fclk:
+       clk_unprepare(host->fclk);
+err_put_fclk:
        clk_put(host->fclk);
 err_free_iclk:
-       clk_disable(host->iclk);
+       clk_disable_unprepare(host->iclk);
        clk_put(host->iclk);
        return ret;
 }
@@ -1476,8 +1480,9 @@ static int mmc_omap_remove(struct platform_device *pdev)
 
        mmc_omap_fclk_enable(host, 0);
        free_irq(host->irq, host);
+       clk_unprepare(host->fclk);
        clk_put(host->fclk);
-       clk_disable(host->iclk);
+       clk_disable_unprepare(host->iclk);
        clk_put(host->iclk);
 
        if (host->dma_tx)
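
The omap hunks above migrate from bare clk_enable()/clk_disable() calls to the prepare-aware variants and thread a matching clk_unprepare() through the error and remove paths. The canonical pairing under the common clock framework, sketched for a generic driver:

        int ret = clk_prepare_enable(clk);      /* clk_prepare() + clk_enable() */
        if (ret)
                return ret;
        /* ... device is clocked here ... */
        clk_disable_unprepare(clk);             /* clk_disable() + clk_unprepare() */
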
index 66d308e73e179774536ff09121d27b0988eddf61..1a1e3e020a8c2d757bdedf62c1fc4728f80df0b3 100644 (file)
@@ -41,6 +41,8 @@ struct renesas_sdhi_of_data {
 struct renesas_sdhi_quirks {
        bool hs400_disabled;
        bool hs400_4taps;
+       bool fixed_addr_mode;
+       bool dma_one_rx_only;
        u32 hs400_bad_taps;
        const u8 (*hs400_calib_table)[SDHI_CALIB_TABLE_MAX];
 };
index ddb5ca2f559e2b9b10d985e39ac803643807818f..4404ca1f98d80158940618fd2a3a89f35557959e 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pinctrl/pinctrl-state.h>
 #include <linux/platform_device.h>
@@ -36,7 +35,6 @@
 #include <linux/reset.h>
 #include <linux/sh_dma.h>
 #include <linux/slab.h>
-#include <linux/sys_soc.h>
 
 #include "renesas_sdhi.h"
 #include "tmio_mmc.h"
index 1685df00863b941a312ed1a60fe6672242999b10..3084b15ae2cbbae439cbd88cd4619874e452047f 100644 (file)
@@ -78,11 +78,7 @@ static unsigned long global_flags;
  * stored into the system memory even if the DMAC interrupt happened.
  * So, this driver then uses one RX DMAC channel only.
  */
-#define SDHI_INTERNAL_DMAC_ONE_RX_ONLY 0
-#define SDHI_INTERNAL_DMAC_RX_IN_USE   1
-
-/* RZ/A2 does not have the ADRR_MODE bit */
-#define SDHI_INTERNAL_DMAC_ADDR_MODE_FIXED_ONLY 2
+#define SDHI_INTERNAL_DMAC_RX_IN_USE   0
 
 /* Definitions for sampling clocks */
 static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = {
@@ -108,10 +104,6 @@ static const struct renesas_sdhi_of_data of_data_rza2 = {
        .max_segs       = 1,
 };
 
-static const struct renesas_sdhi_of_data_with_quirks of_rza2_compatible = {
-       .of_data        = &of_data_rza2,
-};
-
 static const struct renesas_sdhi_of_data of_data_rcar_gen3 = {
        .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL |
                          TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2,
@@ -128,7 +120,7 @@ static const struct renesas_sdhi_of_data of_data_rcar_gen3 = {
        .sdhi_flags     = SDHI_FLAG_NEED_CLKH_FALLBACK,
 };
 
-static const struct renesas_sdhi_of_data of_data_rcar_gen3_no_fallback = {
+static const struct renesas_sdhi_of_data of_data_rcar_gen3_no_sdh_fallback = {
        .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL |
                          TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2,
        .capabilities   = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
@@ -169,6 +161,12 @@ static const struct renesas_sdhi_quirks sdhi_quirks_4tap_nohs400 = {
        .hs400_4taps = true,
 };
 
+static const struct renesas_sdhi_quirks sdhi_quirks_4tap_nohs400_one_rx = {
+       .hs400_disabled = true,
+       .hs400_4taps = true,
+       .dma_one_rx_only = true,
+};
+
 static const struct renesas_sdhi_quirks sdhi_quirks_4tap = {
        .hs400_4taps = true,
        .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
@@ -178,6 +176,10 @@ static const struct renesas_sdhi_quirks sdhi_quirks_nohs400 = {
        .hs400_disabled = true,
 };
 
+static const struct renesas_sdhi_quirks sdhi_quirks_fixed_addr = {
+       .fixed_addr_mode = true,
+};
+
 static const struct renesas_sdhi_quirks sdhi_quirks_bad_taps1357 = {
        .hs400_bad_taps = BIT(1) | BIT(3) | BIT(5) | BIT(7),
 };
@@ -208,10 +210,12 @@ static const struct renesas_sdhi_quirks sdhi_quirks_r8a77990 = {
  */
 static const struct soc_device_attribute sdhi_quirks_match[]  = {
        { .soc_id = "r8a774a1", .revision = "ES1.[012]", .data = &sdhi_quirks_4tap_nohs400 },
-       { .soc_id = "r8a7795", .revision = "ES1.*", .data = &sdhi_quirks_4tap_nohs400 },
+       { .soc_id = "r8a7795", .revision = "ES1.*", .data = &sdhi_quirks_4tap_nohs400_one_rx },
        { .soc_id = "r8a7795", .revision = "ES2.0", .data = &sdhi_quirks_4tap },
-       { .soc_id = "r8a7796", .revision = "ES1.[012]", .data = &sdhi_quirks_4tap_nohs400 },
+       { .soc_id = "r8a7796", .revision = "ES1.0", .data = &sdhi_quirks_4tap_nohs400_one_rx },
+       { .soc_id = "r8a7796", .revision = "ES1.[12]", .data = &sdhi_quirks_4tap_nohs400 },
        { .soc_id = "r8a7796", .revision = "ES1.*", .data = &sdhi_quirks_r8a7796_es13 },
+       { .soc_id = "r8a77980", .revision = "ES1.*", .data = &sdhi_quirks_nohs400 },
        { /* Sentinel. */ }
 };
 
@@ -231,11 +235,7 @@ static const struct renesas_sdhi_of_data_with_quirks of_r8a77965_compatible = {
 };
 
 static const struct renesas_sdhi_of_data_with_quirks of_r8a77970_compatible = {
-       .of_data = &of_data_rcar_gen3_no_fallback,
-};
-
-static const struct renesas_sdhi_of_data_with_quirks of_r8a77980_compatible = {
-       .of_data = &of_data_rcar_gen3,
+       .of_data = &of_data_rcar_gen3_no_sdh_fallback,
        .quirks = &sdhi_quirks_nohs400,
 };
 
@@ -248,16 +248,25 @@ static const struct renesas_sdhi_of_data_with_quirks of_rcar_gen3_compatible = {
        .of_data = &of_data_rcar_gen3,
 };
 
+static const struct renesas_sdhi_of_data_with_quirks of_rcar_gen3_nohs400_compatible = {
+       .of_data = &of_data_rcar_gen3,
+       .quirks = &sdhi_quirks_nohs400,
+};
+
+static const struct renesas_sdhi_of_data_with_quirks of_rza2_compatible = {
+       .of_data        = &of_data_rza2,
+       .quirks         = &sdhi_quirks_fixed_addr,
+};
+
 static const struct of_device_id renesas_sdhi_internal_dmac_of_match[] = {
        { .compatible = "renesas,sdhi-r7s9210", .data = &of_rza2_compatible, },
        { .compatible = "renesas,sdhi-mmc-r8a77470", .data = &of_rcar_gen3_compatible, },
        { .compatible = "renesas,sdhi-r8a7795", .data = &of_r8a7795_compatible, },
-       { .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_gen3_compatible, },
        { .compatible = "renesas,sdhi-r8a77961", .data = &of_r8a77961_compatible, },
        { .compatible = "renesas,sdhi-r8a77965", .data = &of_r8a77965_compatible, },
        { .compatible = "renesas,sdhi-r8a77970", .data = &of_r8a77970_compatible, },
-       { .compatible = "renesas,sdhi-r8a77980", .data = &of_r8a77980_compatible, },
        { .compatible = "renesas,sdhi-r8a77990", .data = &of_r8a77990_compatible, },
+       { .compatible = "renesas,sdhi-r8a77995", .data = &of_rcar_gen3_nohs400_compatible, },
        { .compatible = "renesas,rcar-gen3-sdhi", .data = &of_rcar_gen3_compatible, },
        {},
 };
@@ -287,7 +296,8 @@ renesas_sdhi_internal_dmac_enable_dma(struct tmio_mmc_host *host, bool enable)
 }
 
 static void
-renesas_sdhi_internal_dmac_abort_dma(struct tmio_mmc_host *host) {
+renesas_sdhi_internal_dmac_abort_dma(struct tmio_mmc_host *host)
+{
        u64 val = RST_DTRANRST1 | RST_DTRANRST0;
 
        renesas_sdhi_internal_dmac_enable_dma(host, false);
@@ -303,7 +313,8 @@ renesas_sdhi_internal_dmac_abort_dma(struct tmio_mmc_host *host) {
 }
 
 static void
-renesas_sdhi_internal_dmac_dataend_dma(struct tmio_mmc_host *host) {
+renesas_sdhi_internal_dmac_dataend_dma(struct tmio_mmc_host *host)
+{
        struct renesas_sdhi *priv = host_to_priv(host);
 
        tasklet_schedule(&priv->dma_priv.dma_complete);
@@ -357,10 +368,11 @@ static void
 renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
                                     struct mmc_data *data)
 {
+       struct renesas_sdhi *priv = host_to_priv(host);
        struct scatterlist *sg = host->sg_ptr;
        u32 dtran_mode = DTRAN_MODE_BUS_WIDTH;
 
-       if (!test_bit(SDHI_INTERNAL_DMAC_ADDR_MODE_FIXED_ONLY, &global_flags))
+       if (!(priv->quirks && priv->quirks->fixed_addr_mode))
                dtran_mode |= DTRAN_MODE_ADDR_MODE;
 
        if (!renesas_sdhi_internal_dmac_map(host, data, COOKIE_MAPPED))
@@ -368,7 +380,7 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
 
        if (data->flags & MMC_DATA_READ) {
                dtran_mode |= DTRAN_MODE_CH_NUM_CH1;
-               if (test_bit(SDHI_INTERNAL_DMAC_ONE_RX_ONLY, &global_flags) &&
+               if (priv->quirks && priv->quirks->dma_one_rx_only &&
                    test_and_set_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags))
                        goto force_pio_with_unmap;
        } else {
@@ -520,20 +532,6 @@ static const struct tmio_mmc_dma_ops renesas_sdhi_internal_dmac_dma_ops = {
        .end = renesas_sdhi_internal_dmac_end_dma,
 };
 
-/*
- * Whitelist of specific R-Car Gen3 SoC ES versions to use this DMAC
- * implementation as others may use a different implementation.
- */
-static const struct soc_device_attribute soc_dma_quirks[] = {
-       { .soc_id = "r7s9210",
-         .data = (void *)BIT(SDHI_INTERNAL_DMAC_ADDR_MODE_FIXED_ONLY) },
-       { .soc_id = "r8a7795", .revision = "ES1.*",
-         .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
-       { .soc_id = "r8a7796", .revision = "ES1.0",
-         .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
-       { /* sentinel */ }
-};
-
 static int renesas_sdhi_internal_dmac_probe(struct platform_device *pdev)
 {
        const struct soc_device_attribute *attr;
@@ -544,10 +542,6 @@ static int renesas_sdhi_internal_dmac_probe(struct platform_device *pdev)
        of_data_quirks = of_device_get_match_data(&pdev->dev);
        quirks = of_data_quirks->quirks;
 
-       attr = soc_device_match(soc_dma_quirks);
-       if (attr)
-               global_flags |= (unsigned long)attr->data;
-
        attr = soc_device_match(sdhi_quirks_match);
        if (attr)
                quirks = attr->data;
index f24623aac2dbecbad15920242e2417771d4e187c..8eb57de48e0c98a43c34ae9e47ef384ddb405fba 100644 (file)
 
 #define SDHCI_VENDOR 0x78
 #define  SDHCI_VENDOR_ENHANCED_STRB 0x1
+#define  SDHCI_VENDOR_GATE_SDCLK_EN 0x2
 
-#define BRCMSTB_PRIV_FLAGS_NO_64BIT            BIT(0)
-#define BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT      BIT(1)
+#define BRCMSTB_MATCH_FLAGS_NO_64BIT           BIT(0)
+#define BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT     BIT(1)
+#define BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE     BIT(2)
+
+#define BRCMSTB_PRIV_FLAGS_HAS_CQE             BIT(0)
+#define BRCMSTB_PRIV_FLAGS_GATE_CLOCK          BIT(1)
 
 #define SDHCI_ARASAN_CQE_BASE_ADDR             0x200
 
 struct sdhci_brcmstb_priv {
        void __iomem *cfg_regs;
-       bool has_cqe;
+       unsigned int flags;
 };
 
 struct brcmstb_match_priv {
        void (*hs400es)(struct mmc_host *mmc, struct mmc_ios *ios);
        struct sdhci_ops *ops;
-       unsigned int flags;
+       const unsigned int flags;
 };
 
+static inline void enable_clock_gating(struct sdhci_host *host)
+{
+       u32 reg;
+
+       reg = sdhci_readl(host, SDHCI_VENDOR);
+       reg |= SDHCI_VENDOR_GATE_SDCLK_EN;
+       sdhci_writel(host, reg, SDHCI_VENDOR);
+}
+
+static void brcmstb_reset(struct sdhci_host *host, u8 mask)
+{
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct sdhci_brcmstb_priv *priv = sdhci_pltfm_priv(pltfm_host);
+
+       sdhci_reset(host, mask);
+
+       /* Reset will clear this, so re-enable it */
+       if (priv->flags & BRCMSTB_PRIV_FLAGS_GATE_CLOCK)
+               enable_clock_gating(host);
+}
+
 static void sdhci_brcmstb_hs400es(struct mmc_host *mmc, struct mmc_ios *ios)
 {
        struct sdhci_host *host = mmc_priv(mmc);
@@ -129,22 +155,23 @@ static struct sdhci_ops sdhci_brcmstb_ops = {
 static struct sdhci_ops sdhci_brcmstb_ops_7216 = {
        .set_clock = sdhci_brcmstb_set_clock,
        .set_bus_width = sdhci_set_bus_width,
-       .reset = sdhci_reset,
+       .reset = brcmstb_reset,
        .set_uhs_signaling = sdhci_brcmstb_set_uhs_signaling,
 };
 
 static struct brcmstb_match_priv match_priv_7425 = {
-       .flags = BRCMSTB_PRIV_FLAGS_NO_64BIT |
-       BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT,
+       .flags = BRCMSTB_MATCH_FLAGS_NO_64BIT |
+       BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT,
        .ops = &sdhci_brcmstb_ops,
 };
 
 static struct brcmstb_match_priv match_priv_7445 = {
-       .flags = BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT,
+       .flags = BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT,
        .ops = &sdhci_brcmstb_ops,
 };
 
 static const struct brcmstb_match_priv match_priv_7216 = {
+       .flags = BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE,
        .hs400es = sdhci_brcmstb_hs400es,
        .ops = &sdhci_brcmstb_ops_7216,
 };
@@ -176,7 +203,7 @@ static int sdhci_brcmstb_add_host(struct sdhci_host *host,
        bool dma64;
        int ret;
 
-       if (!priv->has_cqe)
+       if ((priv->flags & BRCMSTB_PRIV_FLAGS_HAS_CQE) == 0)
                return sdhci_add_host(host);
 
        dev_dbg(mmc_dev(host->mmc), "CQE is enabled\n");
@@ -225,7 +252,6 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
        struct sdhci_brcmstb_priv *priv;
        struct sdhci_host *host;
        struct resource *iomem;
-       bool has_cqe = false;
        struct clk *clk;
        int res;
 
@@ -244,10 +270,6 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
                return res;
 
        memset(&brcmstb_pdata, 0, sizeof(brcmstb_pdata));
-       if (device_property_read_bool(&pdev->dev, "supports-cqe")) {
-               has_cqe = true;
-               match_priv->ops->irq = sdhci_brcmstb_cqhci_irq;
-       }
        brcmstb_pdata.ops = match_priv->ops;
        host = sdhci_pltfm_init(pdev, &brcmstb_pdata,
                                sizeof(struct sdhci_brcmstb_priv));
@@ -258,7 +280,10 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
 
        pltfm_host = sdhci_priv(host);
        priv = sdhci_pltfm_priv(pltfm_host);
-       priv->has_cqe = has_cqe;
+       if (device_property_read_bool(&pdev->dev, "supports-cqe")) {
+               priv->flags |= BRCMSTB_PRIV_FLAGS_HAS_CQE;
+               match_priv->ops->irq = sdhci_brcmstb_cqhci_irq;
+       }
 
        /* Map in the non-standard CFG registers */
        iomem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
@@ -273,6 +298,14 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
        if (res)
                goto err;
 
+       /*
+        * Automatic clock gating does not work for SD cards that may
+        * switch signal voltage, so only enable it for non-removable devices.
+        */
+       if ((match_priv->flags & BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE) &&
+           (host->mmc->caps & MMC_CAP_NONREMOVABLE))
+               priv->flags |= BRCMSTB_PRIV_FLAGS_GATE_CLOCK;
+
        /*
         * If the chip has enhanced strobe and it's enabled, add
         * callback
@@ -287,14 +320,14 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
         * properties through mmc_of_parse().
         */
        host->caps = sdhci_readl(host, SDHCI_CAPABILITIES);
-       if (match_priv->flags & BRCMSTB_PRIV_FLAGS_NO_64BIT)
+       if (match_priv->flags & BRCMSTB_MATCH_FLAGS_NO_64BIT)
                host->caps &= ~SDHCI_CAN_64BIT;
        host->caps1 = sdhci_readl(host, SDHCI_CAPABILITIES_1);
        host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 |
                         SDHCI_SUPPORT_DDR50);
        host->quirks |= SDHCI_QUIRK_MISSING_CAPS;
 
-       if (match_priv->flags & BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT)
+       if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT)
                host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
 
        res = sdhci_brcmstb_add_host(host, priv);
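
The gating decision above keys off MMC_CAP_NONREMOVABLE, which the core normally derives from firmware properties; a condensed sketch of that derivation (paraphrased from mmc_of_parse(), not part of this patch):

        /* "non-removable" in DT marks the slot as eMMC-like, no card detect */
        if (device_property_read_bool(dev, "non-removable"))
                host->caps |= MMC_CAP_NONREMOVABLE;
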
index ff9f5b63c337ec59698fdf1b89383a8c0cf639b3..e395411fb6fdc442946fab0026ce0a3ccba61407 100644 (file)
@@ -2435,8 +2435,33 @@ static const struct sdhci_msm_variant_info sdm845_sdhci_var = {
 };
 
 static const struct of_device_id sdhci_msm_dt_match[] = {
+       /* The following two entries are deprecated (kept only for backward compatibility) */
        {.compatible = "qcom,sdhci-msm-v4", .data = &sdhci_msm_mci_var},
        {.compatible = "qcom,sdhci-msm-v5", .data = &sdhci_msm_v5_var},
+       /* Add entries for sdcc versions less than 5.0 here */
+       {.compatible = "qcom,apq8084-sdhci", .data = &sdhci_msm_mci_var},
+       {.compatible = "qcom,msm8226-sdhci", .data = &sdhci_msm_mci_var},
+       {.compatible = "qcom,msm8916-sdhci", .data = &sdhci_msm_mci_var},
+       {.compatible = "qcom,msm8953-sdhci", .data = &sdhci_msm_mci_var},
+       {.compatible = "qcom,msm8974-sdhci", .data = &sdhci_msm_mci_var},
+       {.compatible = "qcom,msm8992-sdhci", .data = &sdhci_msm_mci_var},
+       {.compatible = "qcom,msm8994-sdhci", .data = &sdhci_msm_mci_var},
+       {.compatible = "qcom,msm8996-sdhci", .data = &sdhci_msm_mci_var},
+       /*
+        * Add entries for sdcc version 5.0 here. For SDCC version 5.0.0,
+        * MCI registers are removed from SDCC interface and some registers
+        * are moved to HC.
+        */
+       {.compatible = "qcom,qcs404-sdhci", .data = &sdhci_msm_v5_var},
+       {.compatible = "qcom,sdx55-sdhci",  .data = &sdhci_msm_v5_var},
+       {.compatible = "qcom,sdx65-sdhci",  .data = &sdhci_msm_v5_var},
+       {.compatible = "qcom,sdm630-sdhci", .data = &sdhci_msm_v5_var},
+       {.compatible = "qcom,sm6125-sdhci", .data = &sdhci_msm_v5_var},
+       {.compatible = "qcom,sm6350-sdhci", .data = &sdhci_msm_v5_var},
+       {.compatible = "qcom,sm8150-sdhci", .data = &sdhci_msm_v5_var},
+       {.compatible = "qcom,sm8250-sdhci", .data = &sdhci_msm_v5_var},
+       {.compatible = "qcom,sc7280-sdhci", .data = &sdhci_msm_v5_var},
+       /* Add entries where SoC-specific handling is required here */
        {.compatible = "qcom,sdm845-sdhci", .data = &sdm845_sdhci_var},
        {.compatible = "qcom,sc7180-sdhci", .data = &sdm845_sdhci_var},
        {},
index 6a2e5a468424672ee436853046470b76b72e6424..757801dfc30869d445914b8dc6f83e31a5c77dc0 100644 (file)
@@ -1577,6 +1577,9 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
        const struct sdhci_arasan_of_data *data;
 
        data = of_device_get_match_data(dev);
+       if (!data)
+               return -EINVAL;
+
        host = sdhci_pltfm_init(pdev, data->pdata, sizeof(*sdhci_arasan));
 
        if (IS_ERR(host))
index 64e27c2821f998c5ca2c9587b87e45055d3163c5..86e867ffbb10a0e2691684ace11d1912f1fb93f7 100644 (file)
@@ -1219,16 +1219,11 @@ static int sdhci_omap_probe(struct platform_device *pdev)
        struct sdhci_pltfm_host *pltfm_host;
        struct sdhci_omap_host *omap_host;
        struct mmc_host *mmc;
-       const struct of_device_id *match;
-       struct sdhci_omap_data *data;
+       const struct sdhci_omap_data *data;
        const struct soc_device_attribute *soc;
        struct resource *regs;
 
-       match = of_match_device(omap_sdhci_match, dev);
-       if (!match)
-               return -EINVAL;
-
-       data = (struct sdhci_omap_data *)match->data;
+       data = of_device_get_match_data(&pdev->dev);
        if (!data) {
                dev_err(dev, "no sdhci omap data\n");
                return -EINVAL;
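
Both the arasan and omap hunks above replace the open-coded of_match_device() lookup with of_device_get_match_data(), which returns the matched table entry's ->data directly and NULL when nothing matched, hence the added error checks. Sketch, with a hypothetical data type:

        const struct my_match_data *data;       /* my_match_data is illustrative */

        data = of_device_get_match_data(&pdev->dev);
        if (!data)
                return -EINVAL;
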
index d09728c37d03e91c8d4704183f37610993235c07..1499a64ec3aa1bafad4646bd20e161d5e227de82 100644 (file)
 #define PCI_GLI_9755_MISC          0x78
 #define   PCI_GLI_9755_MISC_SSC_OFF    BIT(26)
 
+#define PCI_GLI_9755_PM_CTRL     0xFC
+#define   PCI_GLI_9755_PM_STATE    GENMASK(1, 0)
+
 #define GLI_MAX_TUNING_LOOP 40
 
 /* Genesys Logic chipset */
@@ -676,6 +679,13 @@ static void gl9755_hw_setting(struct sdhci_pci_slot *slot)
                            GLI_9755_CFG2_L1DLY_VALUE);
        pci_write_config_dword(pdev, PCI_GLI_9755_CFG2, value);
 
+       /* toggle PM state to allow GL9755 to enter ASPM L1.2 */
+       pci_read_config_dword(pdev, PCI_GLI_9755_PM_CTRL, &value);
+       value |= PCI_GLI_9755_PM_STATE;
+       pci_write_config_dword(pdev, PCI_GLI_9755_PM_CTRL, value);
+       value &= ~PCI_GLI_9755_PM_STATE;
+       pci_write_config_dword(pdev, PCI_GLI_9755_PM_CTRL, value);
+
        gl9755_wt_off(pdev);
 }
 
index 07c6da1f2f0fe7b38414ab0afa273c3f912ec20e..22152029e14c5b78fe12a8199a48e31e6907a0d8 100644 (file)
@@ -2999,7 +2999,7 @@ static const struct mmc_host_ops sdhci_ops = {
        .set_ios        = sdhci_set_ios,
        .get_cd         = sdhci_get_cd,
        .get_ro         = sdhci_get_ro,
-       .hw_reset       = sdhci_hw_reset,
+       .card_hw_reset  = sdhci_hw_reset,
        .enable_sdio_irq = sdhci_enable_sdio_irq,
        .ack_sdio_irq    = sdhci_ack_sdio_irq,
        .start_signal_voltage_switch    = sdhci_start_signal_voltage_switch,
index e54fe24d47e734ff2f337ffe7ab9a5cb37e71640..e7ced1496a0731afba7edeaa7f4508aba50c4877 100644 (file)
@@ -147,6 +147,9 @@ struct sdhci_am654_data {
        int drv_strength;
        int strb_sel;
        u32 flags;
+       u32 quirks;
+
+#define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0)
 };
 
 struct sdhci_am654_driver_data {
@@ -369,6 +372,21 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg)
        }
 }
 
+static void sdhci_am654_reset(struct sdhci_host *host, u8 mask)
+{
+       u8 ctrl;
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
+
+       sdhci_reset(host, mask);
+
+       if (sdhci_am654->quirks & SDHCI_AM654_QUIRK_FORCE_CDTEST) {
+               ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+               ctrl |= SDHCI_CTRL_CDTEST_INS | SDHCI_CTRL_CDTEST_EN;
+               sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+       }
+}
+
 static int sdhci_am654_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
        struct sdhci_host *host = mmc_priv(mmc);
@@ -500,7 +518,7 @@ static struct sdhci_ops sdhci_j721e_4bit_ops = {
        .set_clock = sdhci_j721e_4bit_set_clock,
        .write_b = sdhci_am654_write_b,
        .irq = sdhci_am654_cqhci_irq,
-       .reset = sdhci_reset,
+       .reset = sdhci_am654_reset,
 };
 
 static const struct sdhci_pltfm_data sdhci_j721e_4bit_pdata = {
@@ -719,6 +737,9 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev,
        device_property_read_u32(dev, "ti,clkbuf-sel",
                                 &sdhci_am654->clkbuf_sel);
 
+       if (device_property_read_bool(dev, "ti,fails-without-test-cd"))
+               sdhci_am654->quirks |= SDHCI_AM654_QUIRK_FORCE_CDTEST;
+
        sdhci_get_of_property(pdev);
 
        return 0;
index 5f9ebf045b1cb22b59c142105e2d04600deb8160..0fd4c9d644dd5149357cf40c3d110d1b0e417fae 100644 (file)
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sdio.h>
-#include <linux/mmc/sh_mmcif.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/mod_devicetable.h>
 #include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/pagemap.h>
+#include <linux/platform_data/sh_mmcif.h>
 #include <linux/platform_device.h>
 #include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
index 46f9e2923d869807f48d31ed56a0967175f91e2c..b16e12e62e72223ca75a2ac8fa19ce1e0613280d 100644 (file)
@@ -1116,7 +1116,7 @@ static const struct mmc_host_ops sunxi_mmc_ops = {
        .get_cd          = mmc_gpio_get_cd,
        .enable_sdio_irq = sunxi_mmc_enable_sdio_irq,
        .start_signal_voltage_switch = sunxi_mmc_volt_switch,
-       .hw_reset        = sunxi_mmc_hw_reset,
+       .card_hw_reset   = sunxi_mmc_hw_reset,
        .card_busy       = sunxi_mmc_card_busy,
 };
 
index ccbf9885a52bec98f3cc6f5615b7b1b43220888f..3a8defdcca77226ab80307d5058a0169a573997e 100644 (file)
@@ -597,7 +597,7 @@ static int uniphier_sd_probe(struct platform_device *pdev)
                        ret = PTR_ERR(priv->rst_hw);
                        goto free_host;
                }
-               host->ops.hw_reset = uniphier_sd_hw_reset;
+               host->ops.card_hw_reset = uniphier_sd_hw_reset;
        }
 
        if (host->mmc->caps & MMC_CAP_UHS) {
index a761134fd3bea03432d4e846f01a196a13cbf8b6..67453f59c69cbb4902576ff5ecc9c9655a688b3b 100644 (file)
@@ -48,6 +48,7 @@
 #define SST49LF040B            0x0050
 #define SST49LF008A            0x005a
 #define AT49BV6416             0x00d6
+#define S29GL064N_MN12         0x0c01
 
 /*
  * Status Register bit description. Used by flash devices that don't
 #define CFI_SR_WBASB           BIT(3)
 #define CFI_SR_SLSB            BIT(1)
 
+enum cfi_quirks {
+       CFI_QUIRK_DQ_TRUE_DATA = BIT(0),
+};
+
 static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
 static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *);
 #if !FORCE_WORD_WRITE
@@ -436,6 +441,15 @@ static void fixup_s29ns512p_sectors(struct mtd_info *mtd)
                mtd->name);
 }
 
+static void fixup_quirks(struct mtd_info *mtd)
+{
+       struct map_info *map = mtd->priv;
+       struct cfi_private *cfi = map->fldrv_priv;
+
+       if (cfi->mfr == CFI_MFR_AMD && cfi->id == S29GL064N_MN12)
+               cfi->quirks |= CFI_QUIRK_DQ_TRUE_DATA;
+}
+
 /* Used to fix CFI-Tables of chips without Extended Query Tables */
 static struct cfi_fixup cfi_nopri_fixup_table[] = {
        { CFI_MFR_SST, 0x234a, fixup_sst39vf }, /* SST39VF1602 */
@@ -462,7 +476,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
        { CFI_MFR_AMD, 0x0056, fixup_use_secsi },
        { CFI_MFR_AMD, 0x005C, fixup_use_secsi },
        { CFI_MFR_AMD, 0x005F, fixup_use_secsi },
-       { CFI_MFR_AMD, 0x0c01, fixup_s29gl064n_sectors },
+       { CFI_MFR_AMD, S29GL064N_MN12, fixup_s29gl064n_sectors },
        { CFI_MFR_AMD, 0x1301, fixup_s29gl064n_sectors },
        { CFI_MFR_AMD, 0x1a00, fixup_s29gl032n_sectors },
        { CFI_MFR_AMD, 0x1a01, fixup_s29gl032n_sectors },
@@ -474,6 +488,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
 #if !FORCE_WORD_WRITE
        { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers },
 #endif
+       { CFI_MFR_ANY, CFI_ID_ANY, fixup_quirks },
        { 0, 0, NULL }
 };
 static struct cfi_fixup jedec_fixup_table[] = {
@@ -801,47 +816,11 @@ static struct mtd_info *cfi_amdstd_setup(struct mtd_info *mtd)
        return NULL;
 }
 
-/*
- * Return true if the chip is ready.
- *
- * Ready is one of: read mode, query mode, erase-suspend-read mode (in any
- * non-suspended sector) and is indicated by no toggle bits toggling.
- *
- * Note that anything more complicated than checking if no bits are toggling
- * (including checking DQ5 for an error status) is tricky to get working
- * correctly and is therefore not done (particularly with interleaved chips
- * as each chip must be checked independently of the others).
- */
-static int __xipram chip_ready(struct map_info *map, struct flchip *chip,
-                              unsigned long addr)
-{
-       struct cfi_private *cfi = map->fldrv_priv;
-       map_word d, t;
-
-       if (cfi_use_status_reg(cfi)) {
-               map_word ready = CMD(CFI_SR_DRB);
-               /*
-                * For chips that support status register, check device
-                * ready bit
-                */
-               cfi_send_gen_cmd(0x70, cfi->addr_unlock1, chip->start, map, cfi,
-                                cfi->device_type, NULL);
-               d = map_read(map, addr);
-
-               return map_word_andequal(map, d, ready, ready);
-       }
-
-       d = map_read(map, addr);
-       t = map_read(map, addr);
-
-       return map_word_equal(map, d, t);
-}
-
 /*
  * Return true if the chip is ready and has the correct value.
  *
  * Ready is one of: read mode, query mode, erase-suspend-read mode (in any
- * non-suspended sector) and it is indicated by no bits toggling.
+ * non-suspended sector) and is indicated by no toggle bits toggling.
  *
 * Errors are indicated by toggling bits or bits held with the wrong value,
  * or with bits toggling.
@@ -850,17 +829,16 @@ static int __xipram chip_ready(struct map_info *map, struct flchip *chip,
  * (including checking DQ5 for an error status) is tricky to get working
  * correctly and is therefore not done (particularly with interleaved chips
  * as each chip must be checked independently of the others).
- *
  */
-static int __xipram chip_good(struct map_info *map, struct flchip *chip,
-                             unsigned long addr, map_word expected)
+static int __xipram chip_ready(struct map_info *map, struct flchip *chip,
+                              unsigned long addr, map_word *expected)
 {
        struct cfi_private *cfi = map->fldrv_priv;
        map_word oldd, curd;
+       int ret;
 
        if (cfi_use_status_reg(cfi)) {
                map_word ready = CMD(CFI_SR_DRB);
-
                /*
                 * For chips that support status register, check device
                 * ready bit
@@ -875,8 +853,24 @@ static int __xipram chip_good(struct map_info *map, struct flchip *chip,
        oldd = map_read(map, addr);
        curd = map_read(map, addr);
 
-       return  map_word_equal(map, oldd, curd) &&
-               map_word_equal(map, curd, expected);
+       ret = map_word_equal(map, oldd, curd);
+
+       if (!ret || !expected)
+               return ret;
+
+       return map_word_equal(map, curd, *expected);
+}
+
+static int __xipram chip_good(struct map_info *map, struct flchip *chip,
+                             unsigned long addr, map_word *expected)
+{
+       struct cfi_private *cfi = map->fldrv_priv;
+       map_word *datum = expected;
+
+       if (cfi->quirks & CFI_QUIRK_DQ_TRUE_DATA)
+               datum = NULL;
+
+       return chip_ready(map, chip, addr, datum);
 }
 
 static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
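
After this refactor chip_ready() serves both jobs: with a NULL expected value it only checks that two back-to-back reads return identical data (no toggle bits toggling), while a non-NULL value additionally compares the stable datum against the expected one; chip_good() now simply drops that comparison on chips with the DQ-true-data quirk. The polling shape used by the call sites below, as a sketch (timeout handling condensed and illustrative):

        for (;;) {
                if (chip_ready(map, chip, adr, NULL))   /* readiness only */
                        break;
                if (time_after(jiffies, timeo))
                        return -ETIMEDOUT;      /* callers differ in error handling */
                udelay(1);
        }
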
@@ -893,7 +887,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 
        case FL_STATUS:
                for (;;) {
-                       if (chip_ready(map, chip, adr))
+                       if (chip_ready(map, chip, adr, NULL))
                                break;
 
                        if (time_after(jiffies, timeo)) {
@@ -932,7 +926,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
                chip->state = FL_ERASE_SUSPENDING;
                chip->erase_suspended = 1;
                for (;;) {
-                       if (chip_ready(map, chip, adr))
+                       if (chip_ready(map, chip, adr, NULL))
                                break;
 
                        if (time_after(jiffies, timeo)) {
@@ -1463,7 +1457,7 @@ static int do_otp_lock(struct map_info *map, struct flchip *chip, loff_t adr,
        /* wait for chip to become ready */
        timeo = jiffies + msecs_to_jiffies(2);
        for (;;) {
-               if (chip_ready(map, chip, adr))
+               if (chip_ready(map, chip, adr, NULL))
                        break;
 
                if (time_after(jiffies, timeo)) {
@@ -1699,7 +1693,7 @@ static int __xipram do_write_oneword_once(struct map_info *map,
                 * "chip_good" to avoid the failure due to scheduling.
                 */
                if (time_after(jiffies, timeo) &&
-                   !chip_good(map, chip, adr, datum)) {
+                   !chip_good(map, chip, adr, &datum)) {
                        xip_enable(map, chip, adr);
                        printk(KERN_WARNING "MTD %s(): software timeout\n", __func__);
                        xip_disable(map, chip, adr);
@@ -1707,7 +1701,7 @@ static int __xipram do_write_oneword_once(struct map_info *map,
                        break;
                }
 
-               if (chip_good(map, chip, adr, datum)) {
+               if (chip_good(map, chip, adr, &datum)) {
                        if (cfi_check_err_status(map, chip, adr))
                                ret = -EIO;
                        break;
@@ -1979,14 +1973,14 @@ static int __xipram do_write_buffer_wait(struct map_info *map,
                 * "chip_good" to avoid the failure due to scheduling.
                 */
                if (time_after(jiffies, timeo) &&
-                   !chip_good(map, chip, adr, datum)) {
+                   !chip_good(map, chip, adr, &datum)) {
                        pr_err("MTD %s(): software timeout, address:0x%.8lx.\n",
                               __func__, adr);
                        ret = -EIO;
                        break;
                }
 
-               if (chip_good(map, chip, adr, datum)) {
+               if (chip_good(map, chip, adr, &datum)) {
                        if (cfi_check_err_status(map, chip, adr))
                                ret = -EIO;
                        break;
@@ -2195,7 +2189,7 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip,
         * If the driver thinks the chip is idle, and no toggle bits
         * are changing, then the chip is actually idle for sure.
         */
-       if (chip->state == FL_READY && chip_ready(map, chip, adr))
+       if (chip->state == FL_READY && chip_ready(map, chip, adr, NULL))
                return 0;
 
        /*
@@ -2212,7 +2206,7 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip,
 
                /* wait for the chip to become ready */
                for (i = 0; i < jiffies_to_usecs(timeo); i++) {
-                       if (chip_ready(map, chip, adr))
+                       if (chip_ready(map, chip, adr, NULL))
                                return 0;
 
                        udelay(1);
@@ -2276,13 +2270,13 @@ retry:
        map_write(map, datum, adr);
 
        for (i = 0; i < jiffies_to_usecs(uWriteTimeout); i++) {
-               if (chip_ready(map, chip, adr))
+               if (chip_ready(map, chip, adr, NULL))
                        break;
 
                udelay(1);
        }
 
-       if (!chip_good(map, chip, adr, datum) ||
+       if (!chip_ready(map, chip, adr, &datum) ||
            cfi_check_err_status(map, chip, adr)) {
                /* reset on all failures. */
                map_write(map, CMD(0xF0), chip->start);
@@ -2424,6 +2418,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
        DECLARE_WAITQUEUE(wait, current);
        int ret;
        int retry_cnt = 0;
+       map_word datum = map_word_ff(map);
 
        adr = cfi->addr_unlock1;
 
@@ -2478,7 +2473,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
                        chip->erase_suspended = 0;
                }
 
-               if (chip_good(map, chip, adr, map_word_ff(map))) {
+               if (chip_ready(map, chip, adr, &datum)) {
                        if (cfi_check_err_status(map, chip, adr))
                                ret = -EIO;
                        break;
@@ -2523,6 +2518,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
        DECLARE_WAITQUEUE(wait, current);
        int ret;
        int retry_cnt = 0;
+       map_word datum = map_word_ff(map);
 
        adr += chip->start;
 
@@ -2577,7 +2573,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
                        chip->erase_suspended = 0;
                }
 
-               if (chip_good(map, chip, adr, map_word_ff(map))) {
+               if (chip_ready(map, chip, adr, &datum)) {
                        if (cfi_check_err_status(map, chip, adr))
                                ret = -EIO;
                        break;
@@ -2771,7 +2767,7 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map,
         */
        timeo = jiffies + msecs_to_jiffies(2000);       /* 2s max (un)locking */
        for (;;) {
-               if (chip_ready(map, chip, adr))
+               if (chip_ready(map, chip, adr, NULL))
                        break;
 
                if (time_after(jiffies, timeo)) {
index d503821a3e60613911f409010be56eff1eacf0ae..208bd4d871f42cc3e6adf0beaf4769d50b1736ac 100644 (file)
 #include <linux/slab.h>
 #include <linux/mtd/mtd.h>
 #include <asm/div64.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of.h>
 
 struct phram_mtd_list {
        struct mtd_info mtd;
        struct list_head list;
+       bool cached;
 };
 
 static LIST_HEAD(phram_list);
@@ -77,20 +81,51 @@ static int phram_write(struct mtd_info *mtd, loff_t to, size_t len,
        return 0;
 }
 
+static int phram_map(struct phram_mtd_list *phram, phys_addr_t start, size_t len)
+{
+       void *addr = NULL;
+
+       if (phram->cached)
+               addr = memremap(start, len, MEMREMAP_WB);
+       else
+               addr = (void __force *)ioremap(start, len);
+       if (!addr)
+               return -EIO;
+
+       phram->mtd.priv = addr;
+
+       return 0;
+}
+
+static void phram_unmap(struct phram_mtd_list *phram)
+{
+       void *addr = phram->mtd.priv;
+
+       if (phram->cached) {
+               memunmap(addr);
+               return;
+       }
+
+       iounmap((void __iomem *)addr);
+}
+
 static void unregister_devices(void)
 {
        struct phram_mtd_list *this, *safe;
 
        list_for_each_entry_safe(this, safe, &phram_list, list) {
                mtd_device_unregister(&this->mtd);
-               iounmap(this->mtd.priv);
+               phram_unmap(this);
                kfree(this->mtd.name);
                kfree(this);
        }
 }
 
-static int register_device(char *name, phys_addr_t start, size_t len, uint32_t erasesize)
+static int register_device(struct platform_device *pdev, const char *name,
+                          phys_addr_t start, size_t len, uint32_t erasesize)
 {
+       struct device_node *np = pdev ? pdev->dev.of_node : NULL;
+       bool cached = np ? !of_property_read_bool(np, "no-map") : false;
        struct phram_mtd_list *new;
        int ret = -ENOMEM;
 
@@ -98,9 +133,10 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e
        if (!new)
                goto out0;
 
-       ret = -EIO;
-       new->mtd.priv = ioremap(start, len);
-       if (!new->mtd.priv) {
+       new->cached = cached;
+
+       ret = phram_map(new, start, len);
+       if (ret) {
                pr_err("ioremap failed\n");
                goto out1;
        }
@@ -119,17 +155,23 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e
        new->mtd.erasesize = erasesize;
        new->mtd.writesize = 1;
 
+       mtd_set_of_node(&new->mtd, np);
+
        ret = -EAGAIN;
        if (mtd_device_register(&new->mtd, NULL, 0)) {
                pr_err("Failed to register new device\n");
                goto out2;
        }
 
-       list_add_tail(&new->list, &phram_list);
+       if (pdev)
+               platform_set_drvdata(pdev, new);
+       else
+               list_add_tail(&new->list, &phram_list);
+
        return 0;
 
 out2:
-       iounmap(new->mtd.priv);
+       phram_unmap(new);
 out1:
        kfree(new);
 out0:
@@ -278,7 +320,7 @@ static int phram_setup(const char *val)
                goto error;
        }
 
-       ret = register_device(name, start, len, (uint32_t)erasesize);
+       ret = register_device(NULL, name, start, len, (uint32_t)erasesize);
        if (ret)
                goto error;
 
@@ -325,10 +367,54 @@ static int phram_param_call(const char *val, const struct kernel_param *kp)
 module_param_call(phram, phram_param_call, NULL, NULL, 0200);
 MODULE_PARM_DESC(phram, "Memory region to map. \"phram=<name>,<start>,<length>[,<erasesize>]\"");
 
+#ifdef CONFIG_OF
+static const struct of_device_id phram_of_match[] = {
+       { .compatible = "phram" },
+       {}
+};
+MODULE_DEVICE_TABLE(of, phram_of_match);
+#endif
+
+static int phram_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENOMEM;
+
+       /* mtd_set_of_node() reads name from "label" */
+       return register_device(pdev, NULL, res->start, resource_size(res),
+                              PAGE_SIZE);
+}
+
+static int phram_remove(struct platform_device *pdev)
+{
+       struct phram_mtd_list *phram = platform_get_drvdata(pdev);
+
+       mtd_device_unregister(&phram->mtd);
+       phram_unmap(phram);
+       kfree(phram);
+
+       return 0;
+}
+
+static struct platform_driver phram_driver = {
+       .probe          = phram_probe,
+       .remove         = phram_remove,
+       .driver         = {
+               .name           = "phram",
+               .of_match_table = of_match_ptr(phram_of_match),
+       },
+};
 
 static int __init init_phram(void)
 {
-       int ret = 0;
+       int ret;
+
+       ret = platform_driver_register(&phram_driver);
+       if (ret)
+               return ret;
 
 #ifndef MODULE
        if (phram_paramline[0])
@@ -336,12 +422,16 @@ static int __init init_phram(void)
        phram_init_called = 1;
 #endif
 
+       if (ret)
+               platform_driver_unregister(&phram_driver);
+
        return ret;
 }
 
 static void __exit cleanup_phram(void)
 {
        unregister_devices();
+       platform_driver_unregister(&phram_driver);
 }
 
 module_init(init_phram);
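
With this change phram can be instantiated two ways: the legacy module parameter documented by MODULE_PARM_DESC above, or a platform device, typically a devicetree node matched by the "phram" compatible with the MTD name taken from its "label" property. An illustrative module-parameter invocation (address and sizes are made up):

        # map 4 MiB of RAM at 0x30000000 as MTD "test" with 64 KiB erase blocks
        modprobe phram phram=test,0x30000000,4Mi,64Ki
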
index 983999c020d665208d98f9ea8c04d6a839f8f415..d3377b10fc0f6fded3d5ab5003d39770c0dc8759 100644 (file)
@@ -2126,6 +2126,8 @@ static int stfsm_remove(struct platform_device *pdev)
 {
        struct stfsm *fsm = platform_get_drvdata(pdev);
 
+       clk_disable_unprepare(fsm->clk);
+
        return mtd_device_unregister(&fsm->mtd);
 }
 
index 6a099bbcd8bec2c29a062310ac4e07ab6fb281ac..e098ae937ce88a4deb7e1f3fee659146f4661358 100644 (file)
@@ -300,15 +300,6 @@ config MTD_DC21285
          21285 bridge used with Intel's StrongARM processors. More info at
          <https://www.intel.com/design/bridge/docs/21285_documentation.htm>.
 
-config MTD_IXP4XX
-       tristate "CFI Flash device mapped on Intel IXP4xx based systems"
-       depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX && MTD_CFI_ADV_OPTIONS
-       help
-         This enables MTD access to flash devices on platforms based
-         on Intel's IXP4xx family of network processors such as the
-         IXDP425 and Coyote. If you have an IXP4xx based board and
-         would like to use the flash chips on it, say 'Y'.
-
 config MTD_IMPA7
        tristate "JEDEC Flash device mapped on impA7"
        depends on ARM && MTD_JEDECPROBE
index 2240b100f66a4917e45cd2e7c0ab7276421baf7e..094cfb244086516ba2ed6bb557191e4ca73b6976 100644 (file)
@@ -39,7 +39,6 @@ obj-$(CONFIG_MTD_IMPA7)               += impa7.o
 obj-$(CONFIG_MTD_UCLINUX)      += uclinux.o
 obj-$(CONFIG_MTD_NETtel)       += nettel.o
 obj-$(CONFIG_MTD_SCB2_FLASH)   += scb2_flash.o
-obj-$(CONFIG_MTD_IXP4XX)       += ixp4xx.o
 obj-$(CONFIG_MTD_PLATRAM)      += plat-ram.o
 obj-$(CONFIG_MTD_INTEL_VR_NOR) += intel_vr_nor.o
 obj-$(CONFIG_MTD_VMU)          += vmu-flash.o
diff --git a/drivers/mtd/maps/ixp4xx.c b/drivers/mtd/maps/ixp4xx.c
deleted file mode 100644 (file)
index d854320..0000000
+++ /dev/null
@@ -1,262 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * drivers/mtd/maps/ixp4xx.c
- *
- * MTD Map file for IXP4XX based systems. Please do not make per-board
- * changes in here. If your board needs special setup, do it in your
- * platform level code in arch/arm/mach-ixp4xx/board-setup.c
- *
- * Original Author: Intel Corporation
- * Maintainer: Deepak Saxena <dsaxena@mvista.com>
- *
- * Copyright (C) 2002 Intel Corporation
- * Copyright (C) 2003-2004 MontaVista Software, Inc.
- *
- */
-
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/ioport.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-
-#include <asm/io.h>
-#include <asm/mach/flash.h>
-
-#include <linux/reboot.h>
-
-/*
- * Read/write a 16 bit word from flash address 'addr'.
- *
- * When the cpu is in little-endian mode it swizzles the address lines
- * ('address coherency') so we need to undo the swizzling to ensure commands
- * and the like end up on the correct flash address.
- *
- * To further complicate matters, due to the way the expansion bus controller
- * handles 32 bit reads, the byte stream ABCD is stored on the flash as:
- *     D15    D0
- *     +---+---+
- *     | A | B | 0
- *     +---+---+
- *     | C | D | 2
- *     +---+---+
- * This means that on LE systems each 16 bit word must be swapped. Note that
- * this requires CONFIG_MTD_CFI_BE_BYTE_SWAP to be enabled to 'unswap' the CFI
- * data and other flash commands which are always in D7-D0.
- */
-#ifndef __ARMEB__
-#ifndef CONFIG_MTD_CFI_BE_BYTE_SWAP
-#  error CONFIG_MTD_CFI_BE_BYTE_SWAP required
-#endif
-
-static inline u16 flash_read16(void __iomem *addr)
-{
-       return be16_to_cpu(__raw_readw((void __iomem *)((unsigned long)addr ^ 0x2)));
-}
-
-static inline void flash_write16(u16 d, void __iomem *addr)
-{
-       __raw_writew(cpu_to_be16(d), (void __iomem *)((unsigned long)addr ^ 0x2));
-}
-
-#define        BYTE0(h)        ((h) & 0xFF)
-#define        BYTE1(h)        (((h) >> 8) & 0xFF)
-
-#else
-
-static inline u16 flash_read16(const void __iomem *addr)
-{
-       return __raw_readw(addr);
-}
-
-static inline void flash_write16(u16 d, void __iomem *addr)
-{
-       __raw_writew(d, addr);
-}
-
-#define        BYTE0(h)        (((h) >> 8) & 0xFF)
-#define        BYTE1(h)        ((h) & 0xFF)
-#endif
-
-static map_word ixp4xx_read16(struct map_info *map, unsigned long ofs)
-{
-       map_word val;
-       val.x[0] = flash_read16(map->virt + ofs);
-       return val;
-}
-
-/*
- * The IXP4xx expansion bus only allows 16-bit wide acceses
- * when attached to a 16-bit wide device (such as the 28F128J3A),
- * so we can't just memcpy_fromio().
- */
-static void ixp4xx_copy_from(struct map_info *map, void *to,
-                            unsigned long from, ssize_t len)
-{
-       u8 *dest = (u8 *) to;
-       void __iomem *src = map->virt + from;
-
-       if (len <= 0)
-               return;
-
-       if (from & 1) {
-               *dest++ = BYTE1(flash_read16(src-1));
-               src++;
-               --len;
-       }
-
-       while (len >= 2) {
-               u16 data = flash_read16(src);
-               *dest++ = BYTE0(data);
-               *dest++ = BYTE1(data);
-               src += 2;
-               len -= 2;
-       }
-
-       if (len > 0)
-               *dest++ = BYTE0(flash_read16(src));
-}
-
-/*
- * Unaligned writes are ignored, causing the 8-bit
- * probe to fail and proceed to the 16-bit probe (which succeeds).
- */
-static void ixp4xx_probe_write16(struct map_info *map, map_word d, unsigned long adr)
-{
-       if (!(adr & 1))
-               flash_write16(d.x[0], map->virt + adr);
-}
-
-/*
- * Fast write16 function without the probing check above
- */
-static void ixp4xx_write16(struct map_info *map, map_word d, unsigned long adr)
-{
-       flash_write16(d.x[0], map->virt + adr);
-}
-
-struct ixp4xx_flash_info {
-       struct mtd_info *mtd;
-       struct map_info map;
-       struct resource *res;
-};
-
-static const char * const probes[] = { "RedBoot", "cmdlinepart", NULL };
-
-static int ixp4xx_flash_remove(struct platform_device *dev)
-{
-       struct flash_platform_data *plat = dev_get_platdata(&dev->dev);
-       struct ixp4xx_flash_info *info = platform_get_drvdata(dev);
-
-       if(!info)
-               return 0;
-
-       if (info->mtd) {
-               mtd_device_unregister(info->mtd);
-               map_destroy(info->mtd);
-       }
-
-       if (plat->exit)
-               plat->exit();
-
-       return 0;
-}
-
-static int ixp4xx_flash_probe(struct platform_device *dev)
-{
-       struct flash_platform_data *plat = dev_get_platdata(&dev->dev);
-       struct ixp4xx_flash_info *info;
-       struct mtd_part_parser_data ppdata = {
-               .origin = dev->resource->start,
-       };
-       int err = -1;
-
-       if (!plat)
-               return -ENODEV;
-
-       if (plat->init) {
-               err = plat->init();
-               if (err)
-                       return err;
-       }
-
-       info = devm_kzalloc(&dev->dev, sizeof(struct ixp4xx_flash_info),
-                           GFP_KERNEL);
-       if(!info) {
-               err = -ENOMEM;
-               goto Error;
-       }
-
-       platform_set_drvdata(dev, info);
-
-       /*
-        * Tell the MTD layer we're not 1:1 mapped so that it does
-        * not attempt to do a direct access on us.
-        */
-       info->map.phys = NO_XIP;
-       info->map.size = resource_size(dev->resource);
-
-       /*
-        * We only support 16-bit accesses for now. If and when
-        * any board use 8-bit access, we'll fixup the driver to
-        * handle that.
-        */
-       info->map.bankwidth = 2;
-       info->map.name = dev_name(&dev->dev);
-       info->map.read = ixp4xx_read16;
-       info->map.write = ixp4xx_probe_write16;
-       info->map.copy_from = ixp4xx_copy_from;
-
-       info->map.virt = devm_ioremap_resource(&dev->dev, dev->resource);
-       if (IS_ERR(info->map.virt)) {
-               err = PTR_ERR(info->map.virt);
-               goto Error;
-       }
-
-       info->mtd = do_map_probe(plat->map_name, &info->map);
-       if (!info->mtd) {
-               printk(KERN_ERR "IXP4XXFlash: map_probe failed\n");
-               err = -ENXIO;
-               goto Error;
-       }
-       info->mtd->dev.parent = &dev->dev;
-
-       /* Use the fast version */
-       info->map.write = ixp4xx_write16;
-
-       err = mtd_device_parse_register(info->mtd, probes, &ppdata,
-                       plat->parts, plat->nr_parts);
-       if (err) {
-               printk(KERN_ERR "Could not parse partitions\n");
-               goto Error;
-       }
-
-       return 0;
-
-Error:
-       ixp4xx_flash_remove(dev);
-       return err;
-}
-
-static struct platform_driver ixp4xx_flash_driver = {
-       .probe          = ixp4xx_flash_probe,
-       .remove         = ixp4xx_flash_remove,
-       .driver         = {
-               .name   = "IXP4XX-Flash",
-       },
-};
-
-module_platform_driver(ixp4xx_flash_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("MTD map driver for Intel IXP4xx systems");
-MODULE_AUTHOR("Deepak Saxena");
-MODULE_ALIAS("platform:IXP4XX-Flash");
index 64d2b093f114b6efdca91fc45ab9d19b7873fc7b..f73172111465501eb043fdca37e612f7655156f1 100644 (file)
@@ -377,7 +377,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
 
        if (tr->discard) {
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq);
                blk_queue_max_discard_sectors(new->rq, UINT_MAX);
                new->rq->limits.discard_granularity = tr->blksize;
        }
index 03e3de3a5d79e1d5420db77f287a23985f488bb9..1e94e7d10b8be64172d222b3136948133391d1d3 100644 (file)
@@ -257,6 +257,10 @@ static int mtdblock_open(struct mtd_blktrans_dev *mbd)
                return 0;
        }
 
+       if (mtd_type_is_nand(mbd->mtd))
+               pr_warn("%s: MTD device '%s' is NAND, please consider using UBI block devices instead.\n",
+                       mbd->tr->name, mbd->mtd->name);
+
        /* OK, it's not open. Create cache info for it */
        mtdblk->count = 1;
        mutex_init(&mtdblk->cache_mutex);
@@ -322,10 +326,6 @@ static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
        if (!(mtd->flags & MTD_WRITEABLE))
                dev->mbd.readonly = 1;
 
-       if (mtd_type_is_nand(mtd))
-               pr_warn("%s: MTD device '%s' is NAND, please consider using UBI block devices instead.\n",
-                       tr->name, mtd->name);
-
        if (add_mtd_blktrans_dev(&dev->mbd))
                kfree(dev);
 }
index 7731796024e00a71e2614b5bd50511cfb3f53ee6..9eb0680db312f6cd4db83b2e837371ed7dde6dd4 100644 (file)
@@ -557,9 +557,10 @@ static int mtd_nvmem_add(struct mtd_info *mtd)
 
 int add_mtd_device(struct mtd_info *mtd)
 {
+       struct device_node *np = mtd_get_of_node(mtd);
        struct mtd_info *master = mtd_get_master(mtd);
        struct mtd_notifier *not;
-       int i, error;
+       int i, error, ofidx;
 
        /*
         * May occur, for instance, on buggy drivers which call
@@ -598,7 +599,13 @@ int add_mtd_device(struct mtd_info *mtd)
 
        mutex_lock(&mtd_table_mutex);
 
-       i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL);
+       ofidx = -1;
+       if (np)
+               ofidx = of_alias_get_id(np, "mtd");
+       if (ofidx >= 0)
+               i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL);
+       else
+               i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL);
        if (i < 0) {
                error = i;
                goto fail_locked;
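
With the alias lookup above, a devicetree alias of the form mtdN pins a device to a stable index, while devices without an alias keep dynamic numbering. A sketch of how the pieces fit (the alias node and label are hypothetical):

        /* DT:  aliases { mtd3 = &board_nor; };  pins board_nor to /dev/mtd3 */
        ofidx = of_alias_get_id(np, "mtd");     /* negative errno when no alias */
        if (ofidx >= 0)
                i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL);
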
index 227df24387df5e2951bab272bc693c5dff696bc2..3d4a2ffb5b01f41897263e70e25793f11be42fd9 100644 (file)
 #include <linux/wait.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/timekeeping.h>
 #include <linux/mtd/mtd.h>
 #include <linux/kmsg_dump.h>
 
 /* Maximum MTD partition size */
 #define MTDOOPS_MAX_MTD_SIZE (8 * 1024 * 1024)
 
-#define MTDOOPS_KERNMSG_MAGIC 0x5d005d00
-#define MTDOOPS_HEADER_SIZE   8
-
 static unsigned long record_size = 4096;
 module_param(record_size, ulong, 0400);
 MODULE_PARM_DESC(record_size,
@@ -40,6 +38,15 @@ module_param(dump_oops, int, 0600);
 MODULE_PARM_DESC(dump_oops,
                "set to 1 to dump oopses, 0 to only dump panics (default 1)");
 
+#define MTDOOPS_KERNMSG_MAGIC_v1 0x5d005d00  /* Original */
+#define MTDOOPS_KERNMSG_MAGIC_v2 0x5d005e00  /* Adds the timestamp */
+
+struct mtdoops_hdr {
+       u32 seq;
+       u32 magic;
+       ktime_t timestamp;
+} __packed;
+
 static struct mtdoops_context {
        struct kmsg_dumper dump;
 
@@ -178,16 +185,17 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
 {
        struct mtd_info *mtd = cxt->mtd;
        size_t retlen;
-       u32 *hdr;
+       struct mtdoops_hdr *hdr;
        int ret;
 
        if (test_and_set_bit(0, &cxt->oops_buf_busy))
                return;
 
        /* Add mtdoops header to the buffer */
-       hdr = cxt->oops_buf;
-       hdr[0] = cxt->nextcount;
-       hdr[1] = MTDOOPS_KERNMSG_MAGIC;
+       hdr = (struct mtdoops_hdr *)cxt->oops_buf;
+       hdr->seq = cxt->nextcount;
+       hdr->magic = MTDOOPS_KERNMSG_MAGIC_v2;
+       hdr->timestamp = ktime_get_real();
 
        if (panic) {
                ret = mtd_panic_write(mtd, cxt->nextpage * record_size,
@@ -222,8 +230,9 @@ static void mtdoops_workfunc_write(struct work_struct *work)
 static void find_next_position(struct mtdoops_context *cxt)
 {
        struct mtd_info *mtd = cxt->mtd;
+       struct mtdoops_hdr hdr;
        int ret, page, maxpos = 0;
-       u32 count[2], maxcount = 0xffffffff;
+       u32 maxcount = 0xffffffff;
        size_t retlen;
 
        for (page = 0; page < cxt->oops_pages; page++) {
@@ -231,32 +240,33 @@ static void find_next_position(struct mtdoops_context *cxt)
                        continue;
                /* Assume the page is used */
                mark_page_used(cxt, page);
-               ret = mtd_read(mtd, page * record_size, MTDOOPS_HEADER_SIZE,
-                              &retlen, (u_char *)&count[0]);
-               if (retlen != MTDOOPS_HEADER_SIZE ||
+               ret = mtd_read(mtd, page * record_size, sizeof(hdr),
+                              &retlen, (u_char *)&hdr);
+               if (retlen != sizeof(hdr) ||
                                (ret < 0 && !mtd_is_bitflip(ret))) {
-                       printk(KERN_ERR "mtdoops: read failure at %ld (%td of %d read), err %d\n",
-                              page * record_size, retlen,
-                              MTDOOPS_HEADER_SIZE, ret);
+                       printk(KERN_ERR "mtdoops: read failure at %ld (%zu of %zu read), err %d\n",
+                              page * record_size, retlen, sizeof(hdr), ret);
                        continue;
                }
 
-               if (count[0] == 0xffffffff && count[1] == 0xffffffff)
+               if (hdr.seq == 0xffffffff && hdr.magic == 0xffffffff)
                        mark_page_unused(cxt, page);
-               if (count[0] == 0xffffffff || count[1] != MTDOOPS_KERNMSG_MAGIC)
+               if (hdr.seq == 0xffffffff ||
+                   (hdr.magic != MTDOOPS_KERNMSG_MAGIC_v1 &&
+                    hdr.magic != MTDOOPS_KERNMSG_MAGIC_v2))
                        continue;
                if (maxcount == 0xffffffff) {
-                       maxcount = count[0];
+                       maxcount = hdr.seq;
                        maxpos = page;
-               } else if (count[0] < 0x40000000 && maxcount > 0xc0000000) {
-                       maxcount = count[0];
+               } else if (hdr.seq < 0x40000000 && maxcount > 0xc0000000) {
+                       maxcount = hdr.seq;
                        maxpos = page;
-               } else if (count[0] > maxcount && count[0] < 0xc0000000) {
-                       maxcount = count[0];
+               } else if (hdr.seq > maxcount && hdr.seq < 0xc0000000) {
+                       maxcount = hdr.seq;
                        maxpos = page;
-               } else if (count[0] > maxcount && count[0] > 0xc0000000
+               } else if (hdr.seq > maxcount && hdr.seq > 0xc0000000
                                        && maxcount > 0x80000000) {
-                       maxcount = count[0];
+                       maxcount = hdr.seq;
                        maxpos = page;
                }
        }
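The 0x40000000/0xc0000000 window checks above let the scan pick the newest record even when the 32-bit sequence counter has wrapped. For comparison only, a common alternative idiom with the same intent (not what the driver uses) is the signed modular difference:

#include <stdbool.h>
#include <stdint.h>

/* Wrap-tolerant "a is newer than b" for free-running 32-bit counters. */
static bool seq_after(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) > 0;    /* e.g. seq_after(2, 0xfffffffeu) is true */
}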
@@ -287,8 +297,9 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
 
        if (test_and_set_bit(0, &cxt->oops_buf_busy))
                return;
-       kmsg_dump_get_buffer(&iter, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
-                            record_size - MTDOOPS_HEADER_SIZE, NULL);
+       kmsg_dump_get_buffer(&iter, true,
+                            cxt->oops_buf + sizeof(struct mtdoops_hdr),
+                            record_size - sizeof(struct mtdoops_hdr), NULL);
        clear_bit(0, &cxt->oops_buf_busy);
 
        if (reason != KMSG_DUMP_OOPS) {
index 357661b62c94d1cc8b27d1e09f3290a9225b3ca2..d442fa94c87200899b8fbf77ed28a1bc42575197 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/err.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
 
 #include "mtdcore.h"
 
@@ -577,10 +578,16 @@ static int mtd_part_of_parse(struct mtd_info *master,
        struct mtd_part_parser *parser;
        struct device_node *np;
        struct property *prop;
+       struct device *dev;
        const char *compat;
        const char *fixed = "fixed-partitions";
        int ret, err = 0;
 
+       dev = &master->dev;
+       /* Use parent device (controller) if the top level MTD is not registered */
+       if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) && !mtd_is_partition(master))
+               dev = master->dev.parent;
+
        np = mtd_get_of_node(master);
        if (mtd_is_partition(master))
                of_node_get(np);
@@ -593,6 +600,7 @@ static int mtd_part_of_parse(struct mtd_info *master,
                        continue;
                ret = mtd_part_do_parse(parser, master, pparts, NULL);
                if (ret > 0) {
+                       of_platform_populate(np, NULL, NULL, dev);
                        of_node_put(np);
                        return ret;
                }
@@ -600,6 +608,7 @@ static int mtd_part_of_parse(struct mtd_info *master,
                if (ret < 0 && !err)
                        err = ret;
        }
+       of_platform_populate(np, NULL, NULL, dev);
        of_node_put(np);
 
        /*
index 9b249826ef93e2ee04138a0963e184d84cc6bb96..5b0c2c95f10cb8ca99343507fd61e7cdf48221c4 100644 (file)
@@ -53,6 +53,14 @@ config MTD_NAND_ECC_MXIC
        help
          This enables support for the hardware ECC engine from Macronix.
 
+config MTD_NAND_ECC_MEDIATEK
+       tristate "Mediatek hardware ECC engine"
+       depends on HAS_IOMEM
+       depends on ARCH_MEDIATEK || COMPILE_TEST
+       select MTD_NAND_ECC
+       help
+         This enables support for the hardware ECC engine from Mediatek.
+
 endmenu
 
 endmenu
index a4e6b7ae061434c866b22447be1f5ee2f71b2649..19e1291ac4d5e28dab834c28afb832c6f1786654 100644 (file)
@@ -2,6 +2,7 @@
 
 nandcore-objs := core.o bbt.o
 obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o
+obj-$(CONFIG_MTD_NAND_ECC_MEDIATEK) += ecc-mtk.o
 
 obj-y  += onenand/
 obj-y  += raw/
similarity index 98%
rename from drivers/mtd/nand/raw/mtk_ecc.c
rename to drivers/mtd/nand/ecc-mtk.c
index 49ab3448b9b12deaccddd71ae52d677bc9c024ee..9f9b201fe706a8e391af9636a15300df88a913bf 100644 (file)
@@ -15,8 +15,7 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/mutex.h>
-
-#include "mtk_ecc.h"
+#include <linux/mtd/nand-ecc-mtk.h>
 
 #define ECC_IDLE_MASK          BIT(0)
 #define ECC_IRQ_EN             BIT(0)
@@ -280,7 +279,10 @@ struct mtk_ecc *of_mtk_ecc_get(struct device_node *of_node)
        struct mtk_ecc *ecc = NULL;
        struct device_node *np;
 
-       np = of_parse_phandle(of_node, "ecc-engine", 0);
+       np = of_parse_phandle(of_node, "nand-ecc-engine", 0);
+       /* for backward compatibility */
+       if (!np)
+               np = of_parse_phandle(of_node, "ecc-engine", 0);
        if (np) {
                ecc = mtk_ecc_get(np);
                of_node_put(np);
index 9b078e78f3fabf6feeec06ca9261ee221c1bc9f3..8b6d7a515445e6dc828a8e01a481e6d4a92018ce 100644 (file)
@@ -374,6 +374,7 @@ config MTD_NAND_QCOM
 
 config MTD_NAND_MTK
        tristate "MTK NAND controller"
+       depends on MTD_NAND_ECC_MEDIATEK
        depends on ARCH_MEDIATEK || COMPILE_TEST
        depends on HAS_IOMEM
        help
index 88a566513c562d1f0280a4952ab5723d12dee3dc..fa1d0012031012268b09d9e4e13b1aa8bf7e0939 100644 (file)
@@ -48,7 +48,7 @@ obj-$(CONFIG_MTD_NAND_SUNXI)          += sunxi_nand.o
 obj-$(CONFIG_MTD_NAND_HISI504)         += hisi504_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND)                += brcmnand/
 obj-$(CONFIG_MTD_NAND_QCOM)            += qcom_nandc.o
-obj-$(CONFIG_MTD_NAND_MTK)             += mtk_ecc.o mtk_nand.o
+obj-$(CONFIG_MTD_NAND_MTK)             += mtk_nand.o
 obj-$(CONFIG_MTD_NAND_MXIC)            += mxic_nand.o
 obj-$(CONFIG_MTD_NAND_TEGRA)           += tegra_nand.o
 obj-$(CONFIG_MTD_NAND_STM32_FMC2)      += stm32_fmc2_nand.o
index 7eec60ea90564788d47f7392ee8c613172e226de..0d72672f8b64d5648557891405f37d7d923ed7d8 100644 (file)
@@ -2983,11 +2983,10 @@ static int cadence_nand_dt_probe(struct platform_device *ofdev)
        if (IS_ERR(cdns_ctrl->reg))
                return PTR_ERR(cdns_ctrl->reg);
 
-       res = platform_get_resource(ofdev, IORESOURCE_MEM, 1);
-       cdns_ctrl->io.dma = res->start;
-       cdns_ctrl->io.virt = devm_ioremap_resource(&ofdev->dev, res);
+       cdns_ctrl->io.virt = devm_platform_get_and_ioremap_resource(ofdev, 1, &res);
        if (IS_ERR(cdns_ctrl->io.virt))
                return PTR_ERR(cdns_ctrl->io.virt);
+       cdns_ctrl->io.dma = res->start;
 
        dt->clk = devm_clk_get(cdns_ctrl->dev, "nf_clk");
        if (IS_ERR(dt->clk))
index 6edf78c16fc8b0a5a1abceb4281e8764886892c2..f0a15717cf055a03ca7803740b9ba8e55d488e2b 100644 (file)
@@ -104,17 +104,12 @@ static int cs553x_write_ctrl_byte(struct cs553x_nand_controller *cs553x,
                                  u32 ctl, u8 data)
 {
        u8 status;
-       int ret;
 
        writeb(ctl, cs553x->mmio + MM_NAND_CTL);
        writeb(data, cs553x->mmio + MM_NAND_IO);
-       ret = readb_poll_timeout_atomic(cs553x->mmio + MM_NAND_STS, status,
+       return readb_poll_timeout_atomic(cs553x->mmio + MM_NAND_STS, status,
                                        !(status & CS_NAND_CTLR_BUSY), 1,
                                        100000);
-       if (ret)
-               return ret;
-
-       return 0;
 }
 
 static void cs553x_data_in(struct cs553x_nand_controller *cs553x, void *buf,
index 45fec8c192abaa1395f10084c24897d362a2e0bd..3e98e3c255bfe1e4d00e2adf97da1fa0988e2eec 100644 (file)
@@ -727,7 +727,7 @@ static int nand_davinci_probe(struct platform_device *pdev)
                return -ENODEV;
 
        /* which external chipselect will we be managing? */
-       if (pdata->core_chipsel < 0 || pdata->core_chipsel > 3)
+       if (pdata->core_chipsel > 3)
                return -ENODEV;
 
        info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
index 20c085a30adcba8558b8c79aaaec7e8d6108f1c7..de7e722d38262513e5c73e9f8641916de1b4389d 100644 (file)
@@ -74,22 +74,21 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
                return ret;
        }
 
-       denali->reg = ioremap(csr_base, csr_len);
+       denali->reg = devm_ioremap(denali->dev, csr_base, csr_len);
        if (!denali->reg) {
                dev_err(&dev->dev, "Spectra: Unable to remap memory region\n");
                return -ENOMEM;
        }
 
-       denali->host = ioremap(mem_base, mem_len);
+       denali->host = devm_ioremap(denali->dev, mem_base, mem_len);
        if (!denali->host) {
                dev_err(&dev->dev, "Spectra: ioremap failed!");
-               ret = -ENOMEM;
-               goto out_unmap_reg;
+               return -ENOMEM;
        }
 
        ret = denali_init(denali);
        if (ret)
-               goto out_unmap_host;
+               return ret;
 
        nsels = denali->nbanks;
 
@@ -117,10 +116,6 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
 out_remove_denali:
        denali_remove(denali);
-out_unmap_host:
-       iounmap(denali->host);
-out_unmap_reg:
-       iounmap(denali->reg);
        return ret;
 }
 
@@ -129,8 +124,6 @@ static void denali_pci_remove(struct pci_dev *dev)
        struct denali_controller *denali = pci_get_drvdata(dev);
 
        denali_remove(denali);
-       iounmap(denali->reg);
-       iounmap(denali->host);
 }
 
 static struct pci_driver denali_pci_driver = {
index 44b14c9dc9a7301e9ad617787d0f0b3c77d22a0b..0b68d05846e184421459ff1825c665f525839bf5 100644 (file)
@@ -218,7 +218,8 @@ static void gpmi_dump_info(struct gpmi_nand_data *this)
                "ECC Strength           : %u\n"
                "Page Size in Bytes     : %u\n"
                "Metadata Size in Bytes : %u\n"
-               "ECC Chunk Size in Bytes: %u\n"
+               "ECC0 Chunk Size in Bytes: %u\n"
+               "ECCn Chunk Size in Bytes: %u\n"
                "ECC Chunk Count        : %u\n"
                "Payload Size in Bytes  : %u\n"
                "Auxiliary Size in Bytes: %u\n"
@@ -229,7 +230,8 @@ static void gpmi_dump_info(struct gpmi_nand_data *this)
                geo->ecc_strength,
                geo->page_size,
                geo->metadata_size,
-               geo->ecc_chunk_size,
+               geo->ecc0_chunk_size,
+               geo->eccn_chunk_size,
                geo->ecc_chunk_count,
                geo->payload_size,
                geo->auxiliary_size,
@@ -238,9 +240,15 @@ static void gpmi_dump_info(struct gpmi_nand_data *this)
                geo->block_mark_bit_offset);
 }
 
-static inline bool gpmi_check_ecc(struct gpmi_nand_data *this)
+static bool gpmi_check_ecc(struct gpmi_nand_data *this)
 {
+       struct nand_chip *chip = &this->nand;
        struct bch_geometry *geo = &this->bch_geometry;
+       struct nand_device *nand = &chip->base;
+       struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
+
+       conf->step_size = geo->eccn_chunk_size;
+       conf->strength = geo->ecc_strength;
 
        /* Do the sanity check. */
        if (GPMI_IS_MXS(this)) {
@@ -248,7 +256,47 @@ static inline bool gpmi_check_ecc(struct gpmi_nand_data *this)
                if (geo->gf_len == 14)
                        return false;
        }
-       return geo->ecc_strength <= this->devdata->bch_max_ecc_strength;
+
+       if (geo->ecc_strength > this->devdata->bch_max_ecc_strength)
+               return false;
+
+       if (!nand_ecc_is_strong_enough(nand))
+               return false;
+
+       return true;
+}
+
+/* check if the bbm is located in a data chunk rather than an ecc chunk */
+static bool bbm_in_data_chunk(struct gpmi_nand_data *this,
+                       unsigned int *chunk_num)
+{
+       struct bch_geometry *geo = &this->bch_geometry;
+       struct nand_chip *chip = &this->nand;
+       struct mtd_info *mtd = nand_to_mtd(chip);
+       unsigned int i, j;
+
+       if (geo->ecc0_chunk_size != geo->eccn_chunk_size) {
+               dev_err(this->dev,
+                       "The size of ecc0_chunk must be equal to eccn_chunk\n");
+               return false;
+       }
+
+       i = (mtd->writesize * 8 - geo->metadata_size * 8) /
+               (geo->gf_len * geo->ecc_strength +
+                       geo->eccn_chunk_size * 8);
+
+       j = (mtd->writesize * 8 - geo->metadata_size * 8) -
+               (geo->gf_len * geo->ecc_strength +
+                       geo->eccn_chunk_size * 8) * i;
+
+       if (j < geo->eccn_chunk_size * 8) {
+               *chunk_num = i + 1;
+               dev_dbg(this->dev, "Set ecc to %d and bbm in chunk %d\n",
+                               geo->ecc_strength, *chunk_num);
+               return true;
+       }
+
+       return false;
 }
 
 /*
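A standalone sketch of the bbm_in_data_chunk() arithmetic under assumed geometry values (4KiB page, 1KiB chunks, BCH8 over GF(2^14)); it counts how many full data-plus-parity groups precede the physical bad block marker:

#include <stdio.h>

int main(void)
{
        unsigned int writesize = 4096, metadata_size = 10;
        unsigned int gf_len = 14, ecc_strength = 8, eccn_chunk_size = 1024;
        unsigned int bits = writesize * 8 - metadata_size * 8;            /* 32688 */
        unsigned int group = gf_len * ecc_strength + eccn_chunk_size * 8; /* 8304 */
        unsigned int i = bits / group;          /* 3 full groups */
        unsigned int j = bits - group * i;      /* 7776 bits left over */

        if (j < eccn_chunk_size * 8)            /* leftover falls inside a data chunk */
                printf("bbm lands in data chunk %u\n", i + 1);  /* prints 4 */
        return 0;
}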
@@ -280,13 +328,14 @@ static int set_geometry_by_ecc_info(struct gpmi_nand_data *this,
                        nanddev_get_ecc_requirements(&chip->base)->step_size);
                return -EINVAL;
        }
-       geo->ecc_chunk_size = ecc_step;
+       geo->ecc0_chunk_size = ecc_step;
+       geo->eccn_chunk_size = ecc_step;
        geo->ecc_strength = round_up(ecc_strength, 2);
        if (!gpmi_check_ecc(this))
                return -EINVAL;
 
        /* Keep the C >= O */
-       if (geo->ecc_chunk_size < mtd->oobsize) {
+       if (geo->eccn_chunk_size < mtd->oobsize) {
                dev_err(this->dev,
                        "unsupported nand chip. ecc size: %d, oob size : %d\n",
                        ecc_step, mtd->oobsize);
@@ -296,7 +345,7 @@ static int set_geometry_by_ecc_info(struct gpmi_nand_data *this,
        /* The default value, see comment in the legacy_set_geometry(). */
        geo->metadata_size = 10;
 
-       geo->ecc_chunk_count = mtd->writesize / geo->ecc_chunk_size;
+       geo->ecc_chunk_count = mtd->writesize / geo->eccn_chunk_size;
 
        /*
         * Now, the NAND chip with 2K page(data chunk is 512byte) shows below:
@@ -399,6 +448,134 @@ static inline int get_ecc_strength(struct gpmi_nand_data *this)
        return round_down(ecc_strength, 2);
 }
 
+static int set_geometry_for_large_oob(struct gpmi_nand_data *this)
+{
+       struct bch_geometry *geo = &this->bch_geometry;
+       struct nand_chip *chip = &this->nand;
+       struct mtd_info *mtd = nand_to_mtd(chip);
+       const struct nand_ecc_props *requirements =
+               nanddev_get_ecc_requirements(&chip->base);
+       unsigned int block_mark_bit_offset;
+       unsigned int max_ecc;
+       unsigned int bbm_chunk;
+       unsigned int i;
+
+       /* sanity check for the minimum ecc the nand chip requires */
+       if (!(requirements->strength > 0 &&
+             requirements->step_size > 0))
+               return -EINVAL;
+       geo->ecc_strength = requirements->strength;
+
+       /* check if platform can support this nand */
+       if (!gpmi_check_ecc(this)) {
+               dev_err(this->dev,
+                       "unsupported NAND chip, minimum ecc required %d\n",
+                       geo->ecc_strength);
+               return -EINVAL;
+       }
+
+       /* calculate the maximum ecc the platform can support */
+       geo->metadata_size = 10;
+       geo->gf_len = 14;
+       geo->ecc0_chunk_size = 1024;
+       geo->eccn_chunk_size = 1024;
+       geo->ecc_chunk_count = mtd->writesize / geo->eccn_chunk_size;
+       max_ecc = min(get_ecc_strength(this),
+                     this->devdata->bch_max_ecc_strength);
+
+       /*
+        * search for a supported ecc strength that places
+        * the bbm in a data chunk
+        */
+       geo->ecc_strength = max_ecc;
+       while (geo->ecc_strength >= requirements->strength) {
+               if (bbm_in_data_chunk(this, &bbm_chunk))
+                       goto geo_setting;
+               geo->ecc_strength -= 2;
+       }
+
+       /* if none of them works, keep using the minimum ecc */
+       /* the nand requires, but change the ecc page layout */
+       geo->ecc_strength = requirements->strength;
+       /* add extra ecc for meta data */
+       geo->ecc0_chunk_size = 0;
+       geo->ecc_chunk_count = (mtd->writesize / geo->eccn_chunk_size) + 1;
+       geo->ecc_for_meta = 1;
+       /* check if the oob can hold this extra ecc chunk */
+       if (mtd->oobsize * 8 < geo->metadata_size * 8 +
+           geo->gf_len * geo->ecc_strength * geo->ecc_chunk_count) {
+               dev_err(this->dev, "unsupported NAND chip with new layout\n");
+               return -EINVAL;
+       }
+
+       /* calculate in which chunk the bbm is located */
+       bbm_chunk = (mtd->writesize * 8 - geo->metadata_size * 8 -
+                    geo->gf_len * geo->ecc_strength) /
+                    (geo->gf_len * geo->ecc_strength +
+                    geo->eccn_chunk_size * 8) + 1;
+
+geo_setting:
+
+       geo->page_size = mtd->writesize + geo->metadata_size +
+               (geo->gf_len * geo->ecc_strength * geo->ecc_chunk_count) / 8;
+       geo->payload_size = mtd->writesize;
+
+       /*
+        * The auxiliary buffer contains the metadata and the ECC status. The
+        * metadata is padded to the nearest 32-bit boundary. The ECC status
+        * contains one byte for every ECC chunk, and is also padded to the
+        * nearest 32-bit boundary.
+        */
+       geo->auxiliary_status_offset = ALIGN(geo->metadata_size, 4);
+       geo->auxiliary_size = ALIGN(geo->metadata_size, 4)
+                                   + ALIGN(geo->ecc_chunk_count, 4);
+
+       if (!this->swap_block_mark)
+               return 0;
+
+       /* calculate the number of ecc chunks behind the bbm */
+       i = (mtd->writesize / geo->eccn_chunk_size) - bbm_chunk + 1;
+
+       block_mark_bit_offset = mtd->writesize * 8 -
+               (geo->ecc_strength * geo->gf_len * (geo->ecc_chunk_count - i)
+               + geo->metadata_size * 8);
+
+       geo->block_mark_byte_offset = block_mark_bit_offset / 8;
+       geo->block_mark_bit_offset  = block_mark_bit_offset % 8;
+
+       dev_dbg(this->dev, "BCH Geometry :\n"
+               "GF length              : %u\n"
+               "ECC Strength           : %u\n"
+               "Page Size in Bytes     : %u\n"
+               "Metadata Size in Bytes : %u\n"
+               "ECC0 Chunk Size in Bytes: %u\n"
+               "ECCn Chunk Size in Bytes: %u\n"
+               "ECC Chunk Count        : %u\n"
+               "Payload Size in Bytes  : %u\n"
+               "Auxiliary Size in Bytes: %u\n"
+               "Auxiliary Status Offset: %u\n"
+               "Block Mark Byte Offset : %u\n"
+               "Block Mark Bit Offset  : %u\n"
+               "Block Mark in chunk    : %u\n"
+               "Ecc for Meta data      : %u\n",
+               geo->gf_len,
+               geo->ecc_strength,
+               geo->page_size,
+               geo->metadata_size,
+               geo->ecc0_chunk_size,
+               geo->eccn_chunk_size,
+               geo->ecc_chunk_count,
+               geo->payload_size,
+               geo->auxiliary_size,
+               geo->auxiliary_status_offset,
+               geo->block_mark_byte_offset,
+               geo->block_mark_bit_offset,
+               bbm_chunk,
+               geo->ecc_for_meta);
+
+       return 0;
+}
+
 static int legacy_set_geometry(struct gpmi_nand_data *this)
 {
        struct bch_geometry *geo = &this->bch_geometry;
@@ -418,13 +595,15 @@ static int legacy_set_geometry(struct gpmi_nand_data *this)
        geo->gf_len = 13;
 
        /* The default for chunk size. */
-       geo->ecc_chunk_size = 512;
-       while (geo->ecc_chunk_size < mtd->oobsize) {
-               geo->ecc_chunk_size *= 2; /* keep C >= O */
+       geo->ecc0_chunk_size = 512;
+       geo->eccn_chunk_size = 512;
+       while (geo->eccn_chunk_size < mtd->oobsize) {
+               geo->ecc0_chunk_size *= 2; /* keep C >= O */
+               geo->eccn_chunk_size *= 2; /* keep C >= O */
                geo->gf_len = 14;
        }
 
-       geo->ecc_chunk_count = mtd->writesize / geo->ecc_chunk_size;
+       geo->ecc_chunk_count = mtd->writesize / geo->eccn_chunk_size;
 
        /* We use the same ECC strength for all chunks. */
        geo->ecc_strength = get_ecc_strength(this);
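The page_size computed at the geo_setting label above is the raw page as the BCH engine sees it: data, metadata, and parity. A worked example under assumed values (4KiB+256 page, four 1KiB chunks, BCH8, gf_len 14):

#include <stdio.h>

int main(void)
{
        unsigned int writesize = 4096, oobsize = 256, metadata_size = 10;
        unsigned int gf_len = 14, ecc_strength = 8, ecc_chunk_count = 4;
        unsigned int parity = gf_len * ecc_strength * ecc_chunk_count / 8;
        unsigned int page_size = writesize + metadata_size + parity;

        /* 4096 + 10 + 56 = 4162, which must fit in writesize + oobsize = 4352 */
        printf("page_size=%u fits=%d\n", page_size,
               page_size <= writesize + oobsize);
        return 0;
}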
@@ -514,24 +693,40 @@ static int legacy_set_geometry(struct gpmi_nand_data *this)
 static int common_nfc_set_geometry(struct gpmi_nand_data *this)
 {
        struct nand_chip *chip = &this->nand;
+       struct mtd_info *mtd = nand_to_mtd(&this->nand);
        const struct nand_ecc_props *requirements =
                nanddev_get_ecc_requirements(&chip->base);
+       bool use_minimum_ecc;
+       int err;
 
-       if (chip->ecc.strength > 0 && chip->ecc.size > 0)
-               return set_geometry_by_ecc_info(this, chip->ecc.strength,
-                                               chip->ecc.size);
+       use_minimum_ecc = of_property_read_bool(this->dev->of_node,
+                                               "fsl,use-minimum-ecc");
 
-       if ((of_property_read_bool(this->dev->of_node, "fsl,use-minimum-ecc"))
-                               || legacy_set_geometry(this)) {
-               if (!(requirements->strength > 0 && requirements->step_size > 0))
-                       return -EINVAL;
+       /* use legacy bch geometry settings by default */
+       if ((!use_minimum_ecc && mtd->oobsize < 1024) ||
+           !(requirements->strength > 0 && requirements->step_size > 0)) {
+               dev_dbg(this->dev, "use legacy bch geometry\n");
+               err = legacy_set_geometry(this);
+               if (!err)
+                       return 0;
+       }
 
-               return set_geometry_by_ecc_info(this,
-                                               requirements->strength,
-                                               requirements->step_size);
+       /* for large oob nand */
+       if (mtd->oobsize > 1024) {
+               dev_dbg(this->dev, "use large oob bch geometry\n");
+               err = set_geometry_for_large_oob(this);
+               if (!err)
+                       return 0;
        }
 
-       return 0;
+       /* otherwise use the minimum ecc the nand chip requires */
+       dev_dbg(this->dev, "use minimum ecc bch geometry\n");
+       err = set_geometry_by_ecc_info(this, requirements->strength,
+                                       requirements->step_size);
+       if (err)
+               dev_err(this->dev, "none of the bch geometry setting works\n");
+
+       return err;
 }
 
 /* Configures the geometry for BCH.  */
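A hypothetical standalone restatement of the geometry selection order implemented in common_nfc_set_geometry() above (all names illustrative); note that oobsize == 1024 skips both the legacy and the large-oob paths:

#include <stdbool.h>

enum geo_mode { GEO_LEGACY, GEO_LARGE_OOB, GEO_MIN_ECC };

static enum geo_mode pick_geometry(unsigned int oobsize, bool use_minimum_ecc,
                                   bool have_requirements)
{
        if ((!use_minimum_ecc && oobsize < 1024) || !have_requirements)
                return GEO_LEGACY;      /* may still fall through on failure */
        if (oobsize > 1024)
                return GEO_LARGE_OOB;
        return GEO_MIN_ECC;             /* oobsize == 1024 lands here directly */
}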
@@ -843,7 +1038,7 @@ static int gpmi_raw_len_to_len(struct gpmi_nand_data *this, int raw_len)
         * we are passed in exec_op. Calculate the data length from it.
         */
        if (this->bch)
-               return ALIGN_DOWN(raw_len, this->bch_geometry.ecc_chunk_size);
+               return ALIGN_DOWN(raw_len, this->bch_geometry.eccn_chunk_size);
        else
                return raw_len;
 }
@@ -1235,7 +1430,7 @@ static int gpmi_count_bitflips(struct nand_chip *chip, void *buf, int first,
 
                        /* Read ECC bytes into our internal raw_buffer */
                        offset = nfc_geo->metadata_size * 8;
-                       offset += ((8 * nfc_geo->ecc_chunk_size) + eccbits) * (i + 1);
+                       offset += ((8 * nfc_geo->eccn_chunk_size) + eccbits) * (i + 1);
                        offset -= eccbits;
                        bitoffset = offset % 8;
                        eccbytes = DIV_ROUND_UP(offset + eccbits, 8);
@@ -1272,16 +1467,16 @@ static int gpmi_count_bitflips(struct nand_chip *chip, void *buf, int first,
                        if (i == 0) {
                                /* The first block includes metadata */
                                flips = nand_check_erased_ecc_chunk(
-                                               buf + i * nfc_geo->ecc_chunk_size,
-                                               nfc_geo->ecc_chunk_size,
+                                               buf + i * nfc_geo->eccn_chunk_size,
+                                               nfc_geo->eccn_chunk_size,
                                                eccbuf, eccbytes,
                                                this->auxiliary_virt,
                                                nfc_geo->metadata_size,
                                                nfc_geo->ecc_strength);
                        } else {
                                flips = nand_check_erased_ecc_chunk(
-                                               buf + i * nfc_geo->ecc_chunk_size,
-                                               nfc_geo->ecc_chunk_size,
+                                               buf + i * nfc_geo->eccn_chunk_size,
+                                               nfc_geo->eccn_chunk_size,
                                                eccbuf, eccbytes,
                                                NULL, 0,
                                                nfc_geo->ecc_strength);
@@ -1310,20 +1505,21 @@ static void gpmi_bch_layout_std(struct gpmi_nand_data *this)
        struct bch_geometry *geo = &this->bch_geometry;
        unsigned int ecc_strength = geo->ecc_strength >> 1;
        unsigned int gf_len = geo->gf_len;
-       unsigned int block_size = geo->ecc_chunk_size;
+       unsigned int block0_size = geo->ecc0_chunk_size;
+       unsigned int blockn_size = geo->eccn_chunk_size;
 
        this->bch_flashlayout0 =
                BF_BCH_FLASH0LAYOUT0_NBLOCKS(geo->ecc_chunk_count - 1) |
                BF_BCH_FLASH0LAYOUT0_META_SIZE(geo->metadata_size) |
                BF_BCH_FLASH0LAYOUT0_ECC0(ecc_strength, this) |
                BF_BCH_FLASH0LAYOUT0_GF(gf_len, this) |
-               BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(block_size, this);
+               BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(block0_size, this);
 
        this->bch_flashlayout1 =
                BF_BCH_FLASH0LAYOUT1_PAGE_SIZE(geo->page_size) |
                BF_BCH_FLASH0LAYOUT1_ECCN(ecc_strength, this) |
                BF_BCH_FLASH0LAYOUT1_GF(gf_len, this) |
-               BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this);
+               BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(blockn_size, this);
 }
 
 static int gpmi_ecc_read_page(struct nand_chip *chip, uint8_t *buf,
@@ -1406,29 +1602,49 @@ static int gpmi_ecc_read_subpage(struct nand_chip *chip, uint32_t offs,
                }
        }
 
+       /*
+        * if there is a dedicated ECC for the metadata:
+        * - an extra ECC size needs to be added when calculating col and
+        *   page_size, if the meta size is NOT zero.
+        * - the ecc0_chunk size needs to be set to the same size as the
+        *   other chunks, if the meta size is zero.
+        */
+
        meta = geo->metadata_size;
        if (first) {
-               col = meta + (size + ecc_parity_size) * first;
+               if (geo->ecc_for_meta)
+                       col = meta + ecc_parity_size
+                               + (size + ecc_parity_size) * first;
+               else
+                       col = meta + (size + ecc_parity_size) * first;
+
                meta = 0;
                buf = buf + first * size;
        }
 
        ecc_parity_size = geo->gf_len * geo->ecc_strength / 8;
-
        n = last - first + 1;
-       page_size = meta + (size + ecc_parity_size) * n;
+
+       if (geo->ecc_for_meta && meta)
+               page_size = meta + ecc_parity_size
+                           + (size + ecc_parity_size) * n;
+       else
+               page_size = meta + (size + ecc_parity_size) * n;
+
        ecc_strength = geo->ecc_strength >> 1;
 
-       this->bch_flashlayout0 = BF_BCH_FLASH0LAYOUT0_NBLOCKS(n - 1) |
+       this->bch_flashlayout0 = BF_BCH_FLASH0LAYOUT0_NBLOCKS(
+               (geo->ecc_for_meta ? n : n - 1)) |
                BF_BCH_FLASH0LAYOUT0_META_SIZE(meta) |
                BF_BCH_FLASH0LAYOUT0_ECC0(ecc_strength, this) |
                BF_BCH_FLASH0LAYOUT0_GF(geo->gf_len, this) |
-               BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(geo->ecc_chunk_size, this);
+               BF_BCH_FLASH0LAYOUT0_DATA0_SIZE((geo->ecc_for_meta ?
+               0 : geo->ecc0_chunk_size), this);
 
        this->bch_flashlayout1 = BF_BCH_FLASH0LAYOUT1_PAGE_SIZE(page_size) |
                BF_BCH_FLASH0LAYOUT1_ECCN(ecc_strength, this) |
                BF_BCH_FLASH0LAYOUT1_GF(geo->gf_len, this) |
-               BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(geo->ecc_chunk_size, this);
+               BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(geo->eccn_chunk_size, this);
 
        this->bch = true;
 
@@ -1597,7 +1813,7 @@ static int gpmi_ecc_read_page_raw(struct nand_chip *chip, uint8_t *buf,
        struct mtd_info *mtd = nand_to_mtd(chip);
        struct gpmi_nand_data *this = nand_get_controller_data(chip);
        struct bch_geometry *nfc_geo = &this->bch_geometry;
-       int eccsize = nfc_geo->ecc_chunk_size;
+       int eccsize = nfc_geo->eccn_chunk_size;
        int eccbits = nfc_geo->ecc_strength * nfc_geo->gf_len;
        u8 *tmp_buf = this->raw_buffer;
        size_t src_bit_off;
@@ -1682,7 +1898,7 @@ static int gpmi_ecc_write_page_raw(struct nand_chip *chip, const uint8_t *buf,
        struct mtd_info *mtd = nand_to_mtd(chip);
        struct gpmi_nand_data *this = nand_get_controller_data(chip);
        struct bch_geometry *nfc_geo = &this->bch_geometry;
-       int eccsize = nfc_geo->ecc_chunk_size;
+       int eccsize = nfc_geo->eccn_chunk_size;
        int eccbits = nfc_geo->ecc_strength * nfc_geo->gf_len;
        u8 *tmp_buf = this->raw_buffer;
        uint8_t *oob = chip->oob_poi;
@@ -2056,7 +2272,7 @@ static int gpmi_init_last(struct gpmi_nand_data *this)
        ecc->read_oob_raw = gpmi_ecc_read_oob_raw;
        ecc->write_oob_raw = gpmi_ecc_write_oob_raw;
        ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-       ecc->size       = bch_geo->ecc_chunk_size;
+       ecc->size       = bch_geo->eccn_chunk_size;
        ecc->strength   = bch_geo->ecc_strength;
        mtd_set_ooblayout(mtd, &gpmi_ooblayout_ops);
 
index 5e1c3ddae5f83ae4380534668c1ed0a4bcf8e4db..c3ff56ac62a7ebb89e0d337480be571cc8ea31ac 100644 (file)
@@ -30,9 +30,9 @@ struct resources {
  * @page_size:                The size, in bytes, of a physical page, including
  *                            both data and OOB.
  * @metadata_size:            The size, in bytes, of the metadata.
- * @ecc_chunk_size:           The size, in bytes, of a single ECC chunk. Note
- *                            the first chunk in the page includes both data and
- *                            metadata, so it's a bit larger than this value.
+ * @ecc0_chunk_size:          The size, in bytes, of the first ECC chunk.
+ * @eccn_chunk_size:          The size, in bytes, of a single ECC chunk after
+ *                            the first chunk in the page.
  * @ecc_chunk_count:          The number of ECC chunks in the page,
  * @payload_size:             The size, in bytes, of the payload buffer.
  * @auxiliary_size:           The size, in bytes, of the auxiliary buffer.
@@ -42,19 +42,23 @@ struct resources {
  *                            which the underlying physical block mark appears.
  * @block_mark_bit_offset:    The bit offset into the ECC-based page view at
  *                            which the underlying physical block mark appears.
+ * @ecc_for_meta:             The flag to indicate whether there is a
+ *                            dedicated ecc for the metadata.
  */
 struct bch_geometry {
        unsigned int  gf_len;
        unsigned int  ecc_strength;
        unsigned int  page_size;
        unsigned int  metadata_size;
-       unsigned int  ecc_chunk_size;
+       unsigned int  ecc0_chunk_size;
+       unsigned int  eccn_chunk_size;
        unsigned int  ecc_chunk_count;
        unsigned int  payload_size;
        unsigned int  auxiliary_size;
        unsigned int  auxiliary_status_offset;
        unsigned int  block_mark_byte_offset;
        unsigned int  block_mark_bit_offset;
+       unsigned int  ecc_for_meta; /* ECC for metadata */
 };
 
 /**
index 7c1c80dae826aab0efb9b02517f85bba157eb21e..e91b879b32bdb78ef28c11301149299d88924607 100644 (file)
@@ -619,9 +619,9 @@ static int ebu_nand_probe(struct platform_device *pdev)
        resname = devm_kasprintf(dev, GFP_KERNEL, "nand_cs%d", cs);
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM, resname);
        ebu_host->cs[cs].chipaddr = devm_ioremap_resource(dev, res);
-       ebu_host->cs[cs].nand_pa = res->start;
        if (IS_ERR(ebu_host->cs[cs].chipaddr))
                return PTR_ERR(ebu_host->cs[cs].chipaddr);
+       ebu_host->cs[cs].nand_pa = res->start;
 
        ebu_host->clk = devm_clk_get(dev, NULL);
        if (IS_ERR(ebu_host->clk))
index 5b9271b9c32655b4d4d8f2d972c3d478d7b3b6d9..800d774aed8ef5db5be95b7dc17e0921089239a6 100644 (file)
@@ -595,8 +595,7 @@ static void mpc5121_nfc_free(struct device *dev, struct mtd_info *mtd)
        struct nand_chip *chip = mtd_to_nand(mtd);
        struct mpc5121_nfc_prv *prv = nand_get_controller_data(chip);
 
-       if (prv->clk)
-               clk_disable_unprepare(prv->clk);
+       clk_disable_unprepare(prv->clk);
 
        if (prv->csreg)
                iounmap(prv->csreg);
index 66f04c693c87d0ca079c1d87cf096cfda50fa279..d540454cbbdfaf5470ed3b790e4677ff518d1513 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/iopoll.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include "mtk_ecc.h"
+#include <linux/mtd/nand-ecc-mtk.h>
 
 /* NAND controller register definition */
 #define NFI_CNFG               (0x00)
index 284fff62ac49e727cc0cd943b8f9206c3b82dd01..6b67b7dfe7ce69601534d80e50fc86434d8e2c43 100644 (file)
@@ -4502,11 +4502,13 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
        len = instr->len;
 
        while (len) {
+               loff_t ofs = (loff_t)page << chip->page_shift;
+
                /* Check if we have a bad block, we do not erase bad blocks! */
                if (nand_block_checkbad(chip, ((loff_t) page) <<
                                        chip->page_shift, allowbbt)) {
-                       pr_warn("%s: attempt to erase a bad block at page 0x%08x\n",
-                                   __func__, page);
+                       pr_warn("%s: attempt to erase a bad block at 0x%08llx\n",
+                                   __func__, (unsigned long long)ofs);
                        ret = -EIO;
                        goto erase_exit;
                }
@@ -4524,8 +4526,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
                if (ret) {
                        pr_debug("%s: failed erase, page 0x%08x\n",
                                        __func__, page);
-                       instr->fail_addr =
-                               ((loff_t)page << chip->page_shift);
+                       instr->fail_addr = ofs;
                        goto erase_exit;
                }
 
index 6e41902be35f16ab3e178a33253ec1a6d10753c3..88c2440b47d84a50dea28420fb8aa99682eb8db6 100644 (file)
@@ -29,6 +29,9 @@ struct nand_flash_dev nand_flash_ids[] = {
        {"TC58NVG0S3E 1G 3.3V 8-bit",
                { .id = {0x98, 0xd1, 0x90, 0x15, 0x76, 0x14, 0x01, 0x00} },
                  SZ_2K, SZ_128, SZ_128K, 0, 8, 64, NAND_ECC_INFO(1, SZ_512), },
+       {"TC58NVG0S3HTA00 1G 3.3V 8-bit",
+               { .id = {0x98, 0xf1, 0x80, 0x15} },
+                 SZ_2K, SZ_128, SZ_128K, 0, 4, 128, NAND_ECC_INFO(8, SZ_512), },
        {"TC58NVG2S0F 4G 3.3V 8-bit",
                { .id = {0x98, 0xdc, 0x90, 0x26, 0x76, 0x15, 0x01, 0x08} },
                  SZ_4K, SZ_512, SZ_256K, 0, 8, 224, NAND_ECC_INFO(4, SZ_512) },
@@ -58,6 +61,9 @@ struct nand_flash_dev nand_flash_ids[] = {
        {"TH58NVG2S3HBAI4 4G 3.3V 8-bit",
                { .id = {0x98, 0xdc, 0x91, 0x15, 0x76} },
                  SZ_2K, SZ_512, SZ_128K, 0, 5, 128, NAND_ECC_INFO(8, SZ_512) },
+       {"TH58NVG3S0HBAI4 8G 3.3V 8-bit",
+               { .id = {0x98, 0xd3, 0x91, 0x26, 0x76} },
+                 SZ_4K, SZ_1K, SZ_256K, 0, 5, 256, NAND_ECC_INFO(8, SZ_512) },
 
        LEGACY_ID_NAND("NAND 4MiB 5V 8-bit",   0x6B, 4, SZ_8K, SP_OPTIONS),
        LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xE3, 4, SZ_8K, SP_OPTIONS),
index cf4f37959421ce5ad3a77d94faa68a1b4e984895..d3d34d71921f080b4794c0b53e8b56ae019c1cdb 100644 (file)
@@ -287,8 +287,10 @@ static int toshiba_nand_init(struct nand_chip *chip)
        if (!strncmp("TC58NVG0S3E", chip->parameters.model,
                     sizeof("TC58NVG0S3E") - 1))
                tc58nvg0s3e_init(chip);
-       if (!strncmp("TH58NVG2S3HBAI4", chip->parameters.model,
-                    sizeof("TH58NVG2S3HBAI4") - 1))
+       if ((!strncmp("TH58NVG2S3HBAI4", chip->parameters.model,
+                    sizeof("TH58NVG2S3HBAI4") - 1)) ||
+           (!strncmp("TH58NVG3S0HBAI4", chip->parameters.model,
+                    sizeof("TH58NVG3S0HBAI4") - 1)))
                th58nvg2s3hbai4_init(chip);
 
        return 0;
index 893e9979c4a25d9825da5d958dddd64440a27a8d..4796a48e1012af88ec9a11aed9d7dbfcbc222906 100644 (file)
@@ -548,6 +548,7 @@ static SIMPLE_DEV_PM_OPS(elm_pm_ops, elm_suspend, elm_resume);
 #ifdef CONFIG_OF
 static const struct of_device_id elm_of_match[] = {
        { .compatible = "ti,am3352-elm" },
+       { .compatible = "ti,am64-elm" },
        {},
 };
 MODULE_DEVICE_TABLE(of, elm_of_match);
index 6db063b230a927ee1de084d86dab5bd615667320..1620e25a1147ef62a0c4bc168f84ed6360049a6d 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/mtd/rawnand.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 
 #define COMMAND_REG 0x00
@@ -216,8 +217,7 @@ struct rnandc {
        struct nand_controller controller;
        struct device *dev;
        void __iomem *regs;
-       struct clk *hclk;
-       struct clk *eclk;
+       unsigned long ext_clk_rate;
        unsigned long assigned_cs;
        struct list_head chips;
        struct nand_chip *selected_chip;
@@ -891,7 +891,7 @@ static int rnandc_setup_interface(struct nand_chip *chip, int chipnr,
 {
        struct rnand_chip *rnand = to_rnand(chip);
        struct rnandc *rnandc = to_rnandc(chip->controller);
-       unsigned int period_ns = 1000000000 / clk_get_rate(rnandc->eclk);
+       unsigned int period_ns = 1000000000 / rnandc->ext_clk_rate;
        const struct nand_sdr_timings *sdr;
        unsigned int cyc, cle, ale, bef_dly, ca_to_data;
 
@@ -1319,6 +1319,7 @@ cleanup_chips:
 static int rnandc_probe(struct platform_device *pdev)
 {
        struct rnandc *rnandc;
+       struct clk *eclk;
        int irq, ret;
 
        rnandc = devm_kzalloc(&pdev->dev, sizeof(*rnandc), GFP_KERNEL);
@@ -1335,29 +1336,26 @@ static int rnandc_probe(struct platform_device *pdev)
        if (IS_ERR(rnandc->regs))
                return PTR_ERR(rnandc->regs);
 
-       /* APB clock */
-       rnandc->hclk = devm_clk_get(&pdev->dev, "hclk");
-       if (IS_ERR(rnandc->hclk))
-               return PTR_ERR(rnandc->hclk);
-
-       /* External NAND bus clock */
-       rnandc->eclk = devm_clk_get(&pdev->dev, "eclk");
-       if (IS_ERR(rnandc->eclk))
-               return PTR_ERR(rnandc->eclk);
-
-       ret = clk_prepare_enable(rnandc->hclk);
-       if (ret)
+       devm_pm_runtime_enable(&pdev->dev);
+       ret = pm_runtime_resume_and_get(&pdev->dev);
+       if (ret < 0)
                return ret;
 
-       ret = clk_prepare_enable(rnandc->eclk);
-       if (ret)
-               goto disable_hclk;
+       /* The external NAND bus clock rate is needed for computing timings */
+       eclk = clk_get(&pdev->dev, "eclk");
+       if (IS_ERR(eclk)) {
+               ret = PTR_ERR(eclk);
+               goto dis_runtime_pm;
+       }
+
+       rnandc->ext_clk_rate = clk_get_rate(eclk);
+       clk_put(eclk);
 
        rnandc_dis_interrupts(rnandc);
        irq = platform_get_irq_optional(pdev, 0);
        if (irq == -EPROBE_DEFER) {
                ret = irq;
-               goto disable_eclk;
+               goto dis_runtime_pm;
        } else if (irq < 0) {
                dev_info(&pdev->dev, "No IRQ found, fallback to polling\n");
                rnandc->use_polling = true;
@@ -1365,12 +1363,12 @@ static int rnandc_probe(struct platform_device *pdev)
                ret = devm_request_irq(&pdev->dev, irq, rnandc_irq_handler, 0,
                                       "renesas-nand-controller", rnandc);
                if (ret < 0)
-                       goto disable_eclk;
+                       goto dis_runtime_pm;
        }
 
        ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
        if (ret)
-               goto disable_eclk;
+               goto dis_runtime_pm;
 
        rnandc_clear_fifo(rnandc);
 
@@ -1378,14 +1376,12 @@ static int rnandc_probe(struct platform_device *pdev)
 
        ret = rnandc_chips_init(rnandc);
        if (ret)
-               goto disable_eclk;
+               goto dis_runtime_pm;
 
        return 0;
 
-disable_eclk:
-       clk_disable_unprepare(rnandc->eclk);
-disable_hclk:
-       clk_disable_unprepare(rnandc->hclk);
+dis_runtime_pm:
+       pm_runtime_put(&pdev->dev);
 
        return ret;
 }
@@ -1396,8 +1392,7 @@ static int rnandc_remove(struct platform_device *pdev)
 
        rnandc_chips_cleanup(rnandc);
 
-       clk_disable_unprepare(rnandc->eclk);
-       clk_disable_unprepare(rnandc->hclk);
+       pm_runtime_put(&pdev->dev);
 
        return 0;
 }
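The conversion above trades explicit hclk/eclk handling for runtime PM. A minimal sketch of the same pattern in a hypothetical probe (foo_probe is illustrative); devm_pm_runtime_enable() undoes the enable automatically on driver detach:

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

static int foo_probe(struct platform_device *pdev)
{
        int ret;

        ret = devm_pm_runtime_enable(&pdev->dev);
        if (ret)
                return ret;

        /* Power the device up; clocks are handled through the PM domain */
        ret = pm_runtime_resume_and_get(&pdev->dev);
        if (ret < 0)
                return ret;

        /* ... hardware init; on failure: pm_runtime_put(&pdev->dev) ... */
        return 0;
}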
index cbaa4f1c83da45e2c7d7a71c53ae2b1b890e1c53..f133985cc053a0703aed1e191b4d32310e516770 100644 (file)
@@ -911,8 +911,7 @@ static int rk_nfc_enable_clks(struct device *dev, struct rk_nfc *nfc)
        ret = clk_prepare_enable(nfc->ahb_clk);
        if (ret) {
                dev_err(dev, "failed to enable ahb clk\n");
-               if (!IS_ERR(nfc->nfc_clk))
-                       clk_disable_unprepare(nfc->nfc_clk);
+               clk_disable_unprepare(nfc->nfc_clk);
                return ret;
        }
 
@@ -921,8 +920,7 @@ static int rk_nfc_enable_clks(struct device *dev, struct rk_nfc *nfc)
 
 static void rk_nfc_disable_clks(struct rk_nfc *nfc)
 {
-       if (!IS_ERR(nfc->nfc_clk))
-               clk_disable_unprepare(nfc->nfc_clk);
+       clk_disable_unprepare(nfc->nfc_clk);
        clk_disable_unprepare(nfc->ahb_clk);
 }
 
index de8e919d0ebe647ab39f474f01e445c303b0c051..8f1a42bf199c7ebc166e90b498d228cf14606e98 100644 (file)
@@ -390,6 +390,9 @@ static int tmio_probe(struct platform_device *dev)
        if (data == NULL)
                dev_warn(&dev->dev, "NULL platform data!\n");
 
+       if (!ccr || !fcr)
+               return -EINVAL;
+
        tmio = devm_kzalloc(&dev->dev, sizeof(*tmio), GFP_KERNEL);
        if (!tmio)
                return -ENOMEM;
index 9662b9c1d5a9f992a635ec2cd59eb1efa07f9216..80dabe6ff0f33a037d64eb720567fc099021bb3c 100644 (file)
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-spinand-objs := core.o gigadevice.o macronix.o micron.o paragon.o toshiba.o winbond.o
+spinand-objs := core.o gigadevice.o macronix.o micron.o paragon.o toshiba.o winbond.o xtx.o
 obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
index ff8336870bc0dd8328c7d28d67883eb43cd3916b..d5b685d1605e5d62849e75ed4ac2edbb5323c66e 100644 (file)
@@ -933,6 +933,7 @@ static const struct spinand_manufacturer *spinand_manufacturers[] = {
        &paragon_spinand_manufacturer,
        &toshiba_spinand_manufacturer,
        &winbond_spinand_manufacturer,
+       &xtx_spinand_manufacturer,
 };
 
 static int spinand_manufacturer_match(struct spinand_device *spinand,
index 1dd1c589809341734d98fb3ebb1eca0be8c91d3a..6b043e24855fb1b557d44a01ceb5b163d115b78d 100644 (file)
@@ -39,6 +39,22 @@ static SPINAND_OP_VARIANTS(read_cache_variants_f,
                SPINAND_PAGE_READ_FROM_CACHE_OP_3A(true, 0, 1, NULL, 0),
                SPINAND_PAGE_READ_FROM_CACHE_OP_3A(false, 0, 0, NULL, 0));
 
+static SPINAND_OP_VARIANTS(read_cache_variants_1gq5,
+               SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
+static SPINAND_OP_VARIANTS(read_cache_variants_2gq5,
+               SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 4, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 2, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
 static SPINAND_OP_VARIANTS(write_cache_variants,
                SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
                SPINAND_PROG_LOAD(true, 0, NULL, 0));
@@ -325,6 +341,36 @@ static const struct spinand_info gigadevice_spinand_table[] = {
                     SPINAND_HAS_QE_BIT,
                     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
                                     gd5fxgq4uexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F1GQ4RExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xc1),
+                    NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq4uexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F2GQ4UExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xd2),
+                    NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq4uexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F2GQ4RExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xc2),
+                    NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq4uexxg_ecc_get_status)),
        SPINAND_INFO("GD5F1GQ4UFxxG",
                     SPINAND_ID(SPINAND_READID_METHOD_OPCODE, 0xb1, 0x48),
                     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
@@ -339,12 +385,122 @@ static const struct spinand_info gigadevice_spinand_table[] = {
                     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x51),
                     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
                     NAND_ECCREQ(4, 512),
-                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq5xexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F1GQ5RExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x41),
+                    NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
+                    NAND_ECCREQ(4, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
                                              &write_cache_variants,
                                              &update_cache_variants),
                     SPINAND_HAS_QE_BIT,
                     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
                                     gd5fxgq5xexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F2GQ5UExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x52),
+                    NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
+                    NAND_ECCREQ(4, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_2gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq5xexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F2GQ5RExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x42),
+                    NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
+                    NAND_ECCREQ(4, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_2gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq5xexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F4GQ6UExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x55),
+                    NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 2, 1),
+                    NAND_ECCREQ(4, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_2gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq5xexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F4GQ6RExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x45),
+                    NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 2, 1),
+                    NAND_ECCREQ(4, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_2gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq5xexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F1GM7UExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x91),
+                    NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq4uexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F1GM7RExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x81),
+                    NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq4uexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F2GM7UExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x92),
+                    NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq4uexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F2GM7RExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x82),
+                    NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq4uexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F4GM8UExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x95),
+                    NAND_MEMORG(1, 2048, 128, 64, 4096, 80, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq4uexxg_ecc_get_status)),
+       SPINAND_INFO("GD5F4GM8RExxG",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x85),
+                    NAND_MEMORG(1, 2048, 128, 64, 4096, 80, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+                                    gd5fxgq4uexxg_ecc_get_status)),
 };
 
 static const struct spinand_manufacturer_ops gigadevice_spinand_manuf_ops = {
diff --git a/drivers/mtd/nand/spi/xtx.c b/drivers/mtd/nand/spi/xtx.c
new file mode 100644 (file)
index 0000000..3911520
--- /dev/null
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author:
+ * Felix Matouschek <felix@matouschek.org>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mtd/spinand.h>
+
+#define SPINAND_MFR_XTX        0x0B
+
+#define XT26G0XA_STATUS_ECC_MASK       GENMASK(5, 2)
+#define XT26G0XA_STATUS_ECC_NO_DETECTED        (0 << 2)
+#define XT26G0XA_STATUS_ECC_8_CORRECTED        (3 << 4)
+#define XT26G0XA_STATUS_ECC_UNCOR_ERROR        (2 << 4)
+
+static SPINAND_OP_VARIANTS(read_cache_variants,
+               SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+               SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
+static SPINAND_OP_VARIANTS(write_cache_variants,
+               SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+               SPINAND_PROG_LOAD(true, 0, NULL, 0));
+
+static SPINAND_OP_VARIANTS(update_cache_variants,
+               SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
+               SPINAND_PROG_LOAD(false, 0, NULL, 0));
+
+static int xt26g0xa_ooblayout_ecc(struct mtd_info *mtd, int section,
+                                  struct mtd_oob_region *region)
+{
+       if (section)
+               return -ERANGE;
+
+       region->offset = 48;
+       region->length = 16;
+
+       return 0;
+}
+
+static int xt26g0xa_ooblayout_free(struct mtd_info *mtd, int section,
+                                  struct mtd_oob_region *region)
+{
+       if (section)
+               return -ERANGE;
+
+       region->offset = 1;
+       region->length = 47;
+
+       return 0;
+}
+
+static const struct mtd_ooblayout_ops xt26g0xa_ooblayout = {
+       .ecc = xt26g0xa_ooblayout_ecc,
+       .free = xt26g0xa_ooblayout_free,
+};
+
+static int xt26g0xa_ecc_get_status(struct spinand_device *spinand,
+                                        u8 status)
+{
+       status = status & XT26G0XA_STATUS_ECC_MASK;
+
+       switch (status) {
+       case XT26G0XA_STATUS_ECC_NO_DETECTED:
+               return 0;
+       case XT26G0XA_STATUS_ECC_8_CORRECTED:
+               return 8;
+       case XT26G0XA_STATUS_ECC_UNCOR_ERROR:
+               return -EBADMSG;
+       default:
+               break;
+       }
+
+       /* At this point values greater than (2 << 4) are invalid */
+       if (status > XT26G0XA_STATUS_ECC_UNCOR_ERROR)
+               return -EINVAL;
+
+       /* (1 << 2) through (7 << 2) are 1-7 corrected errors */
+       return status >> 2;
+}
+
+static const struct spinand_info xtx_spinand_table[] = {
+       SPINAND_INFO("XT26G01A",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xE1),
+                    NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&xt26g0xa_ooblayout,
+                                    xt26g0xa_ecc_get_status)),
+       SPINAND_INFO("XT26G02A",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xE2),
+                    NAND_MEMORG(1, 2048, 64, 64, 2048, 40, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&xt26g0xa_ooblayout,
+                                    xt26g0xa_ecc_get_status)),
+       SPINAND_INFO("XT26G04A",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xE3),
+                    NAND_MEMORG(1, 2048, 64, 128, 2048, 40, 1, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    SPINAND_HAS_QE_BIT,
+                    SPINAND_ECCINFO(&xt26g0xa_ooblayout,
+                                    xt26g0xa_ecc_get_status)),
+};
+
+static const struct spinand_manufacturer_ops xtx_spinand_manuf_ops = {
+};
+
+const struct spinand_manufacturer xtx_spinand_manufacturer = {
+       .id = SPINAND_MFR_XTX,
+       .name = "XTX",
+       .chips = xtx_spinand_table,
+       .nchips = ARRAY_SIZE(xtx_spinand_table),
+       .ops = &xtx_spinand_manuf_ops,
+};
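
The decoding in xt26g0xa_ecc_get_status() above packs three special cases plus a linear 1-7 range into status bits 5:2. A standalone user-space sketch of the same decoding, with the mask inlined and plain negative numbers standing in for -EBADMSG/-EINVAL:

#include <stdio.h>

static int xt26g0xa_decode(unsigned int status)
{
	status &= 0x3c;			/* GENMASK(5, 2) */

	if (status == 0x00)		/* no bitflips detected */
		return 0;
	if (status == 0x30)		/* (3 << 4): 8 bits corrected */
		return 8;
	if (status == 0x20)		/* (2 << 4): uncorrectable */
		return -1;		/* stand-in for -EBADMSG */
	if (status > 0x20)		/* reserved encodings */
		return -2;		/* stand-in for -EINVAL */
	return status >> 2;		/* (1 << 2)..(7 << 2): 1-7 corrected */
}

int main(void)
{
	/* expect: 2 8 -1 */
	printf("%d %d %d\n", xt26g0xa_decode(0x08),
	       xt26g0xa_decode(0x30), xt26g0xa_decode(0x20));
	return 0;
}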
index 6012a10f10c83aef7aa26a1542f83559ed0e3833..50fcf4c2174ba93aa7c804927576dac8e128985b 100644 (file)
@@ -237,7 +237,7 @@ static int bcm47xxpart_parse(struct mtd_info *master,
                               (uint8_t *)buf);
                if (err && !mtd_is_bitflip(err)) {
                        pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
-                              offset, err);
+                              offset + 0x8000, err);
                        continue;
                }
 
index 6b904e43937289c1e6118f8a9a9ab4c877d88dc8..e347b435a038ec5301ab6fdc47b0764ac1f409d1 100644 (file)
@@ -17,6 +17,7 @@ spi-nor-objs                  += sst.o
 spi-nor-objs                   += winbond.o
 spi-nor-objs                   += xilinx.o
 spi-nor-objs                   += xmc.o
+spi-nor-$(CONFIG_DEBUG_FS)     += debugfs.o
 obj-$(CONFIG_MTD_SPI_NOR)      += spi-nor.o
 
 obj-$(CONFIG_MTD_SPI_NOR)      += controllers/
index 50f4f3484d42618bf8ad82d5c5339043f6dd28fd..ca45dcd3ffe81f87dbf9ddc2a1535244ea92be20 100644 (file)
@@ -1,14 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config SPI_ASPEED_SMC
-       tristate "Aspeed flash controllers in SPI mode"
-       depends on ARCH_ASPEED || COMPILE_TEST
-       depends on HAS_IOMEM && OF
-       help
-         This enables support for the Firmware Memory controller (FMC)
-         in the Aspeed AST2500/AST2400 SoCs when attached to SPI NOR chips,
-         and support for the SPI flash memory controller (SPI) for
-         the host firmware. The implementation only supports SPI NOR.
-
 config SPI_HISI_SFC
        tristate "Hisilicon FMC SPI NOR Flash Controller(SFC)"
        depends on ARCH_HISI || COMPILE_TEST
index 6e2a1dc684662aeac48c5f9cc1da722ded2a2f48..0b8e1d5309138619bbfdf3e27639b6be2935e65e 100644 (file)
@@ -1,4 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_SPI_ASPEED_SMC)   += aspeed-smc.o
 obj-$(CONFIG_SPI_HISI_SFC)     += hisi-sfc.o
 obj-$(CONFIG_SPI_NXP_SPIFI)    += nxp-spifi.o
diff --git a/drivers/mtd/spi-nor/controllers/aspeed-smc.c b/drivers/mtd/spi-nor/controllers/aspeed-smc.c
deleted file mode 100644 (file)
index acfe010..0000000
+++ /dev/null
@@ -1,921 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * ASPEED Static Memory Controller driver
- *
- * Copyright (c) 2015-2016, IBM Corporation.
- */
-
-#include <linux/bug.h>
-#include <linux/device.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/partitions.h>
-#include <linux/mtd/spi-nor.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
-#include <linux/sizes.h>
-#include <linux/sysfs.h>
-
-#define DEVICE_NAME    "aspeed-smc"
-
-/*
- * The driver only supports SPI flash
- */
-enum aspeed_smc_flash_type {
-       smc_type_nor  = 0,
-       smc_type_nand = 1,
-       smc_type_spi  = 2,
-};
-
-struct aspeed_smc_chip;
-
-struct aspeed_smc_info {
-       u32 maxsize;            /* maximum size of chip window */
-       u8 nce;                 /* number of chip enables */
-       bool hastype;           /* flash type field exists in config reg */
-       u8 we0;                 /* shift for write enable bit for CE0 */
-       u8 ctl0;                /* offset in regs of ctl for CE0 */
-
-       void (*set_4b)(struct aspeed_smc_chip *chip);
-};
-
-static void aspeed_smc_chip_set_4b_spi_2400(struct aspeed_smc_chip *chip);
-static void aspeed_smc_chip_set_4b(struct aspeed_smc_chip *chip);
-
-static const struct aspeed_smc_info fmc_2400_info = {
-       .maxsize = 64 * 1024 * 1024,
-       .nce = 5,
-       .hastype = true,
-       .we0 = 16,
-       .ctl0 = 0x10,
-       .set_4b = aspeed_smc_chip_set_4b,
-};
-
-static const struct aspeed_smc_info spi_2400_info = {
-       .maxsize = 64 * 1024 * 1024,
-       .nce = 1,
-       .hastype = false,
-       .we0 = 0,
-       .ctl0 = 0x04,
-       .set_4b = aspeed_smc_chip_set_4b_spi_2400,
-};
-
-static const struct aspeed_smc_info fmc_2500_info = {
-       .maxsize = 256 * 1024 * 1024,
-       .nce = 3,
-       .hastype = true,
-       .we0 = 16,
-       .ctl0 = 0x10,
-       .set_4b = aspeed_smc_chip_set_4b,
-};
-
-static const struct aspeed_smc_info spi_2500_info = {
-       .maxsize = 128 * 1024 * 1024,
-       .nce = 2,
-       .hastype = false,
-       .we0 = 16,
-       .ctl0 = 0x10,
-       .set_4b = aspeed_smc_chip_set_4b,
-};
-
-enum aspeed_smc_ctl_reg_value {
-       smc_base,               /* base value without mode for other commands */
-       smc_read,               /* command reg for (maybe fast) reads */
-       smc_write,              /* command reg for writes */
-       smc_max,
-};
-
-struct aspeed_smc_controller;
-
-struct aspeed_smc_chip {
-       int cs;
-       struct aspeed_smc_controller *controller;
-       void __iomem *ctl;                      /* control register */
-       void __iomem *ahb_base;                 /* base of chip window */
-       u32 ahb_window_size;                    /* chip mapping window size */
-       u32 ctl_val[smc_max];                   /* control settings */
-       enum aspeed_smc_flash_type type;        /* what type of flash */
-       struct spi_nor nor;
-};
-
-struct aspeed_smc_controller {
-       struct device *dev;
-
-       struct mutex mutex;                     /* controller access mutex */
-       const struct aspeed_smc_info *info;     /* type info of controller */
-       void __iomem *regs;                     /* controller registers */
-       void __iomem *ahb_base;                 /* per-chip windows resource */
-       u32 ahb_window_size;                    /* full mapping window size */
-
-       struct aspeed_smc_chip *chips[];        /* pointers to attached chips */
-};
-
-/*
- * SPI Flash Configuration Register (AST2500 SPI)
- *     or
- * Type setting Register (AST2500 FMC).
- * CE0 and CE1 can only be of type SPI. CE2 can be of type NOR but the
- * driver does not support it.
- */
-#define CONFIG_REG                     0x0
-#define CONFIG_DISABLE_LEGACY          BIT(31) /* 1 */
-
-#define CONFIG_CE2_WRITE               BIT(18)
-#define CONFIG_CE1_WRITE               BIT(17)
-#define CONFIG_CE0_WRITE               BIT(16)
-
-#define CONFIG_CE2_TYPE                        BIT(4) /* AST2500 FMC only */
-#define CONFIG_CE1_TYPE                        BIT(2) /* AST2500 FMC only */
-#define CONFIG_CE0_TYPE                        BIT(0) /* AST2500 FMC only */
-
-/*
- * CE Control Register
- */
-#define CE_CONTROL_REG                 0x4
-
-/*
- * CEx Control Register
- */
-#define CONTROL_AAF_MODE               BIT(31)
-#define CONTROL_IO_MODE_MASK           GENMASK(30, 28)
-#define CONTROL_IO_DUAL_DATA           BIT(29)
-#define CONTROL_IO_DUAL_ADDR_DATA      (BIT(29) | BIT(28))
-#define CONTROL_IO_QUAD_DATA           BIT(30)
-#define CONTROL_IO_QUAD_ADDR_DATA      (BIT(30) | BIT(28))
-#define CONTROL_CE_INACTIVE_SHIFT      24
-#define CONTROL_CE_INACTIVE_MASK       GENMASK(27, \
-                                       CONTROL_CE_INACTIVE_SHIFT)
-/* 0 = 16T ... 15 = 1T   T=HCLK */
-#define CONTROL_COMMAND_SHIFT          16
-#define CONTROL_DUMMY_COMMAND_OUT      BIT(15)
-#define CONTROL_IO_DUMMY_HI            BIT(14)
-#define CONTROL_IO_DUMMY_HI_SHIFT      14
-#define CONTROL_CLK_DIV4               BIT(13) /* others */
-#define CONTROL_IO_ADDRESS_4B          BIT(13) /* AST2400 SPI */
-#define CONTROL_RW_MERGE               BIT(12)
-#define CONTROL_IO_DUMMY_LO_SHIFT      6
-#define CONTROL_IO_DUMMY_LO            GENMASK(7, \
-                                               CONTROL_IO_DUMMY_LO_SHIFT)
-#define CONTROL_IO_DUMMY_MASK          (CONTROL_IO_DUMMY_HI | \
-                                        CONTROL_IO_DUMMY_LO)
-#define CONTROL_IO_DUMMY_SET(dummy)                             \
-       (((((dummy) >> 2) & 0x1) << CONTROL_IO_DUMMY_HI_SHIFT) | \
-        (((dummy) & 0x3) << CONTROL_IO_DUMMY_LO_SHIFT))
-
-#define CONTROL_CLOCK_FREQ_SEL_SHIFT   8
-#define CONTROL_CLOCK_FREQ_SEL_MASK    GENMASK(11, \
-                                               CONTROL_CLOCK_FREQ_SEL_SHIFT)
-#define CONTROL_LSB_FIRST              BIT(5)
-#define CONTROL_CLOCK_MODE_3           BIT(4)
-#define CONTROL_IN_DUAL_DATA           BIT(3)
-#define CONTROL_CE_STOP_ACTIVE_CONTROL BIT(2)
-#define CONTROL_COMMAND_MODE_MASK      GENMASK(1, 0)
-#define CONTROL_COMMAND_MODE_NORMAL    0
-#define CONTROL_COMMAND_MODE_FREAD     1
-#define CONTROL_COMMAND_MODE_WRITE     2
-#define CONTROL_COMMAND_MODE_USER      3
-
-#define CONTROL_KEEP_MASK                                              \
-       (CONTROL_AAF_MODE | CONTROL_CE_INACTIVE_MASK | CONTROL_CLK_DIV4 | \
-        CONTROL_CLOCK_FREQ_SEL_MASK | CONTROL_LSB_FIRST | CONTROL_CLOCK_MODE_3)
-
-/*
- * The Segment Register uses an 8MB unit to encode the start address
- * and the end address of the mapping window of a flash SPI slave:
- *
- *        | byte 1 | byte 2 | byte 3 | byte 4 |
- *        +--------+--------+--------+--------+
- *        |  end   |  start |   0    |   0    |
- */
-#define SEGMENT_ADDR_REG0              0x30
-#define SEGMENT_ADDR_START(_r)         ((((_r) >> 16) & 0xFF) << 23)
-#define SEGMENT_ADDR_END(_r)           ((((_r) >> 24) & 0xFF) << 23)
-#define SEGMENT_ADDR_VALUE(start, end)                                 \
-       (((((start) >> 23) & 0xFF) << 16) | ((((end) >> 23) & 0xFF) << 24))
-#define SEGMENT_ADDR_REG(controller, cs)       \
-       ((controller)->regs + SEGMENT_ADDR_REG0 + (cs) * 4)
-
-/*
- * In user mode all data bytes read or written to the chip decode address
- * range are transferred to or from the SPI bus. The range is treated as a
- * fifo of arbitrary 1, 2, or 4 byte width but each write has to be aligned
- * to its size. The address within the multiple 8kB range is ignored when
- * sending bytes to the SPI bus.
- *
- * On the arm architecture, as of Linux version 4.3, memcpy_fromio and
- * memcpy_toio on little endian targets use the optimized memcpy routines
- * that were designed for well-behaved memory storage. These routines
- * have a stutter if the source and destination are not both word aligned,
- * once with a duplicate access to the source after aligning to the
- * destination to a word boundary, and again with a duplicate access to
- * the source when the final byte count is not word aligned.
- *
- * When writing or reading the fifo this stutter discards data or sends
- * too much data to the fifo and can not be used by this driver.
- *
- * While the low level io string routines that implement the insl family do
- * the desired accesses and memory increments, the cross architecture io
- * macros make them essentially impossible to use on a memory mapped address
- * instead of a token from the call to iomap of an io port.
- *
- * These fifo routines use readl and friends to a constant io port and update
- * the memory buffer pointer and count via explicit code. The final updates
- * to len are optimistically suppressed.
- */
-static int aspeed_smc_read_from_ahb(void *buf, void __iomem *src, size_t len)
-{
-       size_t offset = 0;
-
-       if (IS_ALIGNED((uintptr_t)src, sizeof(uintptr_t)) &&
-           IS_ALIGNED((uintptr_t)buf, sizeof(uintptr_t))) {
-               ioread32_rep(src, buf, len >> 2);
-               offset = len & ~0x3;
-               len -= offset;
-       }
-       ioread8_rep(src, (u8 *)buf + offset, len);
-       return 0;
-}
-
-static int aspeed_smc_write_to_ahb(void __iomem *dst, const void *buf,
-                                  size_t len)
-{
-       size_t offset = 0;
-
-       if (IS_ALIGNED((uintptr_t)dst, sizeof(uintptr_t)) &&
-           IS_ALIGNED((uintptr_t)buf, sizeof(uintptr_t))) {
-               iowrite32_rep(dst, buf, len >> 2);
-               offset = len & ~0x3;
-               len -= offset;
-       }
-       iowrite8_rep(dst, (const u8 *)buf + offset, len);
-       return 0;
-}
-
-static inline u32 aspeed_smc_chip_write_bit(struct aspeed_smc_chip *chip)
-{
-       return BIT(chip->controller->info->we0 + chip->cs);
-}
-
-static void aspeed_smc_chip_check_config(struct aspeed_smc_chip *chip)
-{
-       struct aspeed_smc_controller *controller = chip->controller;
-       u32 reg;
-
-       reg = readl(controller->regs + CONFIG_REG);
-
-       if (reg & aspeed_smc_chip_write_bit(chip))
-               return;
-
-       dev_dbg(controller->dev, "config write is not set ! @%p: 0x%08x\n",
-               controller->regs + CONFIG_REG, reg);
-       reg |= aspeed_smc_chip_write_bit(chip);
-       writel(reg, controller->regs + CONFIG_REG);
-}
-
-static void aspeed_smc_start_user(struct spi_nor *nor)
-{
-       struct aspeed_smc_chip *chip = nor->priv;
-       u32 ctl = chip->ctl_val[smc_base];
-
-       /*
-        * When the chip is controlled in user mode, we need write
-        * access to send the opcodes to it. So check the config.
-        */
-       aspeed_smc_chip_check_config(chip);
-
-       ctl |= CONTROL_COMMAND_MODE_USER |
-               CONTROL_CE_STOP_ACTIVE_CONTROL;
-       writel(ctl, chip->ctl);
-
-       ctl &= ~CONTROL_CE_STOP_ACTIVE_CONTROL;
-       writel(ctl, chip->ctl);
-}
-
-static void aspeed_smc_stop_user(struct spi_nor *nor)
-{
-       struct aspeed_smc_chip *chip = nor->priv;
-
-       u32 ctl = chip->ctl_val[smc_read];
-       u32 ctl2 = ctl | CONTROL_COMMAND_MODE_USER |
-               CONTROL_CE_STOP_ACTIVE_CONTROL;
-
-       writel(ctl2, chip->ctl);        /* stop user CE control */
-       writel(ctl, chip->ctl);         /* default to fread or read mode */
-}
-
-static int aspeed_smc_prep(struct spi_nor *nor)
-{
-       struct aspeed_smc_chip *chip = nor->priv;
-
-       mutex_lock(&chip->controller->mutex);
-       return 0;
-}
-
-static void aspeed_smc_unprep(struct spi_nor *nor)
-{
-       struct aspeed_smc_chip *chip = nor->priv;
-
-       mutex_unlock(&chip->controller->mutex);
-}
-
-static int aspeed_smc_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
-                              size_t len)
-{
-       struct aspeed_smc_chip *chip = nor->priv;
-
-       aspeed_smc_start_user(nor);
-       aspeed_smc_write_to_ahb(chip->ahb_base, &opcode, 1);
-       aspeed_smc_read_from_ahb(buf, chip->ahb_base, len);
-       aspeed_smc_stop_user(nor);
-       return 0;
-}
-
-static int aspeed_smc_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
-                               size_t len)
-{
-       struct aspeed_smc_chip *chip = nor->priv;
-
-       aspeed_smc_start_user(nor);
-       aspeed_smc_write_to_ahb(chip->ahb_base, &opcode, 1);
-       aspeed_smc_write_to_ahb(chip->ahb_base, buf, len);
-       aspeed_smc_stop_user(nor);
-       return 0;
-}
-
-static void aspeed_smc_send_cmd_addr(struct spi_nor *nor, u8 cmd, u32 addr)
-{
-       struct aspeed_smc_chip *chip = nor->priv;
-       __be32 temp;
-       u32 cmdaddr;
-
-       switch (nor->addr_width) {
-       default:
-               WARN_ONCE(1, "Unexpected address width %u, defaulting to 3\n",
-                         nor->addr_width);
-               fallthrough;
-       case 3:
-               cmdaddr = addr & 0xFFFFFF;
-               cmdaddr |= cmd << 24;
-
-               temp = cpu_to_be32(cmdaddr);
-               aspeed_smc_write_to_ahb(chip->ahb_base, &temp, 4);
-               break;
-       case 4:
-               temp = cpu_to_be32(addr);
-               aspeed_smc_write_to_ahb(chip->ahb_base, &cmd, 1);
-               aspeed_smc_write_to_ahb(chip->ahb_base, &temp, 4);
-               break;
-       }
-}
-
-static ssize_t aspeed_smc_read_user(struct spi_nor *nor, loff_t from,
-                                   size_t len, u_char *read_buf)
-{
-       struct aspeed_smc_chip *chip = nor->priv;
-       int i;
-       u8 dummy = 0xFF;
-
-       aspeed_smc_start_user(nor);
-       aspeed_smc_send_cmd_addr(nor, nor->read_opcode, from);
-       for (i = 0; i < chip->nor.read_dummy / 8; i++)
-               aspeed_smc_write_to_ahb(chip->ahb_base, &dummy, sizeof(dummy));
-
-       aspeed_smc_read_from_ahb(read_buf, chip->ahb_base, len);
-       aspeed_smc_stop_user(nor);
-       return len;
-}
-
-static ssize_t aspeed_smc_write_user(struct spi_nor *nor, loff_t to,
-                                    size_t len, const u_char *write_buf)
-{
-       struct aspeed_smc_chip *chip = nor->priv;
-
-       aspeed_smc_start_user(nor);
-       aspeed_smc_send_cmd_addr(nor, nor->program_opcode, to);
-       aspeed_smc_write_to_ahb(chip->ahb_base, write_buf, len);
-       aspeed_smc_stop_user(nor);
-       return len;
-}
-
-static int aspeed_smc_unregister(struct aspeed_smc_controller *controller)
-{
-       struct aspeed_smc_chip *chip;
-       int n;
-
-       for (n = 0; n < controller->info->nce; n++) {
-               chip = controller->chips[n];
-               if (chip)
-                       mtd_device_unregister(&chip->nor.mtd);
-       }
-
-       return 0;
-}
-
-static int aspeed_smc_remove(struct platform_device *dev)
-{
-       return aspeed_smc_unregister(platform_get_drvdata(dev));
-}
-
-static const struct of_device_id aspeed_smc_matches[] = {
-       { .compatible = "aspeed,ast2400-fmc", .data = &fmc_2400_info },
-       { .compatible = "aspeed,ast2400-spi", .data = &spi_2400_info },
-       { .compatible = "aspeed,ast2500-fmc", .data = &fmc_2500_info },
-       { .compatible = "aspeed,ast2500-spi", .data = &spi_2500_info },
-       { }
-};
-MODULE_DEVICE_TABLE(of, aspeed_smc_matches);
-
-/*
- * Each chip has a mapping window defined by a segment address
- * register defining a start and an end address on the AHB bus. These
- * addresses can be configured to fit the chip size and offer a
- * contiguous memory region across chips. For the moment, we only
- * check that each chip segment is valid.
- */
-static void __iomem *aspeed_smc_chip_base(struct aspeed_smc_chip *chip,
-                                         struct resource *res)
-{
-       struct aspeed_smc_controller *controller = chip->controller;
-       u32 offset = 0;
-       u32 reg;
-
-       if (controller->info->nce > 1) {
-               reg = readl(SEGMENT_ADDR_REG(controller, chip->cs));
-
-               if (SEGMENT_ADDR_START(reg) >= SEGMENT_ADDR_END(reg))
-                       return NULL;
-
-               offset = SEGMENT_ADDR_START(reg) - res->start;
-       }
-
-       return controller->ahb_base + offset;
-}
-
-static u32 aspeed_smc_ahb_base_phy(struct aspeed_smc_controller *controller)
-{
-       u32 seg0_val = readl(SEGMENT_ADDR_REG(controller, 0));
-
-       return SEGMENT_ADDR_START(seg0_val);
-}
-
-static u32 chip_set_segment(struct aspeed_smc_chip *chip, u32 cs, u32 start,
-                           u32 size)
-{
-       struct aspeed_smc_controller *controller = chip->controller;
-       void __iomem *seg_reg;
-       u32 seg_oldval, seg_newval, ahb_base_phy, end;
-
-       ahb_base_phy = aspeed_smc_ahb_base_phy(controller);
-
-       seg_reg = SEGMENT_ADDR_REG(controller, cs);
-       seg_oldval = readl(seg_reg);
-
-       /*
-        * If the chip size is not specified, use the default segment
-        * size, but take into account the possible overlap with the
-        * previous segment
-        */
-       if (!size)
-               size = SEGMENT_ADDR_END(seg_oldval) - start;
-
-       /*
-        * The segment cannot exceed the maximum window size of the
-        * controller.
-        */
-       if (start + size > ahb_base_phy + controller->ahb_window_size) {
-               size = ahb_base_phy + controller->ahb_window_size - start;
-               dev_warn(chip->nor.dev, "CE%d window resized to %dMB",
-                        cs, size >> 20);
-       }
-
-       end = start + size;
-       seg_newval = SEGMENT_ADDR_VALUE(start, end);
-       writel(seg_newval, seg_reg);
-
-       /*
-        * Restore default value if something goes wrong. The chip
- * might have set some bogus value and we would lose access
-        * to the chip.
-        */
-       if (seg_newval != readl(seg_reg)) {
-               dev_err(chip->nor.dev, "CE%d window invalid", cs);
-               writel(seg_oldval, seg_reg);
-               start = SEGMENT_ADDR_START(seg_oldval);
-               end = SEGMENT_ADDR_END(seg_oldval);
-               size = end - start;
-       }
-
-       dev_info(chip->nor.dev, "CE%d window [ 0x%.8x - 0x%.8x ] %dMB",
-                cs, start, end, size >> 20);
-
-       return size;
-}
-
-/*
- * The segment register defines the mapping window on the AHB bus and
- * it needs to be configured depending on the chip size. The segment
- * register of the following CE also needs to be tuned in order to
- * provide a contiguous window across multiple chips.
- *
- * This is expected to be called in increasing CE order
- */
-static u32 aspeed_smc_chip_set_segment(struct aspeed_smc_chip *chip)
-{
-       struct aspeed_smc_controller *controller = chip->controller;
-       u32 ahb_base_phy, start;
-       u32 size = chip->nor.mtd.size;
-
-       /*
-        * Each controller has a chip size limit for direct memory
-        * access
-        */
-       if (size > controller->info->maxsize)
-               size = controller->info->maxsize;
-
-       /*
-        * The AST2400 SPI controller only handles one chip and does
-        * not have segment registers. Let's use the chip size for the
-        * AHB window.
-        */
-       if (controller->info == &spi_2400_info)
-               goto out;
-
-       /*
-        * The AST2500 SPI controller has a HW bug when the CE0 chip
-        * size reaches 128MB. Enforce a size limit of 120MB to
-        * prevent the controller from using bogus settings in the
-        * segment register.
-        */
-       if (chip->cs == 0 && controller->info == &spi_2500_info &&
-           size == SZ_128M) {
-               size = 120 << 20;
-               dev_info(chip->nor.dev,
-                        "CE%d window resized to %dMB (AST2500 HW quirk)",
-                        chip->cs, size >> 20);
-       }
-
-       ahb_base_phy = aspeed_smc_ahb_base_phy(controller);
-
-       /*
-        * As a start address for the current segment, use the default
-        * start address if we are handling CE0 or use the previous
-        * segment ending address
-        */
-       if (chip->cs) {
-               u32 prev = readl(SEGMENT_ADDR_REG(controller, chip->cs - 1));
-
-               start = SEGMENT_ADDR_END(prev);
-       } else {
-               start = ahb_base_phy;
-       }
-
-       size = chip_set_segment(chip, chip->cs, start, size);
-
-       /* Update chip base address on the AHB bus */
-       chip->ahb_base = controller->ahb_base + (start - ahb_base_phy);
-
-       /*
-        * Now, make sure the next segment does not overlap with the
-        * current one we just configured, even if there is no
-        * available chip. That could break access in Command Mode.
-        */
-       if (chip->cs < controller->info->nce - 1)
-               chip_set_segment(chip, chip->cs + 1, start + size, 0);
-
-out:
-       if (size < chip->nor.mtd.size)
-               dev_warn(chip->nor.dev,
-                        "CE%d window too small for chip %dMB",
-                        chip->cs, (u32)chip->nor.mtd.size >> 20);
-
-       return size;
-}
-
-static void aspeed_smc_chip_enable_write(struct aspeed_smc_chip *chip)
-{
-       struct aspeed_smc_controller *controller = chip->controller;
-       u32 reg;
-
-       reg = readl(controller->regs + CONFIG_REG);
-
-       reg |= aspeed_smc_chip_write_bit(chip);
-       writel(reg, controller->regs + CONFIG_REG);
-}
-
-static void aspeed_smc_chip_set_type(struct aspeed_smc_chip *chip, int type)
-{
-       struct aspeed_smc_controller *controller = chip->controller;
-       u32 reg;
-
-       chip->type = type;
-
-       reg = readl(controller->regs + CONFIG_REG);
-       reg &= ~(3 << (chip->cs * 2));
-       reg |= chip->type << (chip->cs * 2);
-       writel(reg, controller->regs + CONFIG_REG);
-}
-
-/*
- * The first chip of the AST2500 FMC flash controller is strapped by
- * hardware, or autodetected, but other chips need to be set. Enforce
- * the 4B setting for all chips.
- */
-static void aspeed_smc_chip_set_4b(struct aspeed_smc_chip *chip)
-{
-       struct aspeed_smc_controller *controller = chip->controller;
-       u32 reg;
-
-       reg = readl(controller->regs + CE_CONTROL_REG);
-       reg |= 1 << chip->cs;
-       writel(reg, controller->regs + CE_CONTROL_REG);
-}
-
-/*
- * The AST2400 SPI flash controller does not have a CE Control
- * register. It uses the CE0 control register to set 4Byte mode at the
- * controller level.
- */
-static void aspeed_smc_chip_set_4b_spi_2400(struct aspeed_smc_chip *chip)
-{
-       chip->ctl_val[smc_base] |= CONTROL_IO_ADDRESS_4B;
-       chip->ctl_val[smc_read] |= CONTROL_IO_ADDRESS_4B;
-}
-
-static int aspeed_smc_chip_setup_init(struct aspeed_smc_chip *chip,
-                                     struct resource *res)
-{
-       struct aspeed_smc_controller *controller = chip->controller;
-       const struct aspeed_smc_info *info = controller->info;
-       u32 reg, base_reg;
-
-       /*
-        * Always turn on the write enable bit to allow opcodes to be
-        * sent in user mode.
-        */
-       aspeed_smc_chip_enable_write(chip);
-
-       /* The driver only supports SPI type flash */
-       if (info->hastype)
-               aspeed_smc_chip_set_type(chip, smc_type_spi);
-
-       /*
-        * Configure chip base address in memory
-        */
-       chip->ahb_base = aspeed_smc_chip_base(chip, res);
-       if (!chip->ahb_base) {
-               dev_warn(chip->nor.dev, "CE%d window closed", chip->cs);
-               return -EINVAL;
-       }
-
-       /*
-        * Get value of the inherited control register. U-Boot usually
-        * does some timing calibration on the FMC chip, so it's good
-        * to keep them. In the future, we should handle calibration
-        * from Linux.
-        */
-       reg = readl(chip->ctl);
-       dev_dbg(controller->dev, "control register: %08x\n", reg);
-
-       base_reg = reg & CONTROL_KEEP_MASK;
-       if (base_reg != reg) {
-               dev_dbg(controller->dev,
-                       "control register changed to: %08x\n",
-                       base_reg);
-       }
-       chip->ctl_val[smc_base] = base_reg;
-
-       /*
-        * Retain the prior value of the control register as the
-        * default if it was normal access mode. Otherwise start with
-        * the sanitized base value set to read mode.
-        */
-       if ((reg & CONTROL_COMMAND_MODE_MASK) ==
-           CONTROL_COMMAND_MODE_NORMAL)
-               chip->ctl_val[smc_read] = reg;
-       else
-               chip->ctl_val[smc_read] = chip->ctl_val[smc_base] |
-                       CONTROL_COMMAND_MODE_NORMAL;
-
-       dev_dbg(controller->dev, "default control register: %08x\n",
-               chip->ctl_val[smc_read]);
-       return 0;
-}
-
-static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
-{
-       struct aspeed_smc_controller *controller = chip->controller;
-       const struct aspeed_smc_info *info = controller->info;
-       u32 cmd;
-
-       if (chip->nor.addr_width == 4 && info->set_4b)
-               info->set_4b(chip);
-
-       /* This is for direct AHB access when using Command Mode. */
-       chip->ahb_window_size = aspeed_smc_chip_set_segment(chip);
-
-       /*
-        * base mode has not been optimized yet. use it for writes.
-        */
-       chip->ctl_val[smc_write] = chip->ctl_val[smc_base] |
-               chip->nor.program_opcode << CONTROL_COMMAND_SHIFT |
-               CONTROL_COMMAND_MODE_WRITE;
-
-       dev_dbg(controller->dev, "write control register: %08x\n",
-               chip->ctl_val[smc_write]);
-
-       /*
-        * TODO: Adjust clocks if fast read is supported and interpret
-        * SPI NOR flags to adjust controller settings.
-        */
-       if (chip->nor.read_proto == SNOR_PROTO_1_1_1) {
-               if (chip->nor.read_dummy == 0)
-                       cmd = CONTROL_COMMAND_MODE_NORMAL;
-               else
-                       cmd = CONTROL_COMMAND_MODE_FREAD;
-       } else {
-               dev_err(chip->nor.dev, "unsupported SPI read mode\n");
-               return -EINVAL;
-       }
-
-       chip->ctl_val[smc_read] |= cmd |
-               CONTROL_IO_DUMMY_SET(chip->nor.read_dummy / 8);
-
-       dev_dbg(controller->dev, "base control register: %08x\n",
-               chip->ctl_val[smc_read]);
-       return 0;
-}
-
-static const struct spi_nor_controller_ops aspeed_smc_controller_ops = {
-       .prepare = aspeed_smc_prep,
-       .unprepare = aspeed_smc_unprep,
-       .read_reg = aspeed_smc_read_reg,
-       .write_reg = aspeed_smc_write_reg,
-       .read = aspeed_smc_read_user,
-       .write = aspeed_smc_write_user,
-};
-
-static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
-                                 struct device_node *np, struct resource *r)
-{
-       const struct spi_nor_hwcaps hwcaps = {
-               .mask = SNOR_HWCAPS_READ |
-                       SNOR_HWCAPS_READ_FAST |
-                       SNOR_HWCAPS_PP,
-       };
-       const struct aspeed_smc_info *info = controller->info;
-       struct device *dev = controller->dev;
-       struct device_node *child;
-       unsigned int cs;
-       int ret = -ENODEV;
-       bool found_one = false;
-
-       for_each_available_child_of_node(np, child) {
-               struct aspeed_smc_chip *chip;
-               struct spi_nor *nor;
-               struct mtd_info *mtd;
-
-               /* This driver does not support NAND or NOR flash devices. */
-               if (!of_device_is_compatible(child, "jedec,spi-nor"))
-                       continue;
-
-               ret = of_property_read_u32(child, "reg", &cs);
-               if (ret) {
-                       dev_err(dev, "Couldn't read chip select.\n");
-                       break;
-               }
-
-               if (cs >= info->nce) {
-                       dev_err(dev, "Chip select %d out of range.\n",
-                               cs);
-                       ret = -ERANGE;
-                       break;
-               }
-
-               if (controller->chips[cs]) {
-                       dev_err(dev, "Chip select %d already in use by %s\n",
-                               cs, dev_name(controller->chips[cs]->nor.dev));
-                       ret = -EBUSY;
-                       break;
-               }
-
-               chip = devm_kzalloc(controller->dev, sizeof(*chip), GFP_KERNEL);
-               if (!chip) {
-                       ret = -ENOMEM;
-                       break;
-               }
-
-               chip->controller = controller;
-               chip->ctl = controller->regs + info->ctl0 + cs * 4;
-               chip->cs = cs;
-
-               nor = &chip->nor;
-               mtd = &nor->mtd;
-
-               nor->dev = dev;
-               nor->priv = chip;
-               spi_nor_set_flash_node(nor, child);
-               nor->controller_ops = &aspeed_smc_controller_ops;
-
-               ret = aspeed_smc_chip_setup_init(chip, r);
-               if (ret)
-                       break;
-
-               /*
-                * TODO: Add support for Dual and Quad SPI protocols
-                * attach when board support is present as determined
-                * by of property.
-                */
-               ret = spi_nor_scan(nor, NULL, &hwcaps);
-               /*
-                * If we fail to scan the device it might not be present or
-                * broken.  Don't fail the whole controller if others work.
-                */
-               if (ret) {
-                       if (found_one)
-                               ret = 0;
-
-                       devm_kfree(controller->dev, chip);
-                       continue;
-               }
-
-               ret = aspeed_smc_chip_setup_finish(chip);
-               if (ret)
-                       break;
-
-               ret = mtd_device_register(mtd, NULL, 0);
-               if (ret)
-                       break;
-
-               controller->chips[cs] = chip;
-               found_one = true;
-       }
-
-       if (ret) {
-               of_node_put(child);
-               aspeed_smc_unregister(controller);
-       }
-
-       return ret;
-}
-
-static int aspeed_smc_probe(struct platform_device *pdev)
-{
-       struct device_node *np = pdev->dev.of_node;
-       struct device *dev = &pdev->dev;
-       struct aspeed_smc_controller *controller;
-       const struct of_device_id *match;
-       const struct aspeed_smc_info *info;
-       struct resource *res;
-       int ret;
-
-       match = of_match_device(aspeed_smc_matches, &pdev->dev);
-       if (!match || !match->data)
-               return -ENODEV;
-       info = match->data;
-
-       controller = devm_kzalloc(&pdev->dev,
-                                 struct_size(controller, chips, info->nce),
-                                 GFP_KERNEL);
-       if (!controller)
-               return -ENOMEM;
-       controller->info = info;
-       controller->dev = dev;
-
-       mutex_init(&controller->mutex);
-       platform_set_drvdata(pdev, controller);
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       controller->regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(controller->regs))
-               return PTR_ERR(controller->regs);
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       controller->ahb_base = devm_ioremap_resource(dev, res);
-       if (IS_ERR(controller->ahb_base))
-               return PTR_ERR(controller->ahb_base);
-
-       controller->ahb_window_size = resource_size(res);
-
-       ret = aspeed_smc_setup_flash(controller, np, res);
-       if (ret)
-               dev_err(dev, "Aspeed SMC probe failed %d\n", ret);
-
-       return ret;
-}
-
-static struct platform_driver aspeed_smc_driver = {
-       .probe = aspeed_smc_probe,
-       .remove = aspeed_smc_remove,
-       .driver = {
-               .name = DEVICE_NAME,
-               .of_match_table = aspeed_smc_matches,
-       }
-};
-
-module_platform_driver(aspeed_smc_driver);
-
-MODULE_DESCRIPTION("ASPEED Static Memory Controller Driver");
-MODULE_AUTHOR("Cedric Le Goater <clg@kaod.org>");
-MODULE_LICENSE("GPL v2");
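
The deleted aspeed_smc_read_from_ahb()/aspeed_smc_write_to_ahb() helpers exist precisely because of the memcpy_fromio/memcpy_toio stutter described in the comment block above: each fifo access must happen exactly once, so transfers are split into a word-aligned bulk phase and a byte tail. A plain-memory sketch of that split, with memcpy standing in for ioread32_rep/ioread8_rep (the real helpers keep re-reading one fixed fifo address, so they never advance src the way this stand-in must):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void fifo_read(void *buf, const void *src, size_t len)
{
	size_t offset = 0;

	if ((((uintptr_t)src | (uintptr_t)buf) % sizeof(uint32_t)) == 0) {
		offset = len & ~(size_t)0x3;	/* whole 32-bit words, once */
		memcpy(buf, src, offset);
		len -= offset;
	}
	/* remaining 0-3 bytes, one access per byte */
	memcpy((uint8_t *)buf + offset, (const uint8_t *)src + offset, len);
}

int main(void)
{
	char dst[8] = { 0 };

	fifo_read(dst, "abcdefg", 7);
	printf("%s\n", dst);	/* abcdefg */
	return 0;
}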
index b4f141ad9c9c37d62ee456bdbdfd7c4549abd019..502967c76c5f395a9002b83f63af1a5776f6a81f 100644 (file)
@@ -307,6 +307,52 @@ ssize_t spi_nor_write_data(struct spi_nor *nor, loff_t to, size_t len,
        return nor->controller_ops->write(nor, to, len, buf);
 }
 
+/**
+ * spi_nor_read_any_reg() - read any register from flash memory, nonvolatile or
+ * volatile.
+ * @nor:        pointer to 'struct spi_nor'.
+ * @op:                SPI memory operation. op->data.buf must be DMA-able.
+ * @proto:     SPI protocol to use for the register operation.
+ *
+ * Return: zero on success, -errno otherwise
+ */
+int spi_nor_read_any_reg(struct spi_nor *nor, struct spi_mem_op *op,
+                        enum spi_nor_protocol proto)
+{
+       if (!nor->spimem)
+               return -EOPNOTSUPP;
+
+       spi_nor_spimem_setup_op(nor, op, proto);
+       return spi_nor_spimem_exec_op(nor, op);
+}
+
+/**
+ * spi_nor_write_any_volatile_reg() - write any volatile register to flash
+ * memory.
+ * @nor:        pointer to 'struct spi_nor'
+ * @op:                SPI memory operation. op->data.buf must be DMA-able.
+ * @proto:     SPI protocol to use for the register operation.
+ *
+ * Writes to volatile registers are instant according to some manufacturers
+ * (Cypress, Micron) and do not need any status polling.
+ *
+ * Return: zero on success, -errno otherwise
+ */
+int spi_nor_write_any_volatile_reg(struct spi_nor *nor, struct spi_mem_op *op,
+                                  enum spi_nor_protocol proto)
+{
+       int ret;
+
+       if (!nor->spimem)
+               return -EOPNOTSUPP;
+
+       ret = spi_nor_write_enable(nor);
+       if (ret)
+               return ret;
+       spi_nor_spimem_setup_op(nor, op, proto);
+       return spi_nor_spimem_exec_op(nor, op);
+}
+
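A hypothetical caller of the new helper might look as follows; the 0x65 opcode, address width, and dummy count are illustrative only, and the buffer must be DMA-able as the kernel-doc requires (hence nor->bouncebuf):

/* Sketch: read one byte from a vendor register at a 3-byte address. */
static int example_read_vendor_reg(struct spi_nor *nor, u32 reg_addr)
{
	struct spi_mem_op op =
		SPI_MEM_OP(SPI_MEM_OP_CMD(0x65, 0),	/* illustrative opcode */
			   SPI_MEM_OP_ADDR(3, reg_addr, 0),
			   SPI_MEM_OP_DUMMY(1, 0),
			   SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 0));

	return spi_nor_read_any_reg(nor, &op, nor->reg_proto);
}
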
 /**
  * spi_nor_write_enable() - Set write enable latch with Write Enable command.
  * @nor:       pointer to 'struct spi_nor'.
@@ -318,11 +364,7 @@ int spi_nor_write_enable(struct spi_nor *nor)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREN, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_NO_DATA);
+               struct spi_mem_op op = SPI_NOR_WREN_OP;
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -349,11 +391,7 @@ int spi_nor_write_disable(struct spi_nor *nor)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRDI, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_NO_DATA);
+               struct spi_mem_op op = SPI_NOR_WRDI_OP;
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -369,6 +407,37 @@ int spi_nor_write_disable(struct spi_nor *nor)
        return ret;
 }
 
+/**
+ * spi_nor_read_id() - Read the JEDEC ID.
+ * @nor:       pointer to 'struct spi_nor'.
+ * @naddr:     number of address bytes to send. Can be zero if the operation
+ *             does not need to send an address.
+ * @ndummy:    number of dummy bytes to send after an opcode or address. Can
+ *             be zero if the operation does not require dummy bytes.
+ * @id:                pointer to a DMA-able buffer where the value of the JEDEC ID
+ *             will be written.
+ * @proto:     the SPI protocol for register operation.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+int spi_nor_read_id(struct spi_nor *nor, u8 naddr, u8 ndummy, u8 *id,
+                   enum spi_nor_protocol proto)
+{
+       int ret;
+
+       if (nor->spimem) {
+               struct spi_mem_op op =
+                       SPI_NOR_READID_OP(naddr, ndummy, id, SPI_NOR_MAX_ID_LEN);
+
+               spi_nor_spimem_setup_op(nor, &op, proto);
+               ret = spi_mem_exec_op(nor->spimem, &op);
+       } else {
+               ret = nor->controller_ops->read_reg(nor, SPINOR_OP_RDID, id,
+                                                   SPI_NOR_MAX_ID_LEN);
+       }
+       return ret;
+}
+
 /**
  * spi_nor_read_sr() - Read the Status Register.
  * @nor:       pointer to 'struct spi_nor'.
@@ -382,11 +451,7 @@ int spi_nor_read_sr(struct spi_nor *nor, u8 *sr)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_IN(1, sr, 0));
+               struct spi_mem_op op = SPI_NOR_RDSR_OP(sr);
 
                if (nor->reg_proto == SNOR_PROTO_8_8_8_DTR) {
                        op.addr.nbytes = nor->params->rdsr_addr_nbytes;
@@ -426,11 +491,7 @@ int spi_nor_read_cr(struct spi_nor *nor, u8 *cr)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDCR, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_IN(1, cr, 0));
+               struct spi_mem_op op = SPI_NOR_RDCR_OP(cr);
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -459,14 +520,7 @@ int spi_nor_set_4byte_addr_mode(struct spi_nor *nor, bool enable)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(enable ?
-                                                 SPINOR_OP_EN4B :
-                                                 SPINOR_OP_EX4B,
-                                                 0),
-                                 SPI_MEM_OP_NO_ADDR,
-                                 SPI_MEM_OP_NO_DUMMY,
-                                 SPI_MEM_OP_NO_DATA);
+               struct spi_mem_op op = SPI_NOR_EN4B_EX4B_OP(enable);
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -500,11 +554,7 @@ static int spansion_set_4byte_addr_mode(struct spi_nor *nor, bool enable)
        nor->bouncebuf[0] = enable << 7;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_BRWR, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 0));
+               struct spi_mem_op op = SPI_NOR_BRWR_OP(nor->bouncebuf);
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -520,40 +570,6 @@ static int spansion_set_4byte_addr_mode(struct spi_nor *nor, bool enable)
        return ret;
 }
 
-/**
- * spi_nor_write_ear() - Write Extended Address Register.
- * @nor:       pointer to 'struct spi_nor'.
- * @ear:       value to write to the Extended Address Register.
- *
- * Return: 0 on success, -errno otherwise.
- */
-int spi_nor_write_ear(struct spi_nor *nor, u8 ear)
-{
-       int ret;
-
-       nor->bouncebuf[0] = ear;
-
-       if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREAR, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 0));
-
-               spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
-
-               ret = spi_mem_exec_op(nor->spimem, &op);
-       } else {
-               ret = spi_nor_controller_ops_write_reg(nor, SPINOR_OP_WREAR,
-                                                      nor->bouncebuf, 1);
-       }
-
-       if (ret)
-               dev_dbg(nor->dev, "error %d writing EAR\n", ret);
-
-       return ret;
-}
-
 /**
  * spi_nor_sr_ready() - Query the Status Register to see if the flash is ready
  * for new commands.
@@ -649,11 +665,7 @@ int spi_nor_global_block_unlock(struct spi_nor *nor)
                return ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_GBULK, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_NO_DATA);
+               struct spi_mem_op op = SPI_NOR_GBULK_OP;
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -688,11 +700,7 @@ int spi_nor_write_sr(struct spi_nor *nor, const u8 *sr, size_t len)
                return ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_OUT(len, sr, 0));
+               struct spi_mem_op op = SPI_NOR_WRSR_OP(sr, len);
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -788,6 +796,15 @@ static int spi_nor_write_16bit_sr_and_check(struct spi_nor *nor, u8 sr1)
        if (ret)
                return ret;
 
+       ret = spi_nor_read_sr(nor, sr_cr);
+       if (ret)
+               return ret;
+
+       if (sr1 != sr_cr[0]) {
+               dev_dbg(nor->dev, "SR: Read back test failed\n");
+               return -EIO;
+       }
+
        if (nor->flags & SNOR_F_NO_READ_CR)
                return 0;
 
@@ -892,11 +909,7 @@ static int spi_nor_write_sr2(struct spi_nor *nor, const u8 *sr2)
                return ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR2, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_OUT(1, sr2, 0));
+               struct spi_mem_op op = SPI_NOR_WRSR2_OP(sr2);
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -928,11 +941,7 @@ static int spi_nor_read_sr2(struct spi_nor *nor, u8 *sr2)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR2, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_IN(1, sr2, 0));
+               struct spi_mem_op op = SPI_NOR_RDSR2_OP(sr2);
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -961,11 +970,7 @@ static int spi_nor_erase_chip(struct spi_nor *nor)
        dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd.size >> 10));
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CHIP_ERASE, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_NO_DATA);
+               struct spi_mem_op op = SPI_NOR_CHIP_ERASE_OP;
 
                spi_nor_spimem_setup_op(nor, &op, nor->write_proto);
 
@@ -1107,10 +1112,8 @@ int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
 
        if (nor->spimem) {
                struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(nor->erase_opcode, 0),
-                                  SPI_MEM_OP_ADDR(nor->addr_width, addr, 0),
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_NO_DATA);
+                       SPI_NOR_SECTOR_ERASE_OP(nor->erase_opcode,
+                                               nor->addr_width, addr);
 
                spi_nor_spimem_setup_op(nor, &op, nor->write_proto);
 
@@ -1629,58 +1632,45 @@ static const struct spi_nor_manufacturer *manufacturers[] = {
        &spi_nor_xmc,
 };
 
-static const struct flash_info *
-spi_nor_search_part_by_id(const struct flash_info *parts, unsigned int nparts,
-                         const u8 *id)
+static const struct flash_info *spi_nor_match_id(struct spi_nor *nor,
+                                                const u8 *id)
 {
-       unsigned int i;
+       const struct flash_info *part;
+       unsigned int i, j;
 
-       for (i = 0; i < nparts; i++) {
-               if (parts[i].id_len &&
-                   !memcmp(parts[i].id, id, parts[i].id_len))
-                       return &parts[i];
+       for (i = 0; i < ARRAY_SIZE(manufacturers); i++) {
+               for (j = 0; j < manufacturers[i]->nparts; j++) {
+                       part = &manufacturers[i]->parts[j];
+                       if (part->id_len &&
+                           !memcmp(part->id, id, part->id_len)) {
+                               nor->manufacturer = manufacturers[i];
+                               return part;
+                       }
+               }
        }
 
        return NULL;
 }
 
-static const struct flash_info *spi_nor_read_id(struct spi_nor *nor)
+static const struct flash_info *spi_nor_detect(struct spi_nor *nor)
 {
        const struct flash_info *info;
        u8 *id = nor->bouncebuf;
-       unsigned int i;
        int ret;
 
-       if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 1),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_IN(SPI_NOR_MAX_ID_LEN, id, 1));
-
-               ret = spi_mem_exec_op(nor->spimem, &op);
-       } else {
-               ret = nor->controller_ops->read_reg(nor, SPINOR_OP_RDID, id,
-                                                   SPI_NOR_MAX_ID_LEN);
-       }
+       ret = spi_nor_read_id(nor, 0, 0, id, nor->reg_proto);
        if (ret) {
                dev_dbg(nor->dev, "error %d reading JEDEC ID\n", ret);
                return ERR_PTR(ret);
        }
 
-       for (i = 0; i < ARRAY_SIZE(manufacturers); i++) {
-               info = spi_nor_search_part_by_id(manufacturers[i]->parts,
-                                                manufacturers[i]->nparts,
-                                                id);
-               if (info) {
-                       nor->manufacturer = manufacturers[i];
-                       return info;
-               }
+       info = spi_nor_match_id(nor, id);
+       if (!info) {
+               dev_err(nor->dev, "unrecognized JEDEC id bytes: %*ph\n",
+                       SPI_NOR_MAX_ID_LEN, id);
+               return ERR_PTR(-ENODEV);
        }
-
-       dev_err(nor->dev, "unrecognized JEDEC id bytes: %*ph\n",
-               SPI_NOR_MAX_ID_LEN, id);
-       return ERR_PTR(-ENODEV);
+       return info;
 }
 
 static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
@@ -1860,7 +1850,7 @@ int spi_nor_hwcaps_read2cmd(u32 hwcaps)
                                  ARRAY_SIZE(hwcaps_read2cmd));
 }
 
-static int spi_nor_hwcaps_pp2cmd(u32 hwcaps)
+int spi_nor_hwcaps_pp2cmd(u32 hwcaps)
 {
        static const int hwcaps_pp2cmd[][2] = {
                { SNOR_HWCAPS_PP,               SNOR_CMD_PP },
@@ -1919,10 +1909,7 @@ static int spi_nor_spimem_check_op(struct spi_nor *nor,
 static int spi_nor_spimem_check_readop(struct spi_nor *nor,
                                       const struct spi_nor_read_command *read)
 {
-       struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(read->opcode, 0),
-                                         SPI_MEM_OP_ADDR(3, 0, 0),
-                                         SPI_MEM_OP_DUMMY(1, 0),
-                                         SPI_MEM_OP_DATA_IN(2, NULL, 0));
+       struct spi_mem_op op = SPI_NOR_READ_OP(read->opcode);
 
        spi_nor_spimem_setup_op(nor, &op, read->proto);
 
@@ -1945,10 +1932,7 @@ static int spi_nor_spimem_check_readop(struct spi_nor *nor,
 static int spi_nor_spimem_check_pp(struct spi_nor *nor,
                                   const struct spi_nor_pp_command *pp)
 {
-       struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(pp->opcode, 0),
-                                         SPI_MEM_OP_ADDR(3, 0, 0),
-                                         SPI_MEM_OP_NO_DUMMY,
-                                         SPI_MEM_OP_DATA_OUT(2, NULL, 0));
+       struct spi_mem_op op = SPI_NOR_PP_OP(pp->opcode);
 
        spi_nor_spimem_setup_op(nor, &op, pp->proto);
 
@@ -2772,10 +2756,7 @@ static void spi_nor_soft_reset(struct spi_nor *nor)
        struct spi_mem_op op;
        int ret;
 
-       op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_SRSTEN, 0),
-                       SPI_MEM_OP_NO_DUMMY,
-                       SPI_MEM_OP_NO_ADDR,
-                       SPI_MEM_OP_NO_DATA);
+       op = (struct spi_mem_op)SPINOR_SRSTEN_OP;
 
        spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -2785,10 +2766,7 @@ static void spi_nor_soft_reset(struct spi_nor *nor)
                return;
        }
 
-       op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_SRST, 0),
-                       SPI_MEM_OP_NO_DUMMY,
-                       SPI_MEM_OP_NO_ADDR,
-                       SPI_MEM_OP_NO_DATA);
+       op = (struct spi_mem_op)SPINOR_SRST_OP;
 
        spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
@@ -2876,8 +2854,8 @@ void spi_nor_restore(struct spi_nor *nor)
 }
 EXPORT_SYMBOL_GPL(spi_nor_restore);
 
-static const struct flash_info *spi_nor_match_id(struct spi_nor *nor,
-                                                const char *name)
+static const struct flash_info *spi_nor_match_name(struct spi_nor *nor,
+                                                  const char *name)
 {
        unsigned int i, j;
 
@@ -2899,12 +2877,10 @@ static const struct flash_info *spi_nor_get_flash_info(struct spi_nor *nor,
        const struct flash_info *info = NULL;
 
        if (name)
-               info = spi_nor_match_id(nor, name);
+               info = spi_nor_match_name(nor, name);
        /* Try to auto-detect if chip name wasn't specified or not found */
        if (!info)
-               info = spi_nor_read_id(nor);
-       if (IS_ERR_OR_NULL(info))
-               return ERR_PTR(-ENOENT);
+               return spi_nor_detect(nor);
 
        /*
         * If caller has specified name of flash model that can normally be
@@ -2913,7 +2889,7 @@ static const struct flash_info *spi_nor_get_flash_info(struct spi_nor *nor,
        if (name && info->id_len) {
                const struct flash_info *jinfo;
 
-               jinfo = spi_nor_read_id(nor);
+               jinfo = spi_nor_detect(nor);
                if (IS_ERR(jinfo)) {
                        return jinfo;
                } else if (jinfo != info) {
@@ -3156,6 +3132,8 @@ static int spi_nor_probe(struct spi_mem *spimem)
        if (ret)
                return ret;
 
+       spi_nor_debugfs_register(nor);
+
        /*
         * None of the existing parts have > 512B pages, but let's play safe
         * and add this logic so that if anyone ever adds support for such
index b7fd760e3b471d1d91bb6fe509e161add0741cf1..3f841ec36e564be0351a1af090e6e538caf963f6 100644 (file)
 
 #define SPI_NOR_MAX_ID_LEN     6
 
+/* Standard SPI NOR flash operations. */
+#define SPI_NOR_READID_OP(naddr, ndummy, buf, len)                     \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 0),                   \
+                  SPI_MEM_OP_ADDR(naddr, 0, 0),                        \
+                  SPI_MEM_OP_DUMMY(ndummy, 0),                         \
+                  SPI_MEM_OP_DATA_IN(len, buf, 0))
+
+#define SPI_NOR_WREN_OP                                                        \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREN, 0),                   \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+#define SPI_NOR_WRDI_OP                                                        \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRDI, 0),                   \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+#define SPI_NOR_RDSR_OP(buf)                                           \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 0),                   \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_IN(1, buf, 0))
+
+#define SPI_NOR_WRSR_OP(buf, len)                                      \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 0),                   \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_OUT(len, buf, 0))
+
+#define SPI_NOR_RDSR2_OP(buf)                                          \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR2, 0),                  \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_IN(1, buf, 0))
+
+#define SPI_NOR_WRSR2_OP(buf)                                          \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR2, 0),                  \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_OUT(1, buf, 0))
+
+#define SPI_NOR_RDCR_OP(buf)                                           \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDCR, 0),                   \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_IN(1, buf, 0))
+
+#define SPI_NOR_EN4B_EX4B_OP(enable)                                   \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B, 0), \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+#define SPI_NOR_BRWR_OP(buf)                                           \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_BRWR, 0),                   \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_OUT(1, buf, 0))
+
+#define SPI_NOR_GBULK_OP                                               \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_GBULK, 0),                  \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+#define SPI_NOR_CHIP_ERASE_OP                                          \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CHIP_ERASE, 0),             \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+#define SPI_NOR_SECTOR_ERASE_OP(opcode, addr_width, addr)              \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(opcode, 0),                           \
+                  SPI_MEM_OP_ADDR(addr_width, addr, 0),                \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+#define SPI_NOR_READ_OP(opcode)                                                \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(opcode, 0),                           \
+                  SPI_MEM_OP_ADDR(3, 0, 0),                            \
+                  SPI_MEM_OP_DUMMY(1, 0),                              \
+                  SPI_MEM_OP_DATA_IN(2, NULL, 0))
+
+#define SPI_NOR_PP_OP(opcode)                                          \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(opcode, 0),                           \
+                  SPI_MEM_OP_ADDR(3, 0, 0),                            \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_OUT(2, NULL, 0))
+
+#define SPINOR_SRSTEN_OP                                               \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_SRSTEN, 0),                 \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+#define SPINOR_SRST_OP                                                 \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_SRST, 0),                   \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+/* Keep these in sync with the list in debugfs.c */
 enum spi_nor_option_flags {
        SNOR_F_HAS_SR_TB        = BIT(0),
        SNOR_F_NO_OP_CHIP_ERASE = BIT(1),
@@ -236,9 +340,10 @@ struct spi_nor_otp {
 * @writesize:         Minimal writable flash unit size. Defaults to 1. Set to
  *                     ECC unit size for ECC-ed flashes.
  * @page_size:         the page size of the SPI NOR flash memory.
- * @rdsr_dummy:                dummy cycles needed for Read Status Register command.
+ * @rdsr_dummy:                dummy cycles needed for Read Status Register command
+ *                     in octal DTR mode.
  * @rdsr_addr_nbytes:  dummy address bytes needed for Read Status Register
- *                     command.
+ *                     command in octal DTR mode.
  * @hwcaps:            describes the read and page program hardware
  *                     capabilities.
  * @reads:             read capabilities ordered by priority: the higher index
@@ -526,7 +631,6 @@ void spi_nor_spimem_setup_op(const struct spi_nor *nor,
 int spi_nor_write_enable(struct spi_nor *nor);
 int spi_nor_write_disable(struct spi_nor *nor);
 int spi_nor_set_4byte_addr_mode(struct spi_nor *nor, bool enable);
-int spi_nor_write_ear(struct spi_nor *nor, u8 ear);
 int spi_nor_wait_till_ready(struct spi_nor *nor);
 int spi_nor_global_block_unlock(struct spi_nor *nor);
 int spi_nor_lock_and_prep(struct spi_nor *nor);
@@ -534,6 +638,8 @@ void spi_nor_unlock_and_unprep(struct spi_nor *nor);
 int spi_nor_sr1_bit6_quad_enable(struct spi_nor *nor);
 int spi_nor_sr2_bit1_quad_enable(struct spi_nor *nor);
 int spi_nor_sr2_bit7_quad_enable(struct spi_nor *nor);
+int spi_nor_read_id(struct spi_nor *nor, u8 naddr, u8 ndummy, u8 *id,
+                   enum spi_nor_protocol reg_proto);
 int spi_nor_read_sr(struct spi_nor *nor, u8 *sr);
 int spi_nor_sr_ready(struct spi_nor *nor);
 int spi_nor_read_cr(struct spi_nor *nor, u8 *cr);
@@ -545,6 +651,10 @@ ssize_t spi_nor_read_data(struct spi_nor *nor, loff_t from, size_t len,
                          u8 *buf);
 ssize_t spi_nor_write_data(struct spi_nor *nor, loff_t to, size_t len,
                           const u8 *buf);
+int spi_nor_read_any_reg(struct spi_nor *nor, struct spi_mem_op *op,
+                        enum spi_nor_protocol proto);
+int spi_nor_write_any_volatile_reg(struct spi_nor *nor, struct spi_mem_op *op,
+                                  enum spi_nor_protocol proto);
 int spi_nor_erase_sector(struct spi_nor *nor, u32 addr);
 
 int spi_nor_otp_read_secr(struct spi_nor *nor, loff_t addr, size_t len, u8 *buf);
@@ -555,6 +665,7 @@ int spi_nor_otp_lock_sr2(struct spi_nor *nor, unsigned int region);
 int spi_nor_otp_is_locked_sr2(struct spi_nor *nor, unsigned int region);
 
 int spi_nor_hwcaps_read2cmd(u32 hwcaps);
+int spi_nor_hwcaps_pp2cmd(u32 hwcaps);
 u8 spi_nor_convert_3to4_read(u8 opcode);
 void spi_nor_set_read_settings(struct spi_nor_read_command *read,
                               u8 num_mode_clocks,
@@ -590,4 +701,10 @@ static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
        return container_of(mtd, struct spi_nor, mtd);
 }
 
+#ifdef CONFIG_DEBUG_FS
+void spi_nor_debugfs_register(struct spi_nor *nor);
+#else
+static inline void spi_nor_debugfs_register(struct spi_nor *nor) {}
+#endif
+
 #endif /* __LINUX_MTD_SPI_NOR_INTERNAL_H */
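
The op templates above fix only the opcode, transfer direction and default
geometry; bus widths, DTR and protocol-specific dummy cycles are still applied
per call. A minimal sketch of the intended call pattern, mirroring the spimem
branches elsewhere in this diff (example_read_sr() is a hypothetical name):

        static int example_read_sr(struct spi_nor *nor, u8 *sr)
        {
                struct spi_mem_op op = SPI_NOR_RDSR_OP(sr);

                /* Adjust bus widths and DTR for the protocol in use. */
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);

                return spi_mem_exec_op(nor->spimem, &op);
        }
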
diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c
new file mode 100644 (file)
index 0000000..eaf84f7
--- /dev/null
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/mtd/spi-nor.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+#include <linux/debugfs.h>
+#include <linux/string_helpers.h>
+
+#include "core.h"
+
+#define SPI_NOR_DEBUGFS_ROOT "spi-nor"
+
+#define SNOR_F_NAME(name) [ilog2(SNOR_F_##name)] = #name
+static const char *const snor_f_names[] = {
+       SNOR_F_NAME(HAS_SR_TB),
+       SNOR_F_NAME(NO_OP_CHIP_ERASE),
+       SNOR_F_NAME(BROKEN_RESET),
+       SNOR_F_NAME(4B_OPCODES),
+       SNOR_F_NAME(HAS_4BAIT),
+       SNOR_F_NAME(HAS_LOCK),
+       SNOR_F_NAME(HAS_16BIT_SR),
+       SNOR_F_NAME(NO_READ_CR),
+       SNOR_F_NAME(HAS_SR_TB_BIT6),
+       SNOR_F_NAME(HAS_4BIT_BP),
+       SNOR_F_NAME(HAS_SR_BP3_BIT6),
+       SNOR_F_NAME(IO_MODE_EN_VOLATILE),
+       SNOR_F_NAME(SOFT_RESET),
+       SNOR_F_NAME(SWP_IS_VOLATILE),
+};
+#undef SNOR_F_NAME
+
+static const char *spi_nor_protocol_name(enum spi_nor_protocol proto)
+{
+       switch (proto) {
+       case SNOR_PROTO_1_1_1:     return "1S-1S-1S";
+       case SNOR_PROTO_1_1_2:     return "1S-1S-2S";
+       case SNOR_PROTO_1_1_4:     return "1S-1S-4S";
+       case SNOR_PROTO_1_1_8:     return "1S-1S-8S";
+       case SNOR_PROTO_1_2_2:     return "1S-2S-2S";
+       case SNOR_PROTO_1_4_4:     return "1S-4S-4S";
+       case SNOR_PROTO_1_8_8:     return "1S-8S-8S";
+       case SNOR_PROTO_2_2_2:     return "2S-2S-2S";
+       case SNOR_PROTO_4_4_4:     return "4S-4S-4S";
+       case SNOR_PROTO_8_8_8:     return "8S-8S-8S";
+       case SNOR_PROTO_1_1_1_DTR: return "1D-1D-1D";
+       case SNOR_PROTO_1_2_2_DTR: return "1D-2D-2D";
+       case SNOR_PROTO_1_4_4_DTR: return "1D-4D-4D";
+       case SNOR_PROTO_1_8_8_DTR: return "1D-8D-8D";
+       case SNOR_PROTO_8_8_8_DTR: return "8D-8D-8D";
+       }
+
+       return "<unknown>";
+}
+
+static void spi_nor_print_flags(struct seq_file *s, unsigned long flags,
+                               const char *const *names, int names_len)
+{
+       bool sep = false;
+       int i;
+
+       for (i = 0; i < sizeof(flags) * BITS_PER_BYTE; i++) {
+               if (!(flags & BIT(i)))
+                       continue;
+               if (sep)
+                       seq_puts(s, " | ");
+               sep = true;
+               if (i < names_len && names[i])
+                       seq_puts(s, names[i]);
+               else
+                       seq_printf(s, "1<<%d", i);
+       }
+}
+
+static int spi_nor_params_show(struct seq_file *s, void *data)
+{
+       struct spi_nor *nor = s->private;
+       struct spi_nor_flash_parameter *params = nor->params;
+       struct spi_nor_erase_map *erase_map = &params->erase_map;
+       struct spi_nor_erase_region *region;
+       const struct flash_info *info = nor->info;
+       char buf[16], *str;
+       int i;
+
+       seq_printf(s, "name\t\t%s\n", info->name);
+       seq_printf(s, "id\t\t%*ph\n", info->id_len, info->id);
+       string_get_size(params->size, 1, STRING_UNITS_2, buf, sizeof(buf));
+       seq_printf(s, "size\t\t%s\n", buf);
+       seq_printf(s, "write size\t%u\n", params->writesize);
+       seq_printf(s, "page size\t%u\n", params->page_size);
+       seq_printf(s, "address width\t%u\n", nor->addr_width);
+
+       seq_puts(s, "flags\t\t");
+       spi_nor_print_flags(s, nor->flags, snor_f_names,
+                           ARRAY_SIZE(snor_f_names));
+       seq_puts(s, "\n");
+
+       seq_puts(s, "\nopcodes\n");
+       seq_printf(s, " read\t\t0x%02x\n", nor->read_opcode);
+       seq_printf(s, "  dummy cycles\t%u\n", nor->read_dummy);
+       seq_printf(s, " erase\t\t0x%02x\n", nor->erase_opcode);
+       seq_printf(s, " program\t0x%02x\n", nor->program_opcode);
+
+       switch (nor->cmd_ext_type) {
+       case SPI_NOR_EXT_NONE:
+               str = "none";
+               break;
+       case SPI_NOR_EXT_REPEAT:
+               str = "repeat";
+               break;
+       case SPI_NOR_EXT_INVERT:
+               str = "invert";
+               break;
+       default:
+               str = "<unknown>";
+               break;
+       }
+       seq_printf(s, " 8D extension\t%s\n", str);
+
+       seq_puts(s, "\nprotocols\n");
+       seq_printf(s, " read\t\t%s\n",
+                  spi_nor_protocol_name(nor->read_proto));
+       seq_printf(s, " write\t\t%s\n",
+                  spi_nor_protocol_name(nor->write_proto));
+       seq_printf(s, " register\t%s\n",
+                  spi_nor_protocol_name(nor->reg_proto));
+
+       seq_puts(s, "\nerase commands\n");
+       for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++) {
+               struct spi_nor_erase_type *et = &erase_map->erase_type[i];
+
+               if (et->size) {
+                       string_get_size(et->size, 1, STRING_UNITS_2, buf,
+                                       sizeof(buf));
+                       seq_printf(s, " %02x (%s) [%d]\n", et->opcode, buf, i);
+               }
+       }
+
+       if (!(nor->flags & SNOR_F_NO_OP_CHIP_ERASE)) {
+               string_get_size(params->size, 1, STRING_UNITS_2, buf, sizeof(buf));
+               seq_printf(s, " %02x (%s)\n", SPINOR_OP_CHIP_ERASE, buf);
+       }
+
+       seq_puts(s, "\nsector map\n");
+       seq_puts(s, " region (in hex)   | erase mask | flags\n");
+       seq_puts(s, " ------------------+------------+----------\n");
+       for (region = erase_map->regions;
+            region;
+            region = spi_nor_region_next(region)) {
+               u64 start = region->offset & ~SNOR_ERASE_FLAGS_MASK;
+               u64 flags = region->offset & SNOR_ERASE_FLAGS_MASK;
+               u64 end = start + region->size - 1;
+
+               seq_printf(s, " %08llx-%08llx |     [%c%c%c%c] | %s\n",
+                          start, end,
+                          flags & BIT(0) ? '0' : ' ',
+                          flags & BIT(1) ? '1' : ' ',
+                          flags & BIT(2) ? '2' : ' ',
+                          flags & BIT(3) ? '3' : ' ',
+                          flags & SNOR_OVERLAID_REGION ? "overlaid" : "");
+       }
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(spi_nor_params);
+
+static void spi_nor_print_read_cmd(struct seq_file *s, u32 cap,
+                                  struct spi_nor_read_command *cmd)
+{
+       seq_printf(s, " %s%s\n", spi_nor_protocol_name(cmd->proto),
+                  cap == SNOR_HWCAPS_READ_FAST ? " (fast read)" : "");
+       seq_printf(s, "  opcode\t0x%02x\n", cmd->opcode);
+       seq_printf(s, "  mode cycles\t%u\n", cmd->num_mode_clocks);
+       seq_printf(s, "  dummy cycles\t%u\n", cmd->num_wait_states);
+}
+
+static void spi_nor_print_pp_cmd(struct seq_file *s,
+                                struct spi_nor_pp_command *cmd)
+{
+       seq_printf(s, " %s\n", spi_nor_protocol_name(cmd->proto));
+       seq_printf(s, "  opcode\t0x%02x\n", cmd->opcode);
+}
+
+static int spi_nor_capabilities_show(struct seq_file *s, void *data)
+{
+       struct spi_nor *nor = s->private;
+       struct spi_nor_flash_parameter *params = nor->params;
+       u32 hwcaps = params->hwcaps.mask;
+       int i, cmd;
+
+       seq_puts(s, "Read modes supported by the flash\n");
+       for (i = 0; i < sizeof(hwcaps) * BITS_PER_BYTE; i++) {
+               if (!(hwcaps & BIT(i)))
+                       continue;
+
+               cmd = spi_nor_hwcaps_read2cmd(BIT(i));
+               if (cmd < 0)
+                       continue;
+
+               spi_nor_print_read_cmd(s, BIT(i), &params->reads[cmd]);
+               hwcaps &= ~BIT(i);
+       }
+
+       seq_puts(s, "\nPage program modes supported by the flash\n");
+       for (i = 0; i < sizeof(hwcaps) * BITS_PER_BYTE; i++) {
+               if (!(hwcaps & BIT(i)))
+                       continue;
+
+               cmd = spi_nor_hwcaps_pp2cmd(BIT(i));
+               if (cmd < 0)
+                       continue;
+
+               spi_nor_print_pp_cmd(s, &params->page_programs[cmd]);
+               hwcaps &= ~BIT(i);
+       }
+
+       if (hwcaps)
+               seq_printf(s, "\nunknown hwcaps 0x%x\n", hwcaps);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(spi_nor_capabilities);
+
+static void spi_nor_debugfs_unregister(void *data)
+{
+       struct spi_nor *nor = data;
+
+       debugfs_remove(nor->debugfs_root);
+       nor->debugfs_root = NULL;
+}
+
+void spi_nor_debugfs_register(struct spi_nor *nor)
+{
+       struct dentry *rootdir, *d;
+       int ret;
+
+       /* Create the rootdir only once; it is never removed. */
+       rootdir = debugfs_lookup(SPI_NOR_DEBUGFS_ROOT, NULL);
+       if (!rootdir)
+               rootdir = debugfs_create_dir(SPI_NOR_DEBUGFS_ROOT, NULL);
+
+       ret = devm_add_action(nor->dev, spi_nor_debugfs_unregister, nor);
+       if (ret)
+               return;
+
+       d = debugfs_create_dir(dev_name(nor->dev), rootdir);
+       nor->debugfs_root = d;
+
+       debugfs_create_file("params", 0444, d, nor, &spi_nor_params_fops);
+       debugfs_create_file("capabilities", 0444, d, nor,
+                           &spi_nor_capabilities_fops);
+}
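
The SNOR_F_NAME() table above relies on ilog2() so that each single-bit flag
lands at the array index matching its bit position, i.e. snor_f_names[i] names
BIT(i). The same trick in isolation (the DEMO_* names are made up for
illustration):

        #define DEMO_F_FOO      BIT(0)
        #define DEMO_F_BAR      BIT(3)

        #define DEMO_F_NAME(name) [ilog2(DEMO_F_##name)] = #name
        static const char *const demo_f_names[] = {
                DEMO_F_NAME(FOO),       /* index 0 */
                DEMO_F_NAME(BAR),       /* index 3; indices 1-2 stay NULL */
        };
        #undef DEMO_F_NAME

Bits without an entry fall through to the "1<<%d" branch in
spi_nor_print_flags().
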
index 8c1c575302813061ec4e707078c8566bda1660c5..50a11053711f7bcd467f740f46bdeb1ad3d816b1 100644 (file)
@@ -25,7 +25,8 @@ static const struct flash_info eon_nor_parts[] = {
        { "en25qh64",   INFO(0x1c7017, 0, 64 * 1024,  128)
                NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ) },
        { "en25qh128",  INFO(0x1c7018, 0, 64 * 1024,  256) },
-       { "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512) },
+       { "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512)
+               PARSE_SFDP },
        { "en25s64",    INFO(0x1c3817, 0, 64 * 1024,  128)
                NO_SFDP_FLAGS(SECT_4K) },
 };
index 8a20475ce77a1693f865b53a0067199ebafa9ba9..a96f74e0f568a50ce544ce0f13d4892e599fff56 100644 (file)
 #define FSR_P_ERR              BIT(4)  /* Program operation status */
 #define FSR_PT_ERR             BIT(1)  /* Protection error bit */
 
-static int micron_st_nor_octal_dtr_enable(struct spi_nor *nor, bool enable)
+/* Micron ST SPI NOR flash operations. */
+#define MICRON_ST_NOR_WR_ANY_REG_OP(naddr, addr, ndata, buf)           \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_MT_WR_ANY_REG, 0),          \
+                  SPI_MEM_OP_ADDR(naddr, addr, 0),                     \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_OUT(ndata, buf, 0))
+
+#define MICRON_ST_RDFSR_OP(buf)                                                \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDFSR, 0),                  \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_IN(1, buf, 0))
+
+#define MICRON_ST_CLFSR_OP                                             \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLFSR, 0),                  \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+static int micron_st_nor_octal_dtr_en(struct spi_nor *nor)
 {
        struct spi_mem_op op;
        u8 *buf = nor->bouncebuf;
        int ret;
 
-       if (enable) {
-               /* Use 20 dummy cycles for memory array reads. */
-               ret = spi_nor_write_enable(nor);
-               if (ret)
-                       return ret;
-
-               *buf = 20;
-               op = (struct spi_mem_op)
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_MT_WR_ANY_REG, 1),
-                                  SPI_MEM_OP_ADDR(3, SPINOR_REG_MT_CFR1V, 1),
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_OUT(1, buf, 1));
-
-               ret = spi_mem_exec_op(nor->spimem, &op);
-               if (ret)
-                       return ret;
-
-               ret = spi_nor_wait_till_ready(nor);
-               if (ret)
-                       return ret;
-       }
+       /* Use 20 dummy cycles for memory array reads. */
+       *buf = 20;
+       op = (struct spi_mem_op)
+               MICRON_ST_NOR_WR_ANY_REG_OP(3, SPINOR_REG_MT_CFR1V, 1, buf);
+       ret = spi_nor_write_any_volatile_reg(nor, &op, nor->reg_proto);
+       if (ret)
+               return ret;
 
-       ret = spi_nor_write_enable(nor);
+       buf[0] = SPINOR_MT_OCT_DTR;
+       op = (struct spi_mem_op)
+               MICRON_ST_NOR_WR_ANY_REG_OP(3, SPINOR_REG_MT_CFR0V, 1, buf);
+       ret = spi_nor_write_any_volatile_reg(nor, &op, nor->reg_proto);
        if (ret)
                return ret;
 
-       if (enable) {
-               buf[0] = SPINOR_MT_OCT_DTR;
-       } else {
-               /*
-                * The register is 1-byte wide, but 1-byte transactions are not
-                * allowed in 8D-8D-8D mode. The next register is the dummy
-                * cycle configuration register. Since the transaction needs to
-                * be at least 2 bytes wide, set the next register to its
-                * default value. This also makes sense because the value was
-                * changed when enabling 8D-8D-8D mode, it should be reset when
-                * disabling.
-                */
-               buf[0] = SPINOR_MT_EXSPI;
-               buf[1] = SPINOR_REG_MT_CFR1V_DEF;
+       /* Read flash ID to make sure the switch was successful. */
+       ret = spi_nor_read_id(nor, 0, 8, buf, SNOR_PROTO_8_8_8_DTR);
+       if (ret) {
+               dev_dbg(nor->dev, "error %d reading JEDEC ID after enabling 8D-8D-8D mode\n", ret);
+               return ret;
        }
 
-       op = (struct spi_mem_op)
-               SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_MT_WR_ANY_REG, 1),
-                          SPI_MEM_OP_ADDR(enable ? 3 : 4,
-                                          SPINOR_REG_MT_CFR0V, 1),
-                          SPI_MEM_OP_NO_DUMMY,
-                          SPI_MEM_OP_DATA_OUT(enable ? 1 : 2, buf, 1));
+       if (memcmp(buf, nor->info->id, nor->info->id_len))
+               return -EINVAL;
 
-       if (!enable)
-               spi_nor_spimem_setup_op(nor, &op, SNOR_PROTO_8_8_8_DTR);
+       return 0;
+}
 
-       ret = spi_mem_exec_op(nor->spimem, &op);
+static int micron_st_nor_octal_dtr_dis(struct spi_nor *nor)
+{
+       struct spi_mem_op op;
+       u8 *buf = nor->bouncebuf;
+       int ret;
+
+       /*
+        * The register is 1-byte wide, but 1-byte transactions are not allowed
+        * in 8D-8D-8D mode. The next register is the dummy cycle configuration
+        * register. Since the transaction needs to be at least 2 bytes wide,
+        * set the next register to its default value. This also makes sense
+        * because the value was changed when enabling 8D-8D-8D mode, so it
+        * should be reset when disabling.
+        */
+       buf[0] = SPINOR_MT_EXSPI;
+       buf[1] = SPINOR_REG_MT_CFR1V_DEF;
+       op = (struct spi_mem_op)
+               MICRON_ST_NOR_WR_ANY_REG_OP(4, SPINOR_REG_MT_CFR0V, 2, buf);
+       ret = spi_nor_write_any_volatile_reg(nor, &op, SNOR_PROTO_8_8_8_DTR);
        if (ret)
                return ret;
 
        /* Read flash ID to make sure the switch was successful. */
-       op = (struct spi_mem_op)
-               SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 1),
-                          SPI_MEM_OP_NO_ADDR,
-                          SPI_MEM_OP_DUMMY(enable ? 8 : 0, 1),
-                          SPI_MEM_OP_DATA_IN(round_up(nor->info->id_len, 2),
-                                             buf, 1));
-
-       if (enable)
-               spi_nor_spimem_setup_op(nor, &op, SNOR_PROTO_8_8_8_DTR);
-
-       ret = spi_mem_exec_op(nor->spimem, &op);
-       if (ret)
+       ret = spi_nor_read_id(nor, 0, 0, buf, SNOR_PROTO_1_1_1);
+       if (ret) {
+               dev_dbg(nor->dev, "error %d reading JEDEC ID after disabling 8D-8D-8D mode\n", ret);
                return ret;
+       }
 
        if (memcmp(buf, nor->info->id, nor->info->id_len))
                return -EINVAL;
@@ -111,6 +116,12 @@ static int micron_st_nor_octal_dtr_enable(struct spi_nor *nor, bool enable)
        return 0;
 }
 
+static int micron_st_nor_octal_dtr_enable(struct spi_nor *nor, bool enable)
+{
+       return enable ? micron_st_nor_octal_dtr_en(nor) :
+                       micron_st_nor_octal_dtr_dis(nor);
+}
+
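
The converted enable/disable paths above no longer open-code the
write-enable/execute/wait sequence. Judging from the code they replace,
spi_nor_write_any_volatile_reg() (declared in core.h earlier in this diff) is
expected to bundle roughly the following; this is a sketch of the replaced
sequence, not the helper's actual body:

        static int sketch_write_volatile_reg(struct spi_nor *nor,
                                             struct spi_mem_op *op,
                                             enum spi_nor_protocol proto)
        {
                int ret;

                ret = spi_nor_write_enable(nor);
                if (ret)
                        return ret;

                spi_nor_spimem_setup_op(nor, op, proto);

                ret = spi_mem_exec_op(nor->spimem, op);
                if (ret)
                        return ret;

                return spi_nor_wait_till_ready(nor);
        }
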
 static void mt35xu512aba_default_init(struct spi_nor *nor)
 {
        nor->params->octal_dtr_enable = micron_st_nor_octal_dtr_enable;
@@ -322,11 +333,7 @@ static int micron_st_nor_read_fsr(struct spi_nor *nor, u8 *fsr)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDFSR, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_IN(1, fsr, 0));
+               struct spi_mem_op op = MICRON_ST_RDFSR_OP(fsr);
 
                if (nor->reg_proto == SNOR_PROTO_8_8_8_DTR) {
                        op.addr.nbytes = nor->params->rdsr_addr_nbytes;
@@ -361,11 +368,7 @@ static void micron_st_nor_clear_fsr(struct spi_nor *nor)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLFSR, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_NO_DATA);
+               struct spi_mem_op op = MICRON_ST_CLFSR_OP;
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
index f24e546e04a550e8e586e5afcf6eb1edc5b120dc..43cd6cd925371637194af418941e04468a15a56b 100644 (file)
 #define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_DS    0
 #define SPINOR_OP_CYPRESS_RD_FAST              0xee
 
-/**
- * cypress_nor_octal_dtr_enable() - Enable octal DTR on Cypress flashes.
- * @nor:               pointer to a 'struct spi_nor'
- * @enable:              whether to enable or disable Octal DTR
- *
- * This also sets the memory access latency cycles to 24 to allow the flash to
- * run at up to 200MHz.
- *
- * Return: 0 on success, -errno otherwise.
- */
-static int cypress_nor_octal_dtr_enable(struct spi_nor *nor, bool enable)
+/* Cypress SPI NOR flash operations. */
+#define CYPRESS_NOR_WR_ANY_REG_OP(naddr, addr, ndata, buf)             \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WR_ANY_REG, 0),             \
+                  SPI_MEM_OP_ADDR(naddr, addr, 0),                     \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_OUT(ndata, buf, 0))
+
+#define CYPRESS_NOR_RD_ANY_REG_OP(naddr, addr, buf)                    \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RD_ANY_REG, 0),             \
+                  SPI_MEM_OP_ADDR(naddr, addr, 0),                     \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_IN(1, buf, 0))
+
+#define SPANSION_CLSR_OP                                               \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLSR, 0),                   \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_NO_DATA)
+
+static int cypress_nor_octal_dtr_en(struct spi_nor *nor)
 {
        struct spi_mem_op op;
        u8 *buf = nor->bouncebuf;
        int ret;
 
-       if (enable) {
-               /* Use 24 dummy cycles for memory array reads. */
-               ret = spi_nor_write_enable(nor);
-               if (ret)
-                       return ret;
-
-               *buf = SPINOR_REG_CYPRESS_CFR2V_MEMLAT_11_24;
-               op = (struct spi_mem_op)
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WR_ANY_REG, 1),
-                                  SPI_MEM_OP_ADDR(3, SPINOR_REG_CYPRESS_CFR2V,
-                                                  1),
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_OUT(1, buf, 1));
+       /* Use 24 dummy cycles for memory array reads. */
+       *buf = SPINOR_REG_CYPRESS_CFR2V_MEMLAT_11_24;
+       op = (struct spi_mem_op)
+               CYPRESS_NOR_WR_ANY_REG_OP(3, SPINOR_REG_CYPRESS_CFR2V, 1, buf);
 
-               ret = spi_mem_exec_op(nor->spimem, &op);
-               if (ret)
-                       return ret;
+       ret = spi_nor_write_any_volatile_reg(nor, &op, nor->reg_proto);
+       if (ret)
+               return ret;
 
-               ret = spi_nor_wait_till_ready(nor);
-               if (ret)
-                       return ret;
+       nor->read_dummy = 24;
 
-               nor->read_dummy = 24;
-       }
+       /* Set the octal and DTR enable bits. */
+       buf[0] = SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN;
+       op = (struct spi_mem_op)
+               CYPRESS_NOR_WR_ANY_REG_OP(3, SPINOR_REG_CYPRESS_CFR5V, 1, buf);
 
-       /* Set/unset the octal and DTR enable bits. */
-       ret = spi_nor_write_enable(nor);
+       ret = spi_nor_write_any_volatile_reg(nor, &op, nor->reg_proto);
        if (ret)
                return ret;
 
-       if (enable) {
-               buf[0] = SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN;
-       } else {
-               /*
-                * The register is 1-byte wide, but 1-byte transactions are not
-                * allowed in 8D-8D-8D mode. Since there is no register at the
-                * next location, just initialize the value to 0 and let the
-                * transaction go on.
-                */
-               buf[0] = SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_DS;
-               buf[1] = 0;
+       /* Read flash ID to make sure the switch was successful. */
+       ret = spi_nor_read_id(nor, 4, 3, buf, SNOR_PROTO_8_8_8_DTR);
+       if (ret) {
+               dev_dbg(nor->dev, "error %d reading JEDEC ID after enabling 8D-8D-8D mode\n", ret);
+               return ret;
        }
 
-       op = (struct spi_mem_op)
-               SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WR_ANY_REG, 1),
-                          SPI_MEM_OP_ADDR(enable ? 3 : 4,
-                                          SPINOR_REG_CYPRESS_CFR5V,
-                                          1),
-                          SPI_MEM_OP_NO_DUMMY,
-                          SPI_MEM_OP_DATA_OUT(enable ? 1 : 2, buf, 1));
+       if (memcmp(buf, nor->info->id, nor->info->id_len))
+               return -EINVAL;
 
-       if (!enable)
-               spi_nor_spimem_setup_op(nor, &op, SNOR_PROTO_8_8_8_DTR);
+       return 0;
+}
 
-       ret = spi_mem_exec_op(nor->spimem, &op);
+static int cypress_nor_octal_dtr_dis(struct spi_nor *nor)
+{
+       struct spi_mem_op op;
+       u8 *buf = nor->bouncebuf;
+       int ret;
+
+       /*
+        * The register is 1-byte wide, but 1-byte transactions are not allowed
+        * in 8D-8D-8D mode. Since there is no register at the next location,
+        * just initialize the value to 0 and let the transaction go on.
+        */
+       buf[0] = SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_DS;
+       buf[1] = 0;
+       op = (struct spi_mem_op)
+               CYPRESS_NOR_WR_ANY_REG_OP(4, SPINOR_REG_CYPRESS_CFR5V, 2, buf);
+       ret = spi_nor_write_any_volatile_reg(nor, &op, SNOR_PROTO_8_8_8_DTR);
        if (ret)
                return ret;
 
        /* Read flash ID to make sure the switch was successful. */
-       op = (struct spi_mem_op)
-               SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 1),
-                          SPI_MEM_OP_ADDR(enable ? 4 : 0, 0, 1),
-                          SPI_MEM_OP_DUMMY(enable ? 3 : 0, 1),
-                          SPI_MEM_OP_DATA_IN(round_up(nor->info->id_len, 2),
-                                             buf, 1));
-
-       if (enable)
-               spi_nor_spimem_setup_op(nor, &op, SNOR_PROTO_8_8_8_DTR);
-
-       ret = spi_mem_exec_op(nor->spimem, &op);
-       if (ret)
+       ret = spi_nor_read_id(nor, 0, 0, buf, SNOR_PROTO_1_1_1);
+       if (ret) {
+               dev_dbg(nor->dev, "error %d reading JEDEC ID after disabling 8D-8D-8D mode\n", ret);
                return ret;
+       }
 
        if (memcmp(buf, nor->info->id, nor->info->id_len))
                return -EINVAL;
@@ -118,6 +113,22 @@ static int cypress_nor_octal_dtr_enable(struct spi_nor *nor, bool enable)
        return 0;
 }
 
+/**
+ * cypress_nor_octal_dtr_enable() - Enable octal DTR on Cypress flashes.
+ * @nor:               pointer to a 'struct spi_nor'
+ * @enable:            whether to enable or disable Octal DTR
+ *
+ * This also sets the memory access latency cycles to 24 to allow the flash to
+ * run at up to 200MHz.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int cypress_nor_octal_dtr_enable(struct spi_nor *nor, bool enable)
+{
+       return enable ? cypress_nor_octal_dtr_en(nor) :
+                       cypress_nor_octal_dtr_dis(nor);
+}
+
 static void s28hs512t_default_init(struct spi_nor *nor)
 {
        nor->params->octal_dtr_enable = cypress_nor_octal_dtr_enable;
@@ -162,12 +173,12 @@ static int s28hs512t_post_bfpt_fixup(struct spi_nor *nor,
         * CFR3V[4] and set the correct size.
         */
        struct spi_mem_op op =
-               SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RD_ANY_REG, 1),
-                          SPI_MEM_OP_ADDR(3, SPINOR_REG_CYPRESS_CFR3V, 1),
-                          SPI_MEM_OP_NO_DUMMY,
-                          SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+               CYPRESS_NOR_RD_ANY_REG_OP(3, SPINOR_REG_CYPRESS_CFR3V,
+                                         nor->bouncebuf);
        int ret;
 
+       spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
+
        ret = spi_mem_exec_op(nor->spimem, &op);
        if (ret)
                return ret;
@@ -317,11 +328,7 @@ static void spansion_nor_clear_sr(struct spi_nor *nor)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLSR, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_NO_DATA);
+               struct spi_mem_op op = SPANSION_CLSR_OP;
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
index fe80dffc2e7038b3c2b875a73ed08181e662cb49..ffaa2405525989b9ebde133a5f287a859bc217a7 100644 (file)
@@ -8,6 +8,15 @@
 
 #include "core.h"
 
+#define WINBOND_NOR_OP_RDEAR   0xc8    /* Read Extended Address Register */
+#define WINBOND_NOR_OP_WREAR   0xc5    /* Write Extended Address Register */
+
+#define WINBOND_NOR_WREAR_OP(buf)                                      \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(WINBOND_NOR_OP_WREAR, 0),             \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_OUT(1, buf, 0))
+
 static int
 w25q256_post_bfpt_fixups(struct spi_nor *nor,
                         const struct sfdp_parameter_header *bfpt_header,
@@ -124,11 +133,45 @@ static const struct flash_info winbond_nor_parts[] = {
        { "w25m512jv", INFO(0xef7119, 0, 64 * 1024, 1024)
                NO_SFDP_FLAGS(SECT_4K | SPI_NOR_QUAD_READ |
                              SPI_NOR_DUAL_READ) },
+       { "w25q512nwm", INFO(0xef8020, 0, 64 * 1024, 1024)
+               PARSE_SFDP
+               OTP_INFO(256, 3, 0x1000, 0x1000) },
        { "w25q512jvq", INFO(0xef4020, 0, 64 * 1024, 1024)
                NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
                              SPI_NOR_QUAD_READ) },
 };
 
+/**
+ * winbond_nor_write_ear() - Write Extended Address Register.
+ * @nor:       pointer to 'struct spi_nor'.
+ * @ear:       value to write to the Extended Address Register.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int winbond_nor_write_ear(struct spi_nor *nor, u8 ear)
+{
+       int ret;
+
+       nor->bouncebuf[0] = ear;
+
+       if (nor->spimem) {
+               struct spi_mem_op op = WINBOND_NOR_WREAR_OP(nor->bouncebuf);
+
+               spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
+
+               ret = spi_mem_exec_op(nor->spimem, &op);
+       } else {
+               ret = spi_nor_controller_ops_write_reg(nor,
+                                                      WINBOND_NOR_OP_WREAR,
+                                                      nor->bouncebuf, 1);
+       }
+
+       if (ret)
+               dev_dbg(nor->dev, "error %d writing EAR\n", ret);
+
+       return ret;
+}
+
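
For background: on parts larger than 16 MiB in 3-byte address mode, the
Extended Address Register supplies address bits 31:24, which is why leaving
4-byte mode writes 0 back to it. Deriving the EAR value for a linear address
would look like this (hypothetical helper, not in the patch):

        /* Bits 31:24 of the linear address select the 16 MiB bank. */
        static u8 sketch_ear_for_addr(u32 addr)
        {
                return addr >> 24;
        }
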
 /**
  * winbond_nor_set_4byte_addr_mode() - Set 4-byte address mode for Winbond
  * flashes.
@@ -155,7 +198,7 @@ static int winbond_nor_set_4byte_addr_mode(struct spi_nor *nor, bool enable)
        if (ret)
                return ret;
 
-       ret = spi_nor_write_ear(nor, 0);
+       ret = winbond_nor_write_ear(nor, 0);
        if (ret)
                return ret;
 
index 9459ac2609dc0252fddf1d4714bf7f9084559224..1d2f5db047bd002267ab71ba582538990ccda4e3 100644 (file)
 #define XSR_PAGESIZE           BIT(0)  /* Page size in Po2 or Linear */
 #define XSR_RDY                        BIT(7)  /* Ready */
 
+#define XILINX_RDSR_OP(buf)                                            \
+       SPI_MEM_OP(SPI_MEM_OP_CMD(XILINX_OP_RDSR, 0),                   \
+                  SPI_MEM_OP_NO_ADDR,                                  \
+                  SPI_MEM_OP_NO_DUMMY,                                 \
+                  SPI_MEM_OP_DATA_IN(1, buf, 0))
+
 #define S3AN_INFO(_jedec_id, _n_sectors, _page_size)                   \
                .id = {                                                 \
                        ((_jedec_id) >> 16) & 0xff,                     \
@@ -72,11 +78,7 @@ static int xilinx_nor_read_sr(struct spi_nor *nor, u8 *sr)
        int ret;
 
        if (nor->spimem) {
-               struct spi_mem_op op =
-                       SPI_MEM_OP(SPI_MEM_OP_CMD(XILINX_OP_RDSR, 0),
-                                  SPI_MEM_OP_NO_ADDR,
-                                  SPI_MEM_OP_NO_DUMMY,
-                                  SPI_MEM_OP_DATA_IN(1, sr, 0));
+               struct spi_mem_op op = XILINX_RDSR_OP(sr);
 
                spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
 
index b3b5bc1c803b3c4fd6d0428b2372a64b82de85f0..088bb1bcf1efb6cd68867efdeeb6a37ad727be29 100644 (file)
@@ -1495,34 +1495,22 @@ static int m_can_dev_setup(struct m_can_classdev *cdev)
                err = can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
                if (err)
                        return err;
-               cdev->can.bittiming_const = cdev->bit_timing ?
-                       cdev->bit_timing : &m_can_bittiming_const_30X;
-
-               cdev->can.data_bittiming_const = cdev->data_timing ?
-                       cdev->data_timing :
-                       &m_can_data_bittiming_const_30X;
+               cdev->can.bittiming_const = &m_can_bittiming_const_30X;
+               cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X;
                break;
        case 31:
                /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */
                err = can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
                if (err)
                        return err;
-               cdev->can.bittiming_const = cdev->bit_timing ?
-                       cdev->bit_timing : &m_can_bittiming_const_31X;
-
-               cdev->can.data_bittiming_const = cdev->data_timing ?
-                       cdev->data_timing :
-                       &m_can_data_bittiming_const_31X;
+               cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+               cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
                break;
        case 32:
        case 33:
                /* Support both MCAN version v3.2.x and v3.3.0 */
-               cdev->can.bittiming_const = cdev->bit_timing ?
-                       cdev->bit_timing : &m_can_bittiming_const_31X;
-
-               cdev->can.data_bittiming_const = cdev->data_timing ?
-                       cdev->data_timing :
-                       &m_can_data_bittiming_const_31X;
+               cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+               cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
 
                cdev->can.ctrlmode_supported |=
                        (m_can_niso_supported(cdev) ?
index 2c5d40997168616ca1ab85d1235560c654f2dfe7..d18b515e6ccc76c33660ce9fd0bed098ebf18559 100644 (file)
@@ -85,9 +85,6 @@ struct m_can_classdev {
        struct sk_buff *tx_skb;
        struct phy *transceiver;
 
-       const struct can_bittiming_const *bit_timing;
-       const struct can_bittiming_const *data_timing;
-
        struct m_can_ops *ops;
 
        int version;
index b56a54d6c5a9c4d274ecc76b637fbaa02ff5cf84..8f184a852a0a7c7476eaa1cf231ab1e4d46a5dc0 100644 (file)
 
 #define M_CAN_PCI_MMIO_BAR             0
 
+#define M_CAN_CLOCK_FREQ_EHL           200000000
 #define CTL_CSR_INT_CTL_OFFSET         0x508
 
-struct m_can_pci_config {
-       const struct can_bittiming_const *bit_timing;
-       const struct can_bittiming_const *data_timing;
-       unsigned int clock_freq;
-};
-
 struct m_can_pci_priv {
        struct m_can_classdev cdev;
 
@@ -89,40 +84,9 @@ static struct m_can_ops m_can_pci_ops = {
        .read_fifo = iomap_read_fifo,
 };
 
-static const struct can_bittiming_const m_can_bittiming_const_ehl = {
-       .name = KBUILD_MODNAME,
-       .tseg1_min = 2,         /* Time segment 1 = prop_seg + phase_seg1 */
-       .tseg1_max = 64,
-       .tseg2_min = 1,         /* Time segment 2 = phase_seg2 */
-       .tseg2_max = 128,
-       .sjw_max = 128,
-       .brp_min = 1,
-       .brp_max = 512,
-       .brp_inc = 1,
-};
-
-static const struct can_bittiming_const m_can_data_bittiming_const_ehl = {
-       .name = KBUILD_MODNAME,
-       .tseg1_min = 2,         /* Time segment 1 = prop_seg + phase_seg1 */
-       .tseg1_max = 16,
-       .tseg2_min = 1,         /* Time segment 2 = phase_seg2 */
-       .tseg2_max = 8,
-       .sjw_max = 4,
-       .brp_min = 1,
-       .brp_max = 32,
-       .brp_inc = 1,
-};
-
-static const struct m_can_pci_config m_can_pci_ehl = {
-       .bit_timing = &m_can_bittiming_const_ehl,
-       .data_timing = &m_can_data_bittiming_const_ehl,
-       .clock_freq = 200000000,
-};
-
 static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
        struct device *dev = &pci->dev;
-       const struct m_can_pci_config *cfg;
        struct m_can_classdev *mcan_class;
        struct m_can_pci_priv *priv;
        void __iomem *base;
@@ -150,8 +114,6 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
        if (!mcan_class)
                return -ENOMEM;
 
-       cfg = (const struct m_can_pci_config *)id->driver_data;
-
        priv = cdev_to_priv(mcan_class);
 
        priv->base = base;
@@ -163,9 +125,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
        mcan_class->dev = &pci->dev;
        mcan_class->net->irq = pci_irq_vector(pci, 0);
        mcan_class->pm_clock_support = 1;
-       mcan_class->bit_timing = cfg->bit_timing;
-       mcan_class->data_timing = cfg->data_timing;
-       mcan_class->can.clock.freq = cfg->clock_freq;
+       mcan_class->can.clock.freq = id->driver_data;
        mcan_class->ops = &m_can_pci_ops;
 
        pci_set_drvdata(pci, mcan_class);
@@ -218,8 +178,8 @@ static SIMPLE_DEV_PM_OPS(m_can_pci_pm_ops,
                         m_can_pci_suspend, m_can_pci_resume);
 
 static const struct pci_device_id m_can_pci_id_table[] = {
-       { PCI_VDEVICE(INTEL, 0x4bc1), (kernel_ulong_t)&m_can_pci_ehl, },
-       { PCI_VDEVICE(INTEL, 0x4bc2), (kernel_ulong_t)&m_can_pci_ehl, },
+       { PCI_VDEVICE(INTEL, 0x4bc1), M_CAN_CLOCK_FREQ_EHL, },
+       { PCI_VDEVICE(INTEL, 0x4bc2), M_CAN_CLOCK_FREQ_EHL, },
        {  }    /* Terminating Entry */
 };
 MODULE_DEVICE_TABLE(pci, m_can_pci_id_table);
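
With the EHL config struct gone, driver_data shrinks from a pointer to the
bare clock frequency; since the field is a kernel_ulong_t, a scalar fits
without casts. The idiom in miniature (demo names are hypothetical):

        static const struct pci_device_id demo_id_table[] = {
                { PCI_VDEVICE(INTEL, 0x4bc1), 200000000UL },    /* Hz */
                { }     /* Terminating Entry */
        };

        static int demo_probe(struct pci_dev *pci,
                              const struct pci_device_id *id)
        {
                /* driver_data is a kernel_ulong_t; no pointer cast needed. */
                unsigned long clock_freq = id->driver_data;

                return clock_freq ? 0 : -EINVAL;
        }
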
index cf82b1fa972529494eaaa525140eb85e766d983a..87e81c636339f9720260807521f917ff57f74fb9 100644 (file)
@@ -809,6 +809,9 @@ static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port,
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        u32 reg, offset;
 
+       if (priv->wol_ports_mask & BIT(port))
+               return;
+
        if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
                if (priv->type == BCM4908_DEVICE_ID ||
                    priv->type == BCM7445_DEVICE_ID)
index 9e28219b223df95473dd118af43cc63b85fa4a3b..faccfb3f015836e76ba787e68a557eff3d22c834 100644 (file)
@@ -403,6 +403,7 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds,
 {
        struct ocelot *ocelot = ds->priv;
        struct felix *felix = ocelot_to_felix(ocelot);
+       struct ocelot_vcap_block *block_vcap_is2;
        struct ocelot_vcap_filter *trap;
        enum ocelot_mask_mode mask_mode;
        unsigned long port_mask;
@@ -422,9 +423,13 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds,
        /* We are sure that "cpu" was found, otherwise
         * dsa_tree_setup_default_cpu() would have failed earlier.
         */
+       block_vcap_is2 = &ocelot->block[VCAP_IS2];
 
        /* Make sure all traps are set up for that destination */
-       list_for_each_entry(trap, &ocelot->traps, trap_list) {
+       list_for_each_entry(trap, &block_vcap_is2->rules, list) {
+               if (!trap->is_trap)
+                       continue;
+
                /* Figure out the current trapping destination */
                if (using_tag_8021q) {
                        /* Redirect to the tag_8021q CPU port. If timestamps
index 3a529ee8c834061666a55bb2bb1ff62853780876..831833911a52562a5bbb4f14020b9ad2d5c8f1ea 100644 (file)
@@ -449,7 +449,7 @@ static int aq_pm_freeze(struct device *dev)
 
 static int aq_pm_suspend_poweroff(struct device *dev)
 {
-       return aq_suspend_common(dev, false);
+       return aq_suspend_common(dev, true);
 }
 
 static int aq_pm_thaw(struct device *dev)
@@ -459,7 +459,7 @@ static int aq_pm_thaw(struct device *dev)
 
 static int aq_pm_resume_restore(struct device *dev)
 {
-       return atl_resume_common(dev, false);
+       return atl_resume_common(dev, true);
 }
 
 static const struct dev_pm_ops aq_pm_ops = {
index 77e76c9efd32f70fa509ad9f2f4a3f7fe8acc1b0..8201ce7adb7777eea0390f615fe7b26b7ed06c3c 100644 (file)
@@ -346,7 +346,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                     int budget)
 {
        struct net_device *ndev = aq_nic_get_ndev(self->aq_nic);
-       bool is_rsc_completed = true;
        int err = 0;
 
        for (; (self->sw_head != self->hw_head) && budget;
@@ -364,12 +363,17 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                        continue;
 
                if (!buff->is_eop) {
+                       unsigned int frag_cnt = 0U;
+
                        buff_ = buff;
                        do {
+                               bool is_rsc_completed = true;
+
                                if (buff_->next >= self->size) {
                                        err = -EIO;
                                        goto err_exit;
                                }
+
+                               frag_cnt++;
                                next_ = buff_->next;
                                buff_ = &self->buff_ring[next_];
                                is_rsc_completed =
@@ -377,18 +381,17 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                                                            next_,
                                                            self->hw_head);
 
-                               if (unlikely(!is_rsc_completed))
-                                       break;
+                               if (unlikely(!is_rsc_completed) ||
+                                               frag_cnt > MAX_SKB_FRAGS) {
+                                       err = 0;
+                                       goto err_exit;
+                               }
 
                                buff->is_error |= buff_->is_error;
                                buff->is_cso_err |= buff_->is_cso_err;
 
                        } while (!buff_->is_eop);
 
-                       if (!is_rsc_completed) {
-                               err = 0;
-                               goto err_exit;
-                       }
                        if (buff->is_error ||
                            (buff->is_lro && buff->is_cso_err)) {
                                buff_ = buff;
@@ -446,7 +449,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                       ALIGN(hdr_len, sizeof(long)));
 
                if (buff->len - hdr_len > 0) {
-                       skb_add_rx_frag(skb, 0, buff->rxdata.page,
+                       skb_add_rx_frag(skb, i++, buff->rxdata.page,
                                        buff->rxdata.pg_off + hdr_len,
                                        buff->len - hdr_len,
                                        AQ_CFG_RX_FRAME_MAX);
@@ -455,7 +458,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
 
                if (!buff->is_eop) {
                        buff_ = buff;
-                       i = 1U;
                        do {
                                next_ = buff_->next;
                                buff_ = &self->buff_ring[next_];
index d875ce3ec759bbd0e557ce97f6c7f9cef96af8b4..15ede7285fb5d1140cd0afec5fcfbd85d25ef965 100644 (file)
@@ -889,6 +889,13 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
                err = -ENXIO;
                goto err_exit;
        }
+
+       /* Validate that the new hw_head_ is reasonable. */
+       if (hw_head_ >= ring->size) {
+               err = -ENXIO;
+               goto err_exit;
+       }
+
        ring->hw_head = hw_head_;
        err = aq_hw_err_from_flags(self);
 
index 60dde29974bfea53f8092fda078f348d36c6a6ef..df51be3cbe06906550c627c3265657fc1828a92a 100644 (file)
@@ -2585,8 +2585,10 @@ static int bcm_sysport_probe(struct platform_device *pdev)
                device_set_wakeup_capable(&pdev->dev, 1);
 
        priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol");
-       if (IS_ERR(priv->wol_clk))
-               return PTR_ERR(priv->wol_clk);
+       if (IS_ERR(priv->wol_clk)) {
+               ret = PTR_ERR(priv->wol_clk);
+               goto err_deregister_fixed_link;
+       }
 
        /* Set the needed headroom once and for all */
        BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8);
index bf1ec8fdc2adc0a62e06393c41bca7e1afa70c54..e87e46c47387ed5235a84cc2451631fb76ab398f 100644 (file)
@@ -3999,6 +3999,10 @@ static int bcmgenet_probe(struct platform_device *pdev)
                goto err;
        }
        priv->wol_irq = platform_get_irq_optional(pdev, 2);
+       if (priv->wol_irq == -EPROBE_DEFER) {
+               err = priv->wol_irq;
+               goto err;
+       }
 
        priv->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->base)) {
index e475be29845c669a0bf734ddc654790339bf96aa..61284baa0496ecef02de133435964e88debab07c 100644 (file)
@@ -1219,7 +1219,6 @@ static void gem_rx_refill(struct macb_queue *queue)
                /* Make hw descriptor updates visible to CPU */
                rmb();
 
-               queue->rx_prepared_head++;
                desc = macb_rx_desc(queue, entry);
 
                if (!queue->rx_skbuff[entry]) {
@@ -1258,6 +1257,7 @@ static void gem_rx_refill(struct macb_queue *queue)
                        dma_wmb();
                        desc->addr &= ~MACB_BIT(RX_USED);
                }
+               queue->rx_prepared_head++;
        }
 
        /* Make descriptor updates visible to hardware */
index e7b4e3ed056c725cb832302c2efec6d0416a283d..8d719f82854a9d3acb582ba3b8af13479d29771d 100644 (file)
@@ -2793,14 +2793,14 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
                goto out;
        na = ret;
 
-       memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN));
+       memcpy(p->id, vpd + id, min_t(unsigned int, id_len, ID_LEN));
        strim(p->id);
-       memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN));
+       memcpy(p->sn, vpd + sn, min_t(unsigned int, sn_len, SERNUM_LEN));
        strim(p->sn);
-       memcpy(p->pn, vpd + pn, min_t(int, pn_len, PN_LEN));
+       memcpy(p->pn, vpd + pn, min_t(unsigned int, pn_len, PN_LEN));
        strim(p->pn);
-       memcpy(p->na, vpd + na, min_t(int, na_len, MACADDR_LEN));
-       strim((char *)p->na);
+       memcpy(p->na, vpd + na, min_t(unsigned int, na_len, MACADDR_LEN));
+       strim(p->na);
 
 out:
        vfree(vpd);
index 79df5a72877b83b4a01429c0e929ff86aa706e08..0040dcaab9455f7cac35286e0cb507ab6a1bed2b 100644 (file)
@@ -1399,8 +1399,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        /* alloc_etherdev ensures aligned and zeroed private structures */
        dev = alloc_etherdev (sizeof (*tp));
-       if (!dev)
+       if (!dev) {
+               pci_disable_device(pdev);
                return -ENOMEM;
+       }
 
        SET_NETDEV_DEV(dev, &pdev->dev);
        if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) {
@@ -1785,6 +1787,7 @@ err_out_free_res:
 
 err_out_free_netdev:
        free_netdev (dev);
+       pci_disable_device(pdev);
        return -ENODEV;
 }
 
index caf48023f8ea5419cf5aec5ddd096f11e8ae43b3..5231818943c6e12f8cad1dd065129a538acf83c8 100644 (file)
@@ -1928,6 +1928,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
        /* AST2400  doesn't have working HW checksum generation */
        if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac")))
                netdev->hw_features &= ~NETIF_F_HW_CSUM;
+
+       /* AST2600 tx checksum with NCSI is broken */
+       if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac"))
+               netdev->hw_features &= ~NETIF_F_HW_CSUM;
+
        if (np && of_get_property(np, "no-hw-checksum", NULL))
                netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM);
        netdev->features |= netdev->hw_features;
index 6778df2177a114a63114b9709610d3bdddc73778..98871f01499469332b03b38a278e89d1532ccff0 100644 (file)
@@ -7549,42 +7549,43 @@ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
 static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
                            struct i40e_fwd_adapter *fwd)
 {
+       struct i40e_channel *ch = NULL, *ch_tmp, *iter;
        int ret = 0, num_tc = 1,  i, aq_err;
-       struct i40e_channel *ch, *ch_tmp;
        struct i40e_pf *pf = vsi->back;
        struct i40e_hw *hw = &pf->hw;
 
-       if (list_empty(&vsi->macvlan_list))
-               return -EINVAL;
-
        /* Go through the list and find an available channel */
-       list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
-               if (!i40e_is_channel_macvlan(ch)) {
-                       ch->fwd = fwd;
+       list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) {
+               if (!i40e_is_channel_macvlan(iter)) {
+                       iter->fwd = fwd;
                        /* record configuration for macvlan interface in vdev */
                        for (i = 0; i < num_tc; i++)
                                netdev_bind_sb_channel_queue(vsi->netdev, vdev,
                                                             i,
-                                                            ch->num_queue_pairs,
-                                                            ch->base_queue);
-                       for (i = 0; i < ch->num_queue_pairs; i++) {
+                                                            iter->num_queue_pairs,
+                                                            iter->base_queue);
+                       for (i = 0; i < iter->num_queue_pairs; i++) {
                                struct i40e_ring *tx_ring, *rx_ring;
                                u16 pf_q;
 
-                               pf_q = ch->base_queue + i;
+                               pf_q = iter->base_queue + i;
 
                                /* Get to TX ring ptr */
                                tx_ring = vsi->tx_rings[pf_q];
-                               tx_ring->ch = ch;
+                               tx_ring->ch = iter;
 
                                /* Get the RX ring ptr */
                                rx_ring = vsi->rx_rings[pf_q];
-                               rx_ring->ch = ch;
+                               rx_ring->ch = iter;
                        }
+                       ch = iter;
                        break;
                }
        }
 
+       if (!ch)
+               return -EINVAL;
+
        /* Guarantee all rings are updated before we update the
         * MAC address filter.
         */
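
The i40e rewrite above is the list-iterator hygiene fix: after list_for_each_entry_safe() completes without a break, the cursor no longer points at a real element, so the code now walks with a throwaway iter and publishes the result into ch only on a genuine match, replacing the old list_empty() shortcut with a !ch check. A standalone sketch of the found/iter split on a plain singly linked list (assumed shapes, not the driver's types):

#include <stdio.h>
#include <stddef.h>

struct chan {
	int busy;
	struct chan *next;
};

/* returns the first free channel, or NULL - mirroring the ch/iter split */
static struct chan *find_free(struct chan *head)
{
	struct chan *found = NULL, *iter;

	for (iter = head; iter; iter = iter->next) {
		if (!iter->busy) {
			found = iter;	/* publish only on a real match */
			break;
		}
	}
	return found;	/* NULL means "no channel", like the -EINVAL path */
}

int main(void)
{
	struct chan c2 = { .busy = 0, .next = NULL };
	struct chan c1 = { .busy = 1, .next = &c2 };

	printf("free channel: %p\n", (void *)find_free(&c1));
	printf("none free:    %p\n", (void *)find_free(NULL));
	return 0;
}
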
index 8ed3c9ab7ff7282d23a7bab330c1eb634b14a3fa..a895e3a8e988c842f47e55341fa97f07f5c2d81e 100644 (file)
@@ -540,6 +540,7 @@ struct ice_pf {
        struct mutex avail_q_mutex;     /* protects access to avail_[rx|tx]qs */
        struct mutex sw_mutex;          /* lock for protecting VSI alloc flow */
        struct mutex tc_mutex;          /* lock to protect TC changes */
+       struct mutex adev_mutex;        /* lock to protect aux device access */
        u32 msg_enable;
        struct ice_ptp ptp;
        struct tty_driver *ice_gnss_tty_driver;
index 25a436d342c29094e2f72410d30737ae36af6753..3e3b2ed4cd5d9ec50ba46f5c14f18702505a9bcb 100644 (file)
@@ -37,14 +37,17 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
        if (WARN_ON_ONCE(!in_task()))
                return;
 
+       mutex_lock(&pf->adev_mutex);
        if (!pf->adev)
-               return;
+               goto finish;
 
        device_lock(&pf->adev->dev);
        iadrv = ice_get_auxiliary_drv(pf);
        if (iadrv && iadrv->event_handler)
                iadrv->event_handler(pf, event);
        device_unlock(&pf->adev->dev);
+finish:
+       mutex_unlock(&pf->adev_mutex);
 }
 
 /**
@@ -290,7 +293,6 @@ int ice_plug_aux_dev(struct ice_pf *pf)
                return -ENOMEM;
 
        adev = &iadev->adev;
-       pf->adev = adev;
        iadev->pf = pf;
 
        adev->id = pf->aux_idx;
@@ -300,18 +302,20 @@ int ice_plug_aux_dev(struct ice_pf *pf)
 
        ret = auxiliary_device_init(adev);
        if (ret) {
-               pf->adev = NULL;
                kfree(iadev);
                return ret;
        }
 
        ret = auxiliary_device_add(adev);
        if (ret) {
-               pf->adev = NULL;
                auxiliary_device_uninit(adev);
                return ret;
        }
 
+       mutex_lock(&pf->adev_mutex);
+       pf->adev = adev;
+       mutex_unlock(&pf->adev_mutex);
+
        return 0;
 }
 
@@ -320,12 +324,17 @@ int ice_plug_aux_dev(struct ice_pf *pf)
  */
 void ice_unplug_aux_dev(struct ice_pf *pf)
 {
-       if (!pf->adev)
-               return;
+       struct auxiliary_device *adev;
 
-       auxiliary_device_delete(pf->adev);
-       auxiliary_device_uninit(pf->adev);
+       mutex_lock(&pf->adev_mutex);
+       adev = pf->adev;
        pf->adev = NULL;
+       mutex_unlock(&pf->adev_mutex);
+
+       if (adev) {
+               auxiliary_device_delete(adev);
+               auxiliary_device_uninit(adev);
+       }
 }
 
 /**
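
The ice hunks above serialize every access to pf->adev behind the new adev_mutex: the plug path fully initializes the auxiliary device before publishing the pointer under the lock, and the unplug path snapshots and clears the pointer under the lock, then tears the device down outside it. A hedged pthreads sketch of that publish/snapshot idiom (userspace stand-ins, not the kernel locking API):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t adev_mutex = PTHREAD_MUTEX_INITIALIZER;
static int *adev;	/* stand-in for pf->adev */

static void plug(void)
{
	int *d = malloc(sizeof(*d));	/* fully initialize first... */
	*d = 42;

	pthread_mutex_lock(&adev_mutex);
	adev = d;			/* ...then publish under the lock */
	pthread_mutex_unlock(&adev_mutex);
}

static void unplug(void)
{
	int *d;

	pthread_mutex_lock(&adev_mutex);
	d = adev;			/* snapshot and clear under the lock */
	adev = NULL;
	pthread_mutex_unlock(&adev_mutex);

	if (d)
		free(d);		/* teardown happens outside the lock */
}

static void send_event(void)
{
	pthread_mutex_lock(&adev_mutex);
	if (adev)
		printf("event to adev %d\n", *adev);
	pthread_mutex_unlock(&adev_mutex);
}

int main(void)
{
	plug();
	send_event();
	unplug();
	send_event();	/* safely sees NULL instead of a stale pointer */
	return 0;
}
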
index 6d19c58ccacd46cf485529feb47ccf83c8129ef3..454e01ae09b970638c93243a9a1d8c9a5cec2696 100644 (file)
@@ -3043,8 +3043,8 @@ ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
        ice_for_each_q_vector(vsi, i) {
                struct ice_q_vector *q_vector = vsi->q_vectors[i];
 
-               coalesce[i].itr_tx = q_vector->tx.itr_setting;
-               coalesce[i].itr_rx = q_vector->rx.itr_setting;
+               coalesce[i].itr_tx = q_vector->tx.itr_settings;
+               coalesce[i].itr_rx = q_vector->rx.itr_settings;
                coalesce[i].intrl = q_vector->intrl;
 
                if (i < vsi->num_txq)
@@ -3100,21 +3100,21 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
                 */
                if (i < vsi->alloc_rxq && coalesce[i].rx_valid) {
                        rc = &vsi->q_vectors[i]->rx;
-                       rc->itr_setting = coalesce[i].itr_rx;
+                       rc->itr_settings = coalesce[i].itr_rx;
                        ice_write_itr(rc, rc->itr_setting);
                } else if (i < vsi->alloc_rxq) {
                        rc = &vsi->q_vectors[i]->rx;
-                       rc->itr_setting = coalesce[0].itr_rx;
+                       rc->itr_settings = coalesce[0].itr_rx;
                        ice_write_itr(rc, rc->itr_setting);
                }
 
                if (i < vsi->alloc_txq && coalesce[i].tx_valid) {
                        rc = &vsi->q_vectors[i]->tx;
-                       rc->itr_setting = coalesce[i].itr_tx;
+                       rc->itr_settings = coalesce[i].itr_tx;
                        ice_write_itr(rc, rc->itr_setting);
                } else if (i < vsi->alloc_txq) {
                        rc = &vsi->q_vectors[i]->tx;
-                       rc->itr_setting = coalesce[0].itr_tx;
+                       rc->itr_settings = coalesce[0].itr_tx;
                        ice_write_itr(rc, rc->itr_setting);
                }
 
@@ -3128,12 +3128,12 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
        for (; i < vsi->num_q_vectors; i++) {
                /* transmit */
                rc = &vsi->q_vectors[i]->tx;
-               rc->itr_setting = coalesce[0].itr_tx;
+               rc->itr_settings = coalesce[0].itr_tx;
                ice_write_itr(rc, rc->itr_setting);
 
                /* receive */
                rc = &vsi->q_vectors[i]->rx;
-               rc->itr_setting = coalesce[0].itr_rx;
+               rc->itr_settings = coalesce[0].itr_rx;
                ice_write_itr(rc, rc->itr_setting);
 
                vsi->q_vectors[i]->intrl = coalesce[0].intrl;
index 9a0a358a15c254979d884cfd5512d07c3c68c021..963a5f40e071b5bb68e15e357e500fc54948051d 100644 (file)
@@ -3769,6 +3769,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf)
 static void ice_deinit_pf(struct ice_pf *pf)
 {
        ice_service_task_stop(pf);
+       mutex_destroy(&pf->adev_mutex);
        mutex_destroy(&pf->sw_mutex);
        mutex_destroy(&pf->tc_mutex);
        mutex_destroy(&pf->avail_q_mutex);
@@ -3847,6 +3848,7 @@ static int ice_init_pf(struct ice_pf *pf)
 
        mutex_init(&pf->sw_mutex);
        mutex_init(&pf->tc_mutex);
+       mutex_init(&pf->adev_mutex);
 
        INIT_HLIST_HEAD(&pf->aq_wait_list);
        spin_lock_init(&pf->aq_wait_lock);
@@ -6170,9 +6172,10 @@ static int ice_up_complete(struct ice_vsi *vsi)
                        ice_ptp_link_change(pf, pf->hw.pf_id, true);
        }
 
-       /* clear this now, and the first stats read will be used as baseline */
-       vsi->stat_offsets_loaded = false;
-
+       /* Perform an initial read of the statistics registers now to
+        * set the baseline so counters are ready when the interface is up
+        */
+       ice_update_eth_stats(vsi);
        ice_service_task_schedule(pf);
 
        return 0;
index a1cd33273ca49e1fbba6159cf2b127b55c224338..662947c882e8b1c84439eec7310a55c4893395ea 100644 (file)
@@ -500,12 +500,19 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
  * This function must be called periodically to ensure that the cached value
  * is never more than 2 seconds old. It must also be called whenever the PHC
  * time has been changed.
+ *
+ * Return:
+ * * 0 - OK, successfully updated
+ * * -EAGAIN - PF was busy, need to reschedule the update
  */
-static void ice_ptp_update_cached_phctime(struct ice_pf *pf)
+static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
 {
        u64 systime;
        int i;
 
+       if (test_and_set_bit(ICE_CFG_BUSY, pf->state))
+               return -EAGAIN;
+
        /* Read the current PHC time */
        systime = ice_ptp_read_src_clk_reg(pf, NULL);
 
@@ -528,6 +535,9 @@ static void ice_ptp_update_cached_phctime(struct ice_pf *pf)
                        WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
                }
        }
+       clear_bit(ICE_CFG_BUSY, pf->state);
+
+       return 0;
 }
 
 /**
@@ -2287,6 +2297,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
 
 /**
  * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
+ * @hw: pointer to the hw struct
  * @tx: PTP Tx tracker to clean up
  *
  * Loop through the Tx timestamp requests and see if any of them have been
@@ -2295,7 +2306,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
  * timestamp will never be captured. This might happen if the packet gets
  * discarded before it reaches the PHY timestamping block.
  */
-static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx)
+static void ice_ptp_tx_tstamp_cleanup(struct ice_hw *hw, struct ice_ptp_tx *tx)
 {
        u8 idx;
 
@@ -2304,11 +2315,16 @@ static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx)
 
        for_each_set_bit(idx, tx->in_use, tx->len) {
                struct sk_buff *skb;
+               u64 raw_tstamp;
 
                /* Check if this SKB has been waiting for too long */
                if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
                        continue;
 
+               /* Read tstamp to be able to use this register again */
+               ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset,
+                                   &raw_tstamp);
+
                spin_lock(&tx->lock);
                skb = tx->tstamps[idx].skb;
                tx->tstamps[idx].skb = NULL;
@@ -2324,17 +2340,18 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
 {
        struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
        struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp);
+       int err;
 
        if (!test_bit(ICE_FLAG_PTP, pf->flags))
                return;
 
-       ice_ptp_update_cached_phctime(pf);
+       err = ice_ptp_update_cached_phctime(pf);
 
-       ice_ptp_tx_tstamp_cleanup(&pf->ptp.port.tx);
+       ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx);
 
-       /* Run twice a second */
+       /* Run twice a second, or reschedule sooner if the PHC update failed */
        kthread_queue_delayed_work(ptp->kworker, &ptp->work,
-                                  msecs_to_jiffies(500));
+                                  msecs_to_jiffies(err ? 10 : 500));
 }
 
 /**
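
The PTP hunks above turn the cached-PHC refresh into a try-lock: test_and_set_bit() claims the ICE_CFG_BUSY flag, a contended caller bails with -EAGAIN, and the periodic worker reschedules after 10 ms instead of the usual 500 ms. A minimal C11 sketch of that try-lock-or-retry shape, with atomic_flag standing in for the PF state bit:

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag cfg_busy = ATOMIC_FLAG_INIT;

/* returns 0 on success, -1 ("-EAGAIN") if another caller holds the flag */
static int update_cached_time(void)
{
	if (atomic_flag_test_and_set(&cfg_busy))
		return -1;

	/* ... read the clock and update the cached copies ... */

	atomic_flag_clear(&cfg_busy);
	return 0;
}

int main(void)
{
	int err = update_cached_time();

	/* mimic the worker: retry soon on failure, normal period otherwise */
	printf("resched in %d ms\n", err ? 10 : 500);
	return 0;
}
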
index cead3eb149bd5eb4b7bc21a6ca748fb0cbc9caf8..ffb3f6a589da4ce201907521a2b8eb792375387d 100644 (file)
@@ -384,9 +384,14 @@ struct ice_ring_container {
        /* this matches the maximum number of ITR bits, but in usec
         * values, so it is shifted left one bit (bit zero is ignored)
         */
-       u16 itr_setting:13;
-       u16 itr_reserved:2;
-       u16 itr_mode:1;
+       union {
+               struct {
+                       u16 itr_setting:13;
+                       u16 itr_reserved:2;
+                       u16 itr_mode:1;
+               };
+               u16 itr_settings;
+       };
        enum ice_container_type type;
 };
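
The union above is what lets the coalesce save/restore paths copy all three ITR fields in one itr_settings assignment, where saving only itr_setting used to drop itr_mode across a rebuild. A small standalone sketch of the layout, assuming (as the driver does) that the three bitfields pack into a single 16-bit word:

#include <stdio.h>
#include <stdint.h>

struct ring_container {
	union {
		struct {
			uint16_t itr_setting:13;
			uint16_t itr_reserved:2;
			uint16_t itr_mode:1;
		};
		uint16_t itr_settings;	/* all of the above at once */
	};
};

int main(void)
{
	struct ring_container a = { .itr_setting = 100, .itr_mode = 1 };
	struct ring_container b = { 0 };

	b.itr_settings = a.itr_settings;	/* copies the mode bit too */
	printf("setting=%u mode=%u\n", b.itr_setting, b.itr_mode);
	return 0;
}
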
 
index b72606c9e6d03a6e494feaf435fe9ab9ea246f87..2889e050a4c9384e0e0a1f814c25a1808ab23d6b 100644 (file)
@@ -1307,13 +1307,52 @@ error_param:
                                     NULL, 0);
 }
 
+/**
+ * ice_vf_vsi_dis_single_txq - disable a single Tx queue
+ * @vf: VF to disable queue for
+ * @vsi: VSI for the VF
+ * @q_id: VF relative (0-based) queue ID
+ *
+ * Attempt to disable the Tx queue passed in. If the Tx queue was successfully
+ * disabled then clear the q_id bit in the enabled queues bitmap and return
+ * success. Otherwise return an error.
+ */
+static int
+ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id)
+{
+       struct ice_txq_meta txq_meta = { 0 };
+       struct ice_tx_ring *ring;
+       int err;
+
+       if (!test_bit(q_id, vf->txq_ena))
+               dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
+                       q_id, vsi->vsi_num);
+
+       ring = vsi->tx_rings[q_id];
+       if (!ring)
+               return -EINVAL;
+
+       ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+       err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta);
+       if (err) {
+               dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
+                       q_id, vsi->vsi_num);
+               return err;
+       }
+
+       /* Clear enabled queues flag */
+       clear_bit(q_id, vf->txq_ena);
+
+       return 0;
+}
+
 /**
  * ice_vc_dis_qs_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
  *
- * called from the VF to disable all or specific
- * queue(s)
+ * called from the VF to disable all or specific queue(s)
  */
 static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
 {
@@ -1350,30 +1389,15 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
                q_map = vqs->tx_queues;
 
                for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
-                       struct ice_tx_ring *ring = vsi->tx_rings[vf_q_id];
-                       struct ice_txq_meta txq_meta = { 0 };
-
                        if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
                                v_ret = VIRTCHNL_STATUS_ERR_PARAM;
                                goto error_param;
                        }
 
-                       if (!test_bit(vf_q_id, vf->txq_ena))
-                               dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
-                                       vf_q_id, vsi->vsi_num);
-
-                       ice_fill_txq_meta(vsi, ring, &txq_meta);
-
-                       if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id,
-                                                ring, &txq_meta)) {
-                               dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
-                                       vf_q_id, vsi->vsi_num);
+                       if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) {
                                v_ret = VIRTCHNL_STATUS_ERR_PARAM;
                                goto error_param;
                        }
-
-                       /* Clear enabled queues flag */
-                       clear_bit(vf_q_id, vf->txq_ena);
                }
        }
 
@@ -1622,6 +1646,14 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
                if (qpi->txq.ring_len > 0) {
                        vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
                        vsi->tx_rings[i]->count = qpi->txq.ring_len;
+
+                       /* Disable any existing queue first */
+                       if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) {
+                               v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+                               goto error_param;
+                       }
+
+                       /* Configure a queue with the requested settings */
                        if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
                                v_ret = VIRTCHNL_STATUS_ERR_PARAM;
                                goto error_param;
index 34b33b21e0dcdc507827a4ce4524865cf4fd6232..68be2976f539f723f3909e3f7f8a3ce7a0fa929b 100644 (file)
@@ -5505,7 +5505,8 @@ static void igb_watchdog_task(struct work_struct *work)
                                break;
                        }
 
-                       if (adapter->link_speed != SPEED_1000)
+                       if (adapter->link_speed != SPEED_1000 ||
+                           !hw->phy.ops.read_reg)
                                goto no_wait;
 
                        /* wait for Remote receiver status OK */
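
The igb one-liner above guards the PHY register read behind a check that the read_reg op is actually populated, since not every PHY setup provides one. A generic sketch of guarding an optional ops callback (hypothetical ops struct, not the igb types):

#include <stdio.h>

struct phy_ops {
	int (*read_reg)(int reg, int *val);	/* may be NULL */
};

static int check_link(const struct phy_ops *ops)
{
	int val;

	/* skip the optional step if the hook is absent, like the igb fix */
	if (!ops->read_reg)
		return 0;

	return ops->read_reg(0x0f, &val);
}

int main(void)
{
	struct phy_ops no_phy = { .read_reg = NULL };

	printf("rc=%d\n", check_link(&no_phy));
	return 0;
}
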
index 3ad10c793308e6ee9c5a49a3bd6bb943c1bcd9b1..66298e2235c912de5556634819f67099329e62a8 100644 (file)
@@ -395,7 +395,7 @@ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
        static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
        int i, k;
 
-       memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(ppe->foe_table));
+       memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
 
        if (!IS_ENABLED(CONFIG_SOC_MT7621))
                return;
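
The one-character mtk_ppe fix above is the classic sizeof(pointer) vs sizeof(*pointer) bug: sizeof(ppe->foe_table) is the size of the pointer itself, so the old memset cleared only a small prefix of the table instead of MTK_PPE_ENTRIES full entries. A compact illustration (hypothetical entry layout):

#include <stdio.h>

struct foe_entry { unsigned int data[16]; };	/* hypothetical entry size */

int main(void)
{
	struct foe_entry *table = NULL;

	/* pointer size vs element size - the whole difference in the fix */
	printf("sizeof(table)  = %zu\n", sizeof(table));	/* 8 on LP64 */
	printf("sizeof(*table) = %zu\n", sizeof(*table));	/* 64 here */
	return 0;
}
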
index 59988e24b70410ef013fe58f621cdc074d0a9900..bec9ed0103a939d23dc57666ea40cd80e57ce3ad 100644 (file)
@@ -23,7 +23,7 @@ struct mlx5_ct_fs_smfs_matcher {
 };
 
 struct mlx5_ct_fs_smfs_matchers {
-       struct mlx5_ct_fs_smfs_matcher smfs_matchers[4];
+       struct mlx5_ct_fs_smfs_matcher smfs_matchers[6];
        struct list_head used;
 };
 
@@ -44,7 +44,8 @@ struct mlx5_ct_fs_smfs_rule {
 };
 
 static inline void
-mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp)
+mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp,
+                         bool gre)
 {
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
 
@@ -77,7 +78,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo
                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
                         ntohs(MLX5_CT_TCP_FLAGS_MASK));
-       } else {
+       } else if (!gre) {
                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport);
                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport);
        }
@@ -87,7 +88,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo
 
 static struct mlx5dr_matcher *
 mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4,
-                              bool tcp, u32 priority)
+                              bool tcp, bool gre, u32 priority)
 {
        struct mlx5dr_matcher *dr_matcher;
        struct mlx5_flow_spec *spec;
@@ -96,7 +97,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl,
        if (!spec)
                return ERR_PTR(-ENOMEM);
 
-       mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp);
+       mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre);
        spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
 
        dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec);
@@ -108,7 +109,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl,
 }
 
 static struct mlx5_ct_fs_smfs_matcher *
-mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp)
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre)
 {
        struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
        struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher;
@@ -119,7 +120,7 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp
        int prio;
 
        matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers;
-       smfs_matcher = &matchers->smfs_matchers[ipv4 * 2 + tcp];
+       smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre];
 
        if (refcount_inc_not_zero(&smfs_matcher->ref))
                return smfs_matcher;
@@ -145,11 +146,11 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp
        }
 
        tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl;
-       dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, prio);
+       dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio);
        if (IS_ERR(dr_matcher)) {
                netdev_warn(fs->netdev,
-                           "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d), err: %ld\n",
-                           nat, ipv4, tcp, PTR_ERR(dr_matcher));
+                           "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
+                           nat, ipv4, tcp, gre, PTR_ERR(dr_matcher));
 
                smfs_matcher = ERR_CAST(dr_matcher);
                goto out_unlock;
@@ -222,16 +223,17 @@ mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs)
 static inline bool
 mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys)
 {
-#define DISSECTOR_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
-       const u32 basic_keys = DISSECTOR_BIT(BASIC) | DISSECTOR_BIT(CONTROL) |
-                              DISSECTOR_BIT(PORTS) | DISSECTOR_BIT(META);
-       const u32 ipv4_tcp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS) | DISSECTOR_BIT(TCP);
-       const u32 ipv4_udp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS);
-       const u32 ipv6_tcp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS) | DISSECTOR_BIT(TCP);
-       const u32 ipv6_udp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS);
+#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
+       const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META);
+       const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+       const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+       const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS);
+       const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS);
+       const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
+       const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
 
        return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
-               used_keys == ipv6_udp);
+               used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
 }
 
 static bool
@@ -254,20 +256,24 @@ mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *f
        flow_rule_match_control(flow_rule, &control);
        flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
        flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
-       flow_rule_match_ports(flow_rule, &ports);
-       flow_rule_match_tcp(flow_rule, &tcp);
+       if (basic.key->ip_proto != IPPROTO_GRE)
+               flow_rule_match_ports(flow_rule, &ports);
+       if (basic.key->ip_proto == IPPROTO_TCP)
+               flow_rule_match_tcp(flow_rule, &tcp);
 
        if (basic.mask->n_proto != htons(0xFFFF) ||
            (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
            basic.mask->ip_proto != 0xFF ||
-           (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP)) {
+           (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
+            basic.key->ip_proto != IPPROTO_GRE)) {
                ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
                       ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
                       basic.key->ip_proto, basic.mask->ip_proto);
                return false;
        }
 
-       if (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF)) {
+       if (basic.key->ip_proto != IPPROTO_GRE &&
+           (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
                ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)",
                       ports.mask->src, ports.mask->dst);
                return false;
@@ -291,7 +297,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
        struct mlx5dr_action *actions[5];
        struct mlx5dr_rule *rule;
        int num_actions = 0, err;
-       bool nat, tcp, ipv4;
+       bool nat, tcp, ipv4, gre;
 
        if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule))
                return ERR_PTR(-EOPNOTSUPP);
@@ -314,15 +320,17 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
        ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
        tcp = MLX5_GET(fte_match_param, spec->match_value,
                       outer_headers.ip_protocol) == IPPROTO_TCP;
+       gre = MLX5_GET(fte_match_param, spec->match_value,
+                      outer_headers.ip_protocol) == IPPROTO_GRE;
 
-       smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp);
+       smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre);
        if (IS_ERR(smfs_matcher)) {
                err = PTR_ERR(smfs_matcher);
                goto err_matcher;
        }
 
        rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions,
-                                    MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT);
+                                    spec->flow_context.flow_source);
        if (!rule) {
                err = -EINVAL;
                goto err_create;
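
With GRE support added above, the matcher cache grows from 4 to 6 slots and the lookup becomes ipv4 * 3 + tcp * 2 + gre. Because tcp and gre are mutually exclusive, the sub-index tcp * 2 + gre only takes the values 0 (UDP), 1 (GRE) and 2 (TCP), so each address family owns three consecutive slots. A quick standalone check of the scheme:

#include <stdio.h>

int main(void)
{
	const char *l4[] = { "udp", "gre", "tcp" };
	int ipv4, tcp, gre;

	for (ipv4 = 0; ipv4 <= 1; ipv4++)
		for (tcp = 0; tcp <= 1; tcp++)
			for (gre = 0; gre <= !tcp; gre++)	/* tcp, gre exclusive */
				printf("ipv%c %s -> slot %d\n",
				       ipv4 ? '4' : '6',
				       l4[tcp * 2 + gre],
				       ipv4 * 3 + tcp * 2 + gre);
	return 0;
}
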
index a55b066746cb763dddd2b4577b47342856ea0319..857840ab1e91885d7a22810fc9b86505663f0299 100644 (file)
@@ -14,19 +14,26 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
        bool busy = false;
        int work_done = 0;
 
+       rcu_read_lock();
+
        ch_stats->poll++;
 
        work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
        busy |= work_done == budget;
        busy |= rq->post_wqes(rq);
 
-       if (busy)
-               return budget;
+       if (busy) {
+               work_done = budget;
+               goto out;
+       }
 
        if (unlikely(!napi_complete_done(napi, work_done)))
-               return work_done;
+               goto out;
 
        mlx5e_cq_arm(&rq->cq);
+
+out:
+       rcu_read_unlock();
        return work_done;
 }
 
index 2f1dedc721d1e8b0344c6981227fbdc095340786..fa229998606c2ac49111257922fffea11191c250 100644 (file)
@@ -3864,6 +3864,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev
        if (netdev->features & NETIF_F_NTUPLE)
                netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
 
+       features &= ~NETIF_F_GRO_HW;
+       if (netdev->features & NETIF_F_GRO_HW)
+               netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
+
        return features;
 }
 
@@ -3896,6 +3900,25 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
                }
        }
 
+       if (params->xdp_prog) {
+               if (features & NETIF_F_LRO) {
+                       netdev_warn(netdev, "LRO is incompatible with XDP\n");
+                       features &= ~NETIF_F_LRO;
+               }
+               if (features & NETIF_F_GRO_HW) {
+                       netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
+                       features &= ~NETIF_F_GRO_HW;
+               }
+       }
+
+       if (priv->xsk.refcnt) {
+               if (features & NETIF_F_GRO_HW) {
+                       netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
+                                   priv->xsk.refcnt);
+                       features &= ~NETIF_F_GRO_HW;
+               }
+       }
+
        if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
                features &= ~NETIF_F_RXHASH;
                if (netdev->features & NETIF_F_RXHASH)
@@ -4850,10 +4873,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
        netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
 
-       if (!!MLX5_CAP_GEN(mdev, shampo) &&
-           mlx5e_check_fragmented_striding_rq_cap(mdev))
-               netdev->hw_features    |= NETIF_F_GRO_HW;
-
        if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
                netdev->hw_enc_features |= NETIF_F_HW_CSUM;
                netdev->hw_enc_features |= NETIF_F_TSO;
index 816d991f762106496ae504765dafa39d8eda4fd8..3ad67e6b5586d143bae5773fdd1f2467f3a58b22 100644 (file)
@@ -2663,28 +2663,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
        clean_tree(&root_ns->ns.node);
 }
 
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
-{
-       struct mlx5_flow_steering *steering = dev->priv.steering;
-
-       cleanup_root_ns(steering->root_ns);
-       cleanup_root_ns(steering->fdb_root_ns);
-       steering->fdb_root_ns = NULL;
-       kfree(steering->fdb_sub_ns);
-       steering->fdb_sub_ns = NULL;
-       cleanup_root_ns(steering->port_sel_root_ns);
-       cleanup_root_ns(steering->sniffer_rx_root_ns);
-       cleanup_root_ns(steering->sniffer_tx_root_ns);
-       cleanup_root_ns(steering->rdma_rx_root_ns);
-       cleanup_root_ns(steering->rdma_tx_root_ns);
-       cleanup_root_ns(steering->egress_root_ns);
-       mlx5_cleanup_fc_stats(dev);
-       kmem_cache_destroy(steering->ftes_cache);
-       kmem_cache_destroy(steering->fgs_cache);
-       mlx5_ft_pool_destroy(dev);
-       kfree(steering);
-}
-
 static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
 {
        struct fs_prio *prio;
@@ -3086,42 +3064,27 @@ cleanup:
        return err;
 }
 
-int mlx5_init_fs(struct mlx5_core_dev *dev)
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
 {
-       struct mlx5_flow_steering *steering;
-       int err = 0;
-
-       err = mlx5_init_fc_stats(dev);
-       if (err)
-               return err;
-
-       err = mlx5_ft_pool_init(dev);
-       if (err)
-               return err;
-
-       steering = kzalloc(sizeof(*steering), GFP_KERNEL);
-       if (!steering) {
-               err = -ENOMEM;
-               goto err;
-       }
-
-       steering->dev = dev;
-       dev->priv.steering = steering;
+       struct mlx5_flow_steering *steering = dev->priv.steering;
 
-       if (mlx5_fs_dr_is_supported(dev))
-               steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
-       else
-               steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+       cleanup_root_ns(steering->root_ns);
+       cleanup_root_ns(steering->fdb_root_ns);
+       steering->fdb_root_ns = NULL;
+       kfree(steering->fdb_sub_ns);
+       steering->fdb_sub_ns = NULL;
+       cleanup_root_ns(steering->port_sel_root_ns);
+       cleanup_root_ns(steering->sniffer_rx_root_ns);
+       cleanup_root_ns(steering->sniffer_tx_root_ns);
+       cleanup_root_ns(steering->rdma_rx_root_ns);
+       cleanup_root_ns(steering->rdma_tx_root_ns);
+       cleanup_root_ns(steering->egress_root_ns);
+}
 
-       steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
-                                               sizeof(struct mlx5_flow_group), 0,
-                                               0, NULL);
-       steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
-                                                0, NULL);
-       if (!steering->ftes_cache || !steering->fgs_cache) {
-               err = -ENOMEM;
-               goto err;
-       }
+int mlx5_fs_core_init(struct mlx5_core_dev *dev)
+{
+       struct mlx5_flow_steering *steering = dev->priv.steering;
+       int err = 0;
 
        if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
              (MLX5_CAP_GEN(dev, nic_flow_table))) ||
@@ -3180,8 +3143,64 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
        }
 
        return 0;
+
+err:
+       mlx5_fs_core_cleanup(dev);
+       return err;
+}
+
+void mlx5_fs_core_free(struct mlx5_core_dev *dev)
+{
+       struct mlx5_flow_steering *steering = dev->priv.steering;
+
+       kmem_cache_destroy(steering->ftes_cache);
+       kmem_cache_destroy(steering->fgs_cache);
+       kfree(steering);
+       mlx5_ft_pool_destroy(dev);
+       mlx5_cleanup_fc_stats(dev);
+}
+
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
+{
+       struct mlx5_flow_steering *steering;
+       int err = 0;
+
+       err = mlx5_init_fc_stats(dev);
+       if (err)
+               return err;
+
+       err = mlx5_ft_pool_init(dev);
+       if (err)
+               goto err;
+
+       steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+       if (!steering) {
+               err = -ENOMEM;
+               goto err;
+       }
+
+       steering->dev = dev;
+       dev->priv.steering = steering;
+
+       if (mlx5_fs_dr_is_supported(dev))
+               steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
+       else
+               steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+
+       steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+                                               sizeof(struct mlx5_flow_group), 0,
+                                               0, NULL);
+       steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+                                                0, NULL);
+       if (!steering->ftes_cache || !steering->fgs_cache) {
+               err = -ENOMEM;
+               goto err;
+       }
+
+       return 0;
+
 err:
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_free(dev);
        return err;
 }
 
index c488a7c5b07e9f6e7ea3570061397ec50fe9250f..3f20523e514fd0aee50ac3079ea3bcf70bb2bf5e 100644 (file)
@@ -298,8 +298,10 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
 int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
                                 enum mlx5_flow_steering_mode mode);
 
-int mlx5_init_fs(struct mlx5_core_dev *dev);
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev);
+void mlx5_fs_core_free(struct mlx5_core_dev *dev);
+int mlx5_fs_core_init(struct mlx5_core_dev *dev);
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);
 
 int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
 void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
index ca1aba845dd6da173c3f172d8daed199d8c40503..81eb67fb95b04a1157dbe9765154907f4726f0aa 100644 (file)
@@ -8,7 +8,8 @@
 enum {
        MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
        MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
-       MLX5_FW_RESET_FLAGS_PENDING_COMP
+       MLX5_FW_RESET_FLAGS_PENDING_COMP,
+       MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
 };
 
 struct mlx5_fw_reset {
@@ -208,7 +209,10 @@ static void poll_sync_reset(struct timer_list *t)
 
        if (fatal_error) {
                mlx5_core_warn(dev, "Got Device Reset\n");
-               queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+               if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+                       queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+               else
+                       mlx5_core_err(dev, "Device is being removed, dropping new reset work\n");
                return;
        }
 
@@ -433,9 +437,12 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti
        struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
        struct mlx5_eqe *eqe = data;
 
+       if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+               return NOTIFY_DONE;
+
        switch (eqe->sub_type) {
        case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT:
-                       queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
+               queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
                break;
        case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT:
                mlx5_sync_reset_events_handle(fw_reset, eqe);
@@ -479,6 +486,18 @@ void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
        mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
 }
 
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+       set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
+       cancel_work_sync(&fw_reset->fw_live_patch_work);
+       cancel_work_sync(&fw_reset->reset_request_work);
+       cancel_work_sync(&fw_reset->reset_reload_work);
+       cancel_work_sync(&fw_reset->reset_now_work);
+       cancel_work_sync(&fw_reset->reset_abort_work);
+}
+
 int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
 {
        struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
index 694fc7cb268457e460374c7d014fc530f524b549..dc141c7e641a307b9579c1509a650538c7371c06 100644 (file)
@@ -16,6 +16,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
 void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
 void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev);
 int mlx5_fw_reset_init(struct mlx5_core_dev *dev);
 void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev);
 
index 2589e39eb9c72604df2c6afa29d88002dfb8a46d..ef196cb764e2a3edaf640c29e928954f1c1d1475 100644 (file)
@@ -938,6 +938,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
                goto err_sf_table_cleanup;
        }
 
+       err = mlx5_fs_core_alloc(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to alloc flow steering\n");
+               goto err_fs;
+       }
+
        dev->dm = mlx5_dm_create(dev);
        if (IS_ERR(dev->dm))
                mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
@@ -948,6 +954,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 
        return 0;
 
+err_fs:
+       mlx5_sf_table_cleanup(dev);
 err_sf_table_cleanup:
        mlx5_sf_hw_table_cleanup(dev);
 err_sf_hw_table_cleanup:
@@ -985,6 +993,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
        mlx5_hv_vhca_destroy(dev->hv_vhca);
        mlx5_fw_tracer_destroy(dev->tracer);
        mlx5_dm_cleanup(dev);
+       mlx5_fs_core_free(dev);
        mlx5_sf_table_cleanup(dev);
        mlx5_sf_hw_table_cleanup(dev);
        mlx5_vhca_event_cleanup(dev);
@@ -1191,7 +1200,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
                goto err_tls_start;
        }
 
-       err = mlx5_init_fs(dev);
+       err = mlx5_fs_core_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init flow steering\n");
                goto err_fs;
@@ -1236,7 +1245,7 @@ err_ec:
 err_vhca:
        mlx5_vhca_event_stop(dev);
 err_set_hca:
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_cleanup(dev);
 err_fs:
        mlx5_accel_tls_cleanup(dev);
 err_tls_start:
@@ -1265,7 +1274,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
        mlx5_ec_cleanup(dev);
        mlx5_sf_hw_table_destroy(dev);
        mlx5_vhca_event_stop(dev);
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_cleanup(dev);
        mlx5_accel_ipsec_cleanup(dev);
        mlx5_accel_tls_cleanup(dev);
        mlx5_fpga_device_stop(dev);
@@ -1618,6 +1627,10 @@ static void remove_one(struct pci_dev *pdev)
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct devlink *devlink = priv_to_devlink(dev);
 
+       /* mlx5_drain_fw_reset() uses devlink APIs. Hence, we must drain
+        * fw_reset before unregistering the devlink.
+        */
+       mlx5_drain_fw_reset(dev);
        devlink_unregister(devlink);
        mlx5_sriov_disable(pdev);
        mlx5_crdump_disable(dev);
index 850937cd8bf9cd6eccafdacd4c2d845115e2af07..1383550f44c1297e25159b0cdb7fc6f81a947d51 100644 (file)
@@ -530,6 +530,37 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
        return 0;
 }
 
+static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn,
+                                       struct mlx5dr_ste_actions_attr *attr,
+                                       bool rx_rule,
+                                       bool *recalc_cs_required)
+{
+       *recalc_cs_required = false;
+
+       /* if device supports csum recalculation - no adjustment needed */
+       if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps))
+               return;
+
+       /* no adjustment needed on TX rules */
+       if (!rx_rule)
+               return;
+
+       if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) {
+               /* Ignore the modify TTL action.
+                * It is always kept as last HW action.
+                */
+               attr->modify_actions--;
+               return;
+       }
+
+       if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB)
+               /* Due to a HW bug on some devices, modifying TTL on RX flows
+                * will cause an incorrect checksum calculation. In such cases
+                * we will use a FW table to recalculate the checksum.
+                */
+               *recalc_cs_required = true;
+}
+
 static void dr_action_print_sequence(struct mlx5dr_domain *dmn,
                                     struct mlx5dr_action *actions[],
                                     int last_idx)
@@ -650,8 +681,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
                case DR_ACTION_TYP_MODIFY_HDR:
                        attr.modify_index = action->rewrite->index;
                        attr.modify_actions = action->rewrite->num_of_actions;
-                       recalc_cs_required = action->rewrite->modify_ttl &&
-                                            !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps);
+                       if (action->rewrite->modify_ttl)
+                               dr_action_modify_ttl_adjust(dmn, &attr, rx_rule,
+                                                           &recalc_cs_required);
                        break;
                case DR_ACTION_TYP_L2_TO_TNL_L2:
                case DR_ACTION_TYP_L2_TO_TNL_L3:
@@ -732,12 +764,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
        *new_hw_ste_arr_sz = nic_matcher->num_of_builders;
        last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1);
 
-       /* Due to a HW bug in some devices, modifying TTL on RX flows will
-        * cause an incorrect checksum calculation. In this case we will
-        * use a FW table to recalculate.
-        */
-       if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
-           rx_rule && recalc_cs_required && dest_action) {
+       if (recalc_cs_required && dest_action) {
                ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr);
                if (ret) {
                        mlx5dr_err(dmn,
@@ -842,7 +869,8 @@ struct mlx5dr_action *
 mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_action_dest *dests,
                                   u32 num_of_dests,
-                                  bool ignore_flow_level)
+                                  bool ignore_flow_level,
+                                  u32 flow_source)
 {
        struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
        struct mlx5dr_action **ref_actions;
@@ -914,7 +942,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
                                      reformat_req,
                                      &action->dest_tbl->fw_tbl.id,
                                      &action->dest_tbl->fw_tbl.group_id,
-                                     ignore_flow_level);
+                                     ignore_flow_level,
+                                     flow_source);
        if (ret)
                goto free_action;
 
@@ -1556,12 +1585,6 @@ dr_action_modify_check_is_ttl_modify(const void *sw_action)
        return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL;
 }
 
-static bool dr_action_modify_ttl_ignore(struct mlx5dr_domain *dmn)
-{
-       return !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps) &&
-              !MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify);
-}
-
 static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
                                            u32 max_hw_actions,
                                            u32 num_sw_actions,
@@ -1573,6 +1596,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
        const struct mlx5dr_ste_action_modify_field *hw_dst_action_info;
        const struct mlx5dr_ste_action_modify_field *hw_src_action_info;
        struct mlx5dr_domain *dmn = action->rewrite->dmn;
+       __be64 *modify_ttl_sw_action = NULL;
        int ret, i, hw_idx = 0;
        __be64 *sw_action;
        __be64 hw_action;
@@ -1585,8 +1609,14 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
        action->rewrite->allow_rx = 1;
        action->rewrite->allow_tx = 1;
 
-       for (i = 0; i < num_sw_actions; i++) {
-               sw_action = &sw_actions[i];
+       for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) {
+               /* modify TTL is handled separately, as the last action */
+               if (i == num_sw_actions) {
+                       sw_action = modify_ttl_sw_action;
+                       modify_ttl_sw_action = NULL;
+               } else {
+                       sw_action = &sw_actions[i];
+               }
 
                ret = dr_action_modify_check_field_limitation(action,
                                                              sw_action);
@@ -1595,10 +1625,9 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
 
                if (!(*modify_ttl) &&
                    dr_action_modify_check_is_ttl_modify(sw_action)) {
-                       if (dr_action_modify_ttl_ignore(dmn))
-                               continue;
-
+                       modify_ttl_sw_action = sw_action;
                        *modify_ttl = true;
+                       continue;
                }
 
                /* Convert SW action to HW action */
index 68a4c32d5f34c535f557cd199fb85dc011302527..f05ef0cd54baca456a72d063f0da3fa01843af30 100644 (file)
@@ -104,7 +104,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
                            bool reformat_req,
                            u32 *tbl_id,
                            u32 *group_id,
-                           bool ignore_flow_level)
+                           bool ignore_flow_level,
+                           u32 flow_source)
 {
        struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
        struct mlx5dr_cmd_fte_info fte_info = {};
@@ -139,6 +140,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
        fte_info.val = val;
        fte_info.dest_arr = dest;
        fte_info.ignore_flow_level = ignore_flow_level;
+       fte_info.flow_context.flow_source = flow_source;
 
        ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
        if (ret) {
index 5a322335f2043d570302fd810ca7f5a06737d80e..2010d4ac651909e1ff2d5d8a7ca2a485ae13e50c 100644 (file)
@@ -420,7 +420,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
         * encapsulation. The reason for that is that we support
         * modify headers for outer headers only
         */
-       if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+       if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
                dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
                dr_ste_v0_set_rewrite_actions(last_ste,
                                              attr->modify_actions,
@@ -513,7 +513,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
                }
        }
 
-       if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+       if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
                if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
                        dr_ste_v0_arr_init_next(&last_ste,
                                                added_stes,
index 46866a5fc5ca3b402d282546a33eabf4a85f3717..98320e3945adb3028b4ccb9cb379b6b0097ba577 100644 (file)
@@ -1461,7 +1461,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
                            bool reformat_req,
                            u32 *tbl_id,
                            u32 *group_id,
-                           bool ignore_flow_level);
+                           bool ignore_flow_level,
+                           u32 flow_source);
 void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
                              u32 group_id);
 #endif  /* _DR_TYPES_H_ */
index 045b0cf90063b1b1b45081465102c9038eb9ace9..728f8188258929dcc900f42049575e7d8215898c 100644 (file)
@@ -520,6 +520,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
        } else if (num_term_actions > 1) {
                bool ignore_flow_level =
                        !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+               u32 flow_source = fte->flow_context.flow_source;
 
                if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
                    fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
@@ -529,7 +530,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
                tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
                                                                term_actions,
                                                                num_term_actions,
-                                                               ignore_flow_level);
+                                                               ignore_flow_level,
+                                                               flow_source);
                if (!tmp_action) {
                        err = -EOPNOTSUPP;
                        goto free_actions;
index ec5cbec0d4553d22eb2c90ccb4954498e6a325fd..7626c85643b1f171d46f11a5b9d7f696fc847dd6 100644 (file)
@@ -99,7 +99,8 @@ struct mlx5dr_action *
 mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_action_dest *dests,
                                   u32 num_of_dests,
-                                  bool ignore_flow_level);
+                                  bool ignore_flow_level,
+                                  u32 flow_source);
 
 struct mlx5dr_action *mlx5dr_action_create_drop(void);
 
index 01cf5a6a26bd323aa583f17684f739da10cbe660..a2ee695a3f17856d23afff3979d842ff11ad38d9 100644 (file)
@@ -568,10 +568,8 @@ static int
 mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_ipip_entry *ipip_entry)
 {
-       struct __ip6_tnl_parm parms6;
-
-       parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev);
-       return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, &parms6.raddr,
+       return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp,
+                                                &ipip_entry->parms.daddr.addr6,
                                                 &ipip_entry->dip_kvdl_index);
 }
 
@@ -579,10 +577,7 @@ static void
 mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp,
                                   const struct mlxsw_sp_ipip_entry *ipip_entry)
 {
-       struct __ip6_tnl_parm parms6;
-
-       parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev);
-       mlxsw_sp_ipv6_addr_put(mlxsw_sp, &parms6.raddr);
+       mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6);
 }
 
 static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = {
index 95830e3e2b1fbfd5e6392d70cf4de9fa594eb75a..05f6dcc9dfd5295a51807073095b9ec3bdbfafc3 100644 (file)
@@ -103,6 +103,24 @@ static int lan966x_create_targets(struct platform_device *pdev,
        return 0;
 }
 
+static bool lan966x_port_unique_address(struct net_device *dev)
+{
+       struct lan966x_port *port = netdev_priv(dev);
+       struct lan966x *lan966x = port->lan966x;
+       int p;
+
+       for (p = 0; p < lan966x->num_phys_ports; ++p) {
+               port = lan966x->ports[p];
+               if (!port || port->dev == dev)
+                       continue;
+
+               if (ether_addr_equal(dev->dev_addr, port->dev->dev_addr))
+                       return false;
+       }
+
+       return true;
+}
+
 static int lan966x_port_set_mac_address(struct net_device *dev, void *p)
 {
        struct lan966x_port *port = netdev_priv(dev);
@@ -110,16 +128,26 @@ static int lan966x_port_set_mac_address(struct net_device *dev, void *p)
        const struct sockaddr *addr = p;
        int ret;
 
+       if (ether_addr_equal(addr->sa_data, dev->dev_addr))
+               return 0;
+
        /* Learn the new net device MAC address in the mac table. */
        ret = lan966x_mac_cpu_learn(lan966x, addr->sa_data, HOST_PVID);
        if (ret)
                return ret;
 
+       /* If another port still uses the same address as this dev, don't
+        * delete it from the MAC table
+        */
+       if (!lan966x_port_unique_address(dev))
+               goto out;
+
        /* Then forget the previous one. */
        ret = lan966x_mac_cpu_forget(lan966x, dev->dev_addr, HOST_PVID);
        if (ret)
                return ret;
 
+out:
        eth_hw_addr_set(dev, addr->sa_data);
        return ret;
 }
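
The lan966x change above learns the new MAC first and then forgets the old one only when no sibling port still uses it, since the forget would otherwise remove an entry another port depends on. A hedged sketch of the uniqueness scan (a flat array instead of the switch's port table):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NUM_PORTS 4
#define ETH_ALEN  6

static unsigned char ports[NUM_PORTS][ETH_ALEN];

/* true if no other port shares @addr - mirrors lan966x_port_unique_address() */
static bool addr_unique(int self, const unsigned char *addr)
{
	int p;

	for (p = 0; p < NUM_PORTS; p++) {
		if (p == self)
			continue;
		if (!memcmp(ports[p], addr, ETH_ALEN))
			return false;
	}
	return true;
}

int main(void)
{
	unsigned char mac[ETH_ALEN] = { 2, 0, 0, 0, 0, 1 };

	memcpy(ports[0], mac, ETH_ALEN);
	memcpy(ports[1], mac, ETH_ALEN);	/* port 1 shares port 0's MAC */

	/* only forget the old address when no one else still uses it */
	printf("forget old addr? %s\n",
	       addr_unique(0, ports[0]) ? "yes" : "no");
	return 0;
}
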
index ca71b62a44dc3b84e8990aae13d355170726ea54..20ceac81a2c2c3b19f0f969f5dc277f0e8a73789 100644 (file)
@@ -1622,7 +1622,7 @@ int ocelot_trap_add(struct ocelot *ocelot, int port,
                trap->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY;
                trap->action.port_mask = 0;
                trap->take_ts = take_ts;
-               list_add_tail(&trap->trap_list, &ocelot->traps);
+               trap->is_trap = true;
                new = true;
        }
 
@@ -1634,10 +1634,8 @@ int ocelot_trap_add(struct ocelot *ocelot, int port,
                err = ocelot_vcap_filter_replace(ocelot, trap);
        if (err) {
                trap->ingress_port_mask &= ~BIT(port);
-               if (!trap->ingress_port_mask) {
-                       list_del(&trap->trap_list);
+               if (!trap->ingress_port_mask)
                        kfree(trap);
-               }
                return err;
        }
 
@@ -1657,11 +1655,8 @@ int ocelot_trap_del(struct ocelot *ocelot, int port, unsigned long cookie)
                return 0;
 
        trap->ingress_port_mask &= ~BIT(port);
-       if (!trap->ingress_port_mask) {
-               list_del(&trap->trap_list);
-
+       if (!trap->ingress_port_mask)
                return ocelot_vcap_filter_del(ocelot, trap);
-       }
 
        return ocelot_vcap_filter_replace(ocelot, trap);
 }
index 03b5e59d033e43d7166ada44495ace28de3b6fcc..51cf241ff7d07a289bdceebe0fea5baadfc1da2e 100644 (file)
@@ -280,9 +280,10 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port,
                        filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
                        break;
                case FLOW_ACTION_TRAP:
-                       if (filter->block_id != VCAP_IS2) {
+                       if (filter->block_id != VCAP_IS2 ||
+                           filter->lookup != 0) {
                                NL_SET_ERR_MSG_MOD(extack,
-                                                  "Trap action can only be offloaded to VCAP IS2");
+                                                  "Trap action can only be offloaded to VCAP IS2 lookup 0");
                                return -EOPNOTSUPP;
                        }
                        if (filter->goto_target != -1) {
@@ -295,7 +296,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port,
                        filter->action.cpu_copy_ena = true;
                        filter->action.cpu_qu_num = 0;
                        filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
-                       list_add_tail(&filter->trap_list, &ocelot->traps);
+                       filter->is_trap = true;
                        break;
                case FLOW_ACTION_POLICE:
                        if (filter->block_id == PSFP_BLOCK_ID) {
@@ -878,8 +879,6 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port,
 
        ret = ocelot_flower_parse(ocelot, port, ingress, f, filter);
        if (ret) {
-               if (!list_empty(&filter->trap_list))
-                       list_del(&filter->trap_list);
                kfree(filter);
                return ret;
        }
index c8701ac955a8ff927bb6ef75641d78db0f0deb36..eeb4cc07dd16f407604fb3790bafaf2886cc09cf 100644 (file)
@@ -374,7 +374,6 @@ static void is2_entry_set(struct ocelot *ocelot, int ix,
                         OCELOT_VCAP_BIT_0);
        vcap_key_set(vcap, &data, VCAP_IS2_HK_IGR_PORT_MASK, 0,
                     ~filter->ingress_port_mask);
-       vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_FIRST, OCELOT_VCAP_BIT_ANY);
        vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_HOST_MATCH,
                         OCELOT_VCAP_BIT_ANY);
        vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L2_MC, filter->dmac_mc);
@@ -1217,6 +1216,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot,
                struct ocelot_vcap_filter *tmp;
 
                tmp = ocelot_vcap_block_find_filter_by_index(block, i);
+               /* Read back the filter's counters before moving it */
+               vcap_entry_get(ocelot, i - 1, tmp);
                vcap_entry_set(ocelot, i, tmp);
        }
 
@@ -1250,7 +1251,11 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot,
        struct ocelot_vcap_filter del_filter;
        int i, index;
 
+       /* Need to inherit the block_id so that vcap_entry_set()
+        * knows which block to install it in.
+        */
        memset(&del_filter, 0, sizeof(del_filter));
+       del_filter.block_id = filter->block_id;
 
        /* Gets index of the filter */
        index = ocelot_vcap_block_get_filter_index(block, filter);
@@ -1265,6 +1270,8 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot,
                struct ocelot_vcap_filter *tmp;
 
                tmp = ocelot_vcap_block_find_filter_by_index(block, i);
+               /* Read back the filter's counters before moving it */
+               vcap_entry_get(ocelot, i + 1, tmp);
                vcap_entry_set(ocelot, i, tmp);
        }
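
The read-back matters because VCAP rules are addressed by row and the hardware keeps a per-row hit counter: moving an entry without first folding in the live counter would silently drop the hits accumulated since the last read. A minimal userspace model of the idiom (all names and the counter semantics are illustrative, not the ocelot register layout):

```c
#include <stdio.h>

struct filter { int id; unsigned long stats; };    /* software copy    */
static unsigned long hw_counter[8];                /* live HW counters */
static struct filter *slot[8];                     /* filter per row   */

static void vcap_entry_get(int pos, struct filter *f)
{
        f->stats += hw_counter[pos];    /* fold in hits since last read */
        hw_counter[pos] = 0;
}

static void vcap_entry_set(int pos, struct filter *f)
{
        slot[pos] = f;                  /* rewriting a row resets its counter */
}

/* Fill the hole left by deleting the entry at row 'del'. */
static void shift_up(int del, int last)
{
        for (int i = del; i < last; i++) {
                struct filter *f = slot[i + 1];

                vcap_entry_get(i + 1, f);   /* without this, the hits     */
                vcap_entry_set(i, f);       /* accumulated at row i+1     */
        }                                   /* would be silently dropped  */
}

int main(void)
{
        static struct filter f[8];

        for (int i = 0; i < 8; i++) {
                f[i].id = i;
                slot[i] = &f[i];
                hw_counter[i] = 100 + i;    /* pretend traffic hit each row */
        }
        shift_up(2, 7);
        printf("row 2 now filter %d with %lu hits\n", slot[2]->id, slot[2]->stats);
        return 0;
}
```
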
 
index 6ffc62c411655c0ba7a4e693db339ab36e9944b9..0a7a757494bc5f8448a03e4a82ad10aff462d0f5 100644 (file)
@@ -256,7 +256,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        err = ionic_map_bars(ionic);
        if (err)
-               goto err_out_pci_disable_device;
+               goto err_out_pci_release_regions;
 
        /* Configure the device */
        err = ionic_setup(ionic);
@@ -360,6 +360,7 @@ err_out_teardown:
 
 err_out_unmap_bars:
        ionic_unmap_bars(ionic);
+err_out_pci_release_regions:
        pci_release_regions(pdev);
 err_out_pci_disable_device:
        pci_disable_device(pdev);
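
The relabelled goto restores the usual LIFO unwind rule: a failure jumps to the label that releases everything acquired before the failing step, and the labels fall through in reverse order of acquisition. A standalone sketch of the shape (the resource names stand in for the PCI calls):

```c
#include <stdio.h>

static int acquire(const char *what) { printf("acquire %s\n", what); return 0; }
static void release(const char *what) { printf("release %s\n", what); }

static int probe(void)
{
        int err;

        err = acquire("device");               /* pci_enable_device()   */
        if (err)
                return err;
        err = acquire("regions");              /* pci_request_regions() */
        if (err)
                goto err_out_disable_device;
        err = acquire("bars");                 /* ionic_map_bars()      */
        if (err)
                goto err_out_release_regions;  /* not disable_device:
                                                * regions must go first */
        return 0;

err_out_release_regions:
        release("regions");
err_out_disable_device:                        /* fall-through in reverse
                                                * order of acquisition   */
        release("device");
        return err;
}

int main(void) { return probe(); }
```
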
index b30589a135c248254cf3fa2eda98e871b34781cc..06f4d9a9e9388634ca3d08e3911efa4cd09e4d25 100644 (file)
@@ -3614,7 +3614,8 @@ static void ql_reset_work(struct work_struct *work)
                qdev->mem_map_registers;
        unsigned long hw_flags;
 
-       if (test_bit((QL_RESET_PER_SCSI | QL_RESET_START), &qdev->flags)) {
+       if (test_bit(QL_RESET_PER_SCSI, &qdev->flags) ||
+           test_bit(QL_RESET_START, &qdev->flags)) {
                clear_bit(QL_LINK_MASTER, &qdev->flags);
 
                /*
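
The original test was broken because test_bit() takes a bit *number*, not a mask: OR-ing two bit numbers yields a third, unrelated bit. A standalone sketch with made-up flag values (the driver's real values may differ):

```c
#include <stdio.h>

#define QL_RESET_PER_SCSI  4    /* illustrative bit numbers */
#define QL_RESET_START     3

static int test_bit(int nr, const unsigned long *addr)
{
        return (*addr >> nr) & 1UL;
}

int main(void)
{
        unsigned long flags = 1UL << QL_RESET_START;    /* only START set */

        /* (4 | 3) == 7, so the old form tests bit 7 -- neither flag: */
        printf("OR'd:     %d\n",
               test_bit(QL_RESET_PER_SCSI | QL_RESET_START, &flags));

        /* The fixed form tests each bit individually: */
        printf("separate: %d\n",
               test_bit(QL_RESET_PER_SCSI, &flags) ||
               test_bit(QL_RESET_START, &flags));
        return 0;
}
```
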
index 50d535981a35f02f90bfb58f2faf522176d49f90..f8edb3f1b73ad7c174588a1a9702086252db1fc4 100644 (file)
@@ -3579,6 +3579,11 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx)
                n_parts++;
        }
 
+       if (!n_parts) {
+               kfree(parts);
+               return 0;
+       }
+
        rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
 fail:
        if (rc)
index 377df8b7f0159b257659fb5c2e034e771126c32c..40df910aa1401c266f03aa7b42173ad2663481fc 100644 (file)
@@ -867,7 +867,9 @@ static void efx_set_xdp_channels(struct efx_nic *efx)
 
 int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 {
-       struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
+       struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel,
+                          *ptp_channel = efx_ptp_channel(efx);
+       struct efx_ptp_data *ptp_data = efx->ptp_data;
        unsigned int i, next_buffer_table = 0;
        u32 old_rxq_entries, old_txq_entries;
        int rc, rc2;
@@ -938,6 +940,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 
        efx_set_xdp_channels(efx);
 out:
+       efx->ptp_data = NULL;
        /* Destroy unused channel structures */
        for (i = 0; i < efx->n_channels; i++) {
                channel = other_channel[i];
@@ -948,6 +951,7 @@ out:
                }
        }
 
+       efx->ptp_data = ptp_data;
        rc2 = efx_soft_enable_interrupts(efx);
        if (rc2) {
                rc = rc ? rc : rc2;
@@ -966,6 +970,7 @@ rollback:
        efx->txq_entries = old_txq_entries;
        for (i = 0; i < efx->n_channels; i++)
                swap(efx->channel[i], other_channel[i]);
+       efx_ptp_update_channel(efx, ptp_channel);
        goto out;
 }
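
Because the PTP channel now gets a real .copy operation (see the ptp.c hunk below) and is reallocated like any other channel, efx->ptp_data has to be carried across the teardown of the old channel set by hand. Condensed, the flow added above looks like this (kernel-style, not standalone):

```c
ptp_channel = efx_ptp_channel(efx);    /* channel owning PTP today      */
ptp_data    = efx->ptp_data;           /* keep the state across realloc */

efx->ptp_data = NULL;                  /* hide it while the old channel
                                        * structures are destroyed      */
/* ... destroy unused channel structures ... */
efx->ptp_data = ptp_data;              /* reattach the saved state      */

/* on rollback the original channels survive, so repoint PTP at them: */
efx_ptp_update_channel(efx, ptp_channel);
```
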
 
index f0ef515e2ade51d3715473616032edadad6ec2f4..4625f85acab2ea9b2016a89a0ca146dab13a52bb 100644 (file)
@@ -45,6 +45,7 @@
 #include "farch_regs.h"
 #include "tx.h"
 #include "nic.h" /* indirectly includes ptp.h */
+#include "efx_channels.h"
 
 /* Maximum number of events expected to make up a PTP event */
 #define        MAX_EVENT_FRAGS                 3
@@ -541,6 +542,12 @@ struct efx_channel *efx_ptp_channel(struct efx_nic *efx)
        return efx->ptp_data ? efx->ptp_data->channel : NULL;
 }
 
+void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel)
+{
+       if (efx->ptp_data)
+               efx->ptp_data->channel = channel;
+}
+
 static u32 last_sync_timestamp_major(struct efx_nic *efx)
 {
        struct efx_channel *channel = efx_ptp_channel(efx);
@@ -1443,6 +1450,11 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel)
        int rc = 0;
        unsigned int pos;
 
+       if (efx->ptp_data) {
+               efx->ptp_data->channel = channel;
+               return 0;
+       }
+
        ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL);
        efx->ptp_data = ptp;
        if (!efx->ptp_data)
@@ -2176,7 +2188,7 @@ static const struct efx_channel_type efx_ptp_channel_type = {
        .pre_probe              = efx_ptp_probe_channel,
        .post_remove            = efx_ptp_remove_channel,
        .get_name               = efx_ptp_get_channel_name,
-       /* no copy operation; there is no need to reallocate this channel */
+       .copy                   = efx_copy_channel,
        .receive_skb            = efx_ptp_rx,
        .want_txqs              = efx_ptp_want_txqs,
        .keep_eventq            = false,
index 9855e8c9e544d7a90c1fd6c375d8911d1d8d9672..7b1ef7002b3f047c6d049e8b8625979b4138aea5 100644 (file)
@@ -16,6 +16,7 @@ struct ethtool_ts_info;
 int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
 void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
 struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
+void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel);
 void efx_ptp_remove(struct efx_nic *efx);
 int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
 int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
index fcf17d8a0494b7a464e2e704bf049d44738640fe..644bb54f5f0204bc117e112810603eccef1a1b7b 100644 (file)
@@ -181,7 +181,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
                return -ENOMEM;
 
        /* Enable pci device */
-       ret = pci_enable_device(pdev);
+       ret = pcim_enable_device(pdev);
        if (ret) {
                dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n",
                        __func__);
@@ -241,8 +241,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev)
                pcim_iounmap_regions(pdev, BIT(i));
                break;
        }
-
-       pci_disable_device(pdev);
 }
 
 static int __maybe_unused stmmac_pci_suspend(struct device *dev)
index 42460c0885fc31b4585d0b450579cd9135965c2b..df70df29deeaa9228e23baf1005ff785c104ac80 100644 (file)
 
 #include "niu.h"
 
+/* This driver wants to store a link to a "next page" within the
+ * page struct itself by overloading the content of the "mapping"
+ * member. This is not expected by the page API, but does currently
+ * work. However, the randstruct plugin gets very bothered by this
+ * case because "mapping" (struct address_space) is randomized, so
+ * casts to/from it trigger warnings. Hide this by way of a union,
+ * to create a typed alias of "mapping", since that's how it is
+ * actually being used here.
+ */
+union niu_page {
+       struct page page;
+       struct {
+               unsigned long __flags;  /* unused alias of "flags" */
+               struct list_head __lru; /* unused alias of "lru" */
+               struct page *next;      /* alias of "mapping" */
+       };
+};
+#define niu_next_page(p)       container_of(p, union niu_page, page)->next
+
 #define DRV_MODULE_NAME                "niu"
 #define DRV_MODULE_VERSION     "1.1"
 #define DRV_MODULE_RELDATE     "Apr 22, 2010"
@@ -3283,7 +3302,7 @@ static struct page *niu_find_rxpage(struct rx_ring_info *rp, u64 addr,
 
        addr &= PAGE_MASK;
        pp = &rp->rxhash[h];
-       for (; (p = *pp) != NULL; pp = (struct page **) &p->mapping) {
+       for (; (p = *pp) != NULL; pp = &niu_next_page(p)) {
                if (p->index == addr) {
                        *link = pp;
                        goto found;
@@ -3300,7 +3319,7 @@ static void niu_hash_page(struct rx_ring_info *rp, struct page *page, u64 base)
        unsigned int h = niu_hash_rxaddr(rp, base);
 
        page->index = base;
-       page->mapping = (struct address_space *) rp->rxhash[h];
+       niu_next_page(page) = rp->rxhash[h];
        rp->rxhash[h] = page;
 }
 
@@ -3382,11 +3401,11 @@ static int niu_rx_pkt_ignore(struct niu *np, struct rx_ring_info *rp)
                rcr_size = rp->rbr_sizes[(val & RCR_ENTRY_PKTBUFSZ) >>
                                         RCR_ENTRY_PKTBUFSZ_SHIFT];
                if ((page->index + PAGE_SIZE) - rcr_size == addr) {
-                       *link = (struct page *) page->mapping;
+                       *link = niu_next_page(page);
                        np->ops->unmap_page(np->device, page->index,
                                            PAGE_SIZE, DMA_FROM_DEVICE);
                        page->index = 0;
-                       page->mapping = NULL;
+                       niu_next_page(page) = NULL;
                        __free_page(page);
                        rp->rbr_refill_pending++;
                }
@@ -3451,11 +3470,11 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 
                niu_rx_skb_append(skb, page, off, append_size, rcr_size);
                if ((page->index + rp->rbr_block_size) - rcr_size == addr) {
-                       *link = (struct page *) page->mapping;
+                       *link = niu_next_page(page);
                        np->ops->unmap_page(np->device, page->index,
                                            PAGE_SIZE, DMA_FROM_DEVICE);
                        page->index = 0;
-                       page->mapping = NULL;
+                       niu_next_page(page) = NULL;
                        rp->rbr_refill_pending++;
                } else
                        get_page(page);
@@ -3518,13 +3537,13 @@ static void niu_rbr_free(struct niu *np, struct rx_ring_info *rp)
 
                page = rp->rxhash[i];
                while (page) {
-                       struct page *next = (struct page *) page->mapping;
+                       struct page *next = niu_next_page(page);
                        u64 base = page->index;
 
                        np->ops->unmap_page(np->device, base, PAGE_SIZE,
                                            DMA_FROM_DEVICE);
                        page->index = 0;
-                       page->mapping = NULL;
+                       niu_next_page(page) = NULL;
 
                        __free_page(page);
 
@@ -6440,8 +6459,7 @@ static void niu_reset_buffers(struct niu *np)
 
                                page = rp->rxhash[j];
                                while (page) {
-                                       struct page *next =
-                                               (struct page *) page->mapping;
+                                       struct page *next = niu_next_page(page);
                                        u64 base = page->index;
                                        base = base >> RBR_DESCR_ADDR_SHIFT;
                                        rp->rbr[k++] = cpu_to_le32(base);
@@ -10176,6 +10194,9 @@ static int __init niu_init(void)
 
        BUILD_BUG_ON(PAGE_SIZE < 4 * 1024);
 
+       BUILD_BUG_ON(offsetof(struct page, mapping) !=
+                    offsetof(union niu_page, next));
+
        niu_debug = netif_msg_init(debug, NIU_MSG_DEFAULT);
 
 #ifdef CONFIG_SPARC64
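
The same union-alias idiom can be reproduced standalone: overlay the repurposed field with a typed member and pin the layout with a static assertion, which is exactly what the BUILD_BUG_ON() above does at build time. In this sketch the struct page layout is a simplified stand-in:

```c
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct page {
        unsigned long flags;
        void *lru_prev, *lru_next;   /* stand-in for struct list_head lru */
        void *mapping;               /* the field being overloaded        */
};

union niu_page {
        struct page page;
        struct {
                unsigned long __flags;
                void *__lru_prev, *__lru_next;
                struct page *next;   /* typed alias of "mapping" */
        };
};
#define niu_next_page(p) (((union niu_page *)(p))->next)

/* Pins the alias so a layout change breaks the build, not runtime. */
static_assert(offsetof(struct page, mapping) ==
              offsetof(union niu_page, next), "alias drifted");

int main(void)
{
        struct page a = { 0 }, b = { 0 };

        niu_next_page(&a) = &b;      /* no cast through the mapping type */
        printf("%p\n", (void *)niu_next_page(&a));
        return 0;
}
```
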
index bc981043cc808287608b271f7b0cf8a421ed3966..a701178a1d139a98ecf643d0c7d133fe2f99020a 100644 (file)
@@ -1367,9 +1367,10 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
        struct gsi_event *event_done;
        struct gsi_event *event;
        struct gsi_trans *trans;
+       u32 trans_count = 0;
        u32 byte_count = 0;
-       u32 old_index;
        u32 event_avail;
+       u32 old_index;
 
        trans_info = &channel->trans_info;
 
@@ -1390,6 +1391,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
        do {
                trans->len = __le16_to_cpu(event->len);
                byte_count += trans->len;
+               trans_count++;
 
                /* Move on to the next event and transaction */
                if (--event_avail)
@@ -1401,7 +1403,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
 
        /* We record RX bytes when they are received */
        channel->byte_count += byte_count;
-       channel->trans_count++;
+       channel->trans_count += trans_count;
 }
 
 /* Initialize a ring, including allocating DMA memory for its entries */
index 888e94278a84fa1b6724560cc420b7558779cd59..cea7b2e2ce969831c9a351c6d2b30bd0edf23b0d 100644 (file)
@@ -1150,13 +1150,12 @@ static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint,
                return;
 
        skb = __dev_alloc_skb(len, GFP_ATOMIC);
-       if (!skb)
-               return;
-
-       /* Copy the data into the socket buffer and receive it */
-       skb_put(skb, len);
-       memcpy(skb->data, data, len);
-       skb->truesize += extra;
+       if (skb) {
+               /* Copy the data into the socket buffer and receive it */
+               skb_put(skb, len);
+               memcpy(skb->data, data, len);
+               skb->truesize += extra;
+       }
 
        ipa_modem_skb_rx(endpoint->netdev, skb);
 }
index 90f3aec55b365735051400f322cabc42e77155d8..ec010cf2e816a9f46336a66a3d52f4bb827c20fd 100644 (file)
@@ -125,7 +125,7 @@ static void ipa_qmi_indication(struct ipa_qmi *ipa_qmi)
  */
 static void ipa_qmi_ready(struct ipa_qmi *ipa_qmi)
 {
-       struct ipa *ipa = container_of(ipa_qmi, struct ipa, qmi);
+       struct ipa *ipa;
        int ret;
 
        /* We aren't ready until the modem and microcontroller are */
index fc53b71dc872ba3523b6ca5821fb9d2c27f8dbd2..cd9aa353b653f62a4735c0a51bdf4db2d06986ca 100644 (file)
@@ -1743,7 +1743,7 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev,
 
 static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr)
 {
-       u32 data;
+       int data;
 
        phy_lock_mdio_bus(phydev);
        __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page);
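
The one-character type change is the whole fix: __phy_read() can return a negative errno, and storing that in a u32 makes every `< 0` error check vacuously false. A standalone sketch of the failure mode (the helper only emulates the return convention):

```c
#include <stdio.h>

#define EIO 5
typedef unsigned int u32;

static int phy_read_emulated(int fail) { return fail ? -EIO : 0x1234; }

int main(void)
{
        u32 bad = phy_read_emulated(1);
        int good = phy_read_emulated(1);

        /* -EIO stored in a u32 becomes a huge positive value, so the
         * comparison below is always false (compilers warn about it): */
        if (bad < 0)
                printf("unreachable\n");
        else
                printf("error lost: bad = 0x%x\n", bad);

        /* The signed result keeps the error visible: */
        if (good < 0)
                printf("error preserved: good = %d\n", good);
        return 0;
}
```
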
@@ -2444,8 +2444,7 @@ static int lan8804_config_init(struct phy_device *phydev)
 
 static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev)
 {
-       u16 tsu_irq_status;
-       int irq_status;
+       int irq_status, tsu_irq_status;
 
        irq_status = phy_read(phydev, LAN8814_INTS);
        if (irq_status > 0 && (irq_status & LAN8814_INT_LINK))
@@ -2657,6 +2656,7 @@ static struct phy_driver ksphy_driver[] = {
        .name           = "Micrel KS8737",
        /* PHY_BASIC_FEATURES */
        .driver_data    = &ks8737_type,
+       .probe          = kszphy_probe,
        .config_init    = kszphy_config_init,
        .config_intr    = kszphy_config_intr,
        .handle_interrupt = kszphy_handle_interrupt,
@@ -2782,8 +2782,8 @@ static struct phy_driver ksphy_driver[] = {
        .config_init    = ksz8061_config_init,
        .config_intr    = kszphy_config_intr,
        .handle_interrupt = kszphy_handle_interrupt,
-       .suspend        = kszphy_suspend,
-       .resume         = kszphy_resume,
+       .suspend        = genphy_suspend,
+       .resume         = genphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ9021,
        .phy_id_mask    = 0x000ffffe,
index beb2b66da13246db6f668ae65f2037a693b44ff1..f122026c4682674f7e7b0c1c0832972339483341 100644 (file)
@@ -970,8 +970,13 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 {
        struct phy_device *phydev = phy_dat;
        struct phy_driver *drv = phydev->drv;
+       irqreturn_t ret;
 
-       return drv->handle_interrupt(phydev);
+       mutex_lock(&phydev->lock);
+       ret = drv->handle_interrupt(phydev);
+       mutex_unlock(&phydev->lock);
+
+       return ret;
 }
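
Taking phydev->lock in the threaded IRQ handler serializes it against the PHY state machine, which takes the same mutex elsewhere. A userspace sketch of the same discipline with pthreads (structure and names are illustrative):

```c
#include <pthread.h>
#include <stdio.h>

struct phy_device {
        pthread_mutex_t lock;
        int state;
};

static void handle_interrupt(struct phy_device *phydev)
{
        phydev->state++;     /* touches state the state machine also uses */
}

static void *phy_interrupt(void *arg)
{
        struct phy_device *phydev = arg;

        pthread_mutex_lock(&phydev->lock);   /* the fix: same lock as the */
        handle_interrupt(phydev);            /* state machine side below  */
        pthread_mutex_unlock(&phydev->lock);
        return NULL;
}

int main(void)
{
        struct phy_device dev = { PTHREAD_MUTEX_INITIALIZER, 0 };
        pthread_t t;

        pthread_create(&t, NULL, phy_interrupt, &dev);
        pthread_mutex_lock(&dev.lock);       /* "state machine" side */
        dev.state += 10;
        pthread_mutex_unlock(&dev.lock);
        pthread_join(t, NULL);
        printf("state = %d\n", dev.state);
        return 0;
}
```
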
 
 /**
index 3619520340b746faba44a5dd86ef2bea9665b178..e172743948ed777d4672b85f3584e1f03ba3bd53 100644 (file)
@@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
        path->encap.proto = htons(ETH_P_PPP_SES);
        path->encap.id = be16_to_cpu(po->num);
        memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
+       memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
        path->dev = ctx->dev;
        ctx->dev = dev;
 
index d9d90baac72a2a783f7f14411d462ab4cf937ff8..93e8d119d45f6b88f6237425a20497127bef2d1a 100644 (file)
@@ -589,6 +589,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
                                if (dma_mapping_error(&adapter->pdev->dev,
                                                      rbi->dma_addr)) {
                                        dev_kfree_skb_any(rbi->skb);
+                                       rbi->skb = NULL;
                                        rq->stats.rx_buf_alloc_failure++;
                                        break;
                                }
@@ -613,6 +614,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
                                if (dma_mapping_error(&adapter->pdev->dev,
                                                      rbi->dma_addr)) {
                                        put_page(rbi->page);
+                                       rbi->page = NULL;
                                        rq->stats.rx_buf_alloc_failure++;
                                        break;
                                }
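
Clearing the pointer at the failure site is what makes the later cleanup idempotent: it walks the same table and frees whatever is still non-NULL. The idiom in miniature:

```c
#include <stdlib.h>

struct rx_buf { void *skb; };

static void cleanup(struct rx_buf *tab, int n)
{
        for (int i = 0; i < n; i++) {
                free(tab[i].skb);        /* safe: free(NULL) is a no-op */
                tab[i].skb = NULL;
        }
}

int main(void)
{
        struct rx_buf tab[2] = { { malloc(16) }, { malloc(16) } };

        /* Failure path: drop entry 1 and clear it, as the fix does. */
        free(tab[1].skb);
        tab[1].skb = NULL;               /* without this, cleanup()
                                          * would double-free        */
        cleanup(tab, 2);
        return 0;
}
```
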
@@ -1666,6 +1668,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
        u32 i, ring_idx;
        struct Vmxnet3_RxDesc *rxd;
 
+       /* ring has already been cleaned up */
+       if (!rq->rx_ring[0].base)
+               return;
+
        for (ring_idx = 0; ring_idx < 2; ring_idx++) {
                for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
 #ifdef __BIG_ENDIAN_BITFIELD
index 71eb7d04c3bf22f320358a0962a720f557ec0c46..90a5df1fbdbd27219dd30f802b9e8768e9cb13ba 100644 (file)
@@ -1288,6 +1288,7 @@ static void ath11k_core_restart(struct work_struct *work)
 
                ieee80211_stop_queues(ar->hw);
                ath11k_mac_drain_tx(ar);
+               complete(&ar->completed_11d_scan);
                complete(&ar->scan.started);
                complete(&ar->scan.completed);
                complete(&ar->peer_assoc_done);
index c0228e91a596b1f486efe04f0d1cc7548675205f..b8634eddf49aa3c1fc6026f1795e48d904d1bf47 100644 (file)
@@ -38,6 +38,8 @@
 
 extern unsigned int ath11k_frame_mode;
 
+#define ATH11K_SCAN_TIMEOUT_HZ (20 * HZ)
+
 #define ATH11K_MON_TIMER_INTERVAL  10
 
 enum ath11k_supported_bw {
@@ -189,6 +191,12 @@ enum ath11k_scan_state {
        ATH11K_SCAN_ABORTING,
 };
 
+enum ath11k_11d_state {
+       ATH11K_11D_IDLE,
+       ATH11K_11D_PREPARING,
+       ATH11K_11D_RUNNING,
+};
+
 enum ath11k_dev_flags {
        ATH11K_CAC_RUNNING,
        ATH11K_FLAG_CORE_REGISTERED,
@@ -607,9 +615,8 @@ struct ath11k {
        bool dfs_block_radar_events;
        struct ath11k_thermal thermal;
        u32 vdev_id_11d_scan;
-       struct completion finish_11d_scan;
-       struct completion finish_11d_ch_list;
-       bool pending_11d;
+       struct completion completed_11d_scan;
+       enum ath11k_11d_state state_11d;
        bool regdom_set_by_user;
        int hw_rate_code;
        u8 twt_enabled;
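
The two completions and the pending_11d flag collapse into one enum plus one completion: every path that finishes or abandons the 11d scan moves the state back to IDLE and signals completed_11d_scan, which is what ath11k_reg_update_chan_list(ar, true) blocks on. A toy model of the transitions (no locking shown; the real code holds vdev_id_11d_lock):

```c
#include <stdio.h>

enum state_11d { IDLE, PREPARING, RUNNING };

static enum state_11d st = IDLE;

static void complete_11d(void) { printf("completed_11d_scan signalled\n"); }

static void vif_up(void)    { st = PREPARING; }         /* add_interface  */
static void scan_sent(void) { if (st == PREPARING) st = RUNNING; }

/* new-country-code event, scan stop, or device stop: back to IDLE,
 * waking anyone blocked in reg_update_chan_list(ar, wait=true)      */
static void scan_finished(void)
{
        st = IDLE;
        complete_11d();
}

int main(void)
{
        vif_up();
        scan_sent();
        scan_finished();
        printf("state = %d\n", st);
        return 0;
}
```
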
index e6b34b0d61bd3919532419f9cf2ab122197fac94..58ff761393db19803acbe8dd640c797c30bb6d68 100644 (file)
@@ -3601,26 +3601,6 @@ static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw,
        if (ret)
                goto exit;
 
-       /* Currently the pending_11d=true only happened 1 time while
-        * wlan interface up in ath11k_mac_11d_scan_start(), it is called by
-        * ath11k_mac_op_add_interface(), after wlan interface up,
-        * pending_11d=false always.
-        * If remove below wait, it always happened scan fail and lead connect
-        * fail while wlan interface up, because it has a 11d scan which is running
-        * in firmware, and lead this scan failed.
-        */
-       if (ar->pending_11d) {
-               long time_left;
-               unsigned long timeout = 5 * HZ;
-
-               if (ar->supports_6ghz)
-                       timeout += 5 * HZ;
-
-               time_left = wait_for_completion_timeout(&ar->finish_11d_ch_list, timeout);
-               ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
-                          "mac wait 11d channel list time left %ld\n", time_left);
-       }
-
        memset(&arg, 0, sizeof(arg));
        ath11k_wmi_start_scan_init(ar, &arg);
        arg.vdev_id = arvif->vdev_id;
@@ -3686,6 +3666,10 @@ exit:
                kfree(arg.extraie.ptr);
 
        mutex_unlock(&ar->conf_mutex);
+
+       if (ar->state_11d == ATH11K_11D_PREPARING)
+               ath11k_mac_11d_scan_start(ar, arvif->vdev_id);
+
        return ret;
 }
 
@@ -5814,7 +5798,7 @@ static int ath11k_mac_op_start(struct ieee80211_hw *hw)
 
        /* TODO: Do we need to enable ANI? */
 
-       ath11k_reg_update_chan_list(ar);
+       ath11k_reg_update_chan_list(ar, false);
 
        ar->num_started_vdevs = 0;
        ar->num_created_vdevs = 0;
@@ -5881,6 +5865,11 @@ static void ath11k_mac_op_stop(struct ieee80211_hw *hw)
        cancel_work_sync(&ar->ab->update_11d_work);
        cancel_work_sync(&ar->ab->rfkill_work);
 
+       if (ar->state_11d == ATH11K_11D_PREPARING) {
+               ar->state_11d = ATH11K_11D_IDLE;
+               complete(&ar->completed_11d_scan);
+       }
+
        spin_lock_bh(&ar->data_lock);
        list_for_each_entry_safe(ppdu_stats, tmp, &ar->ppdu_stats_info, list) {
                list_del(&ppdu_stats->list);
@@ -6051,7 +6040,7 @@ static bool ath11k_mac_vif_ap_active_any(struct ath11k_base *ab)
        return false;
 }
 
-void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait)
+void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id)
 {
        struct wmi_11d_scan_start_params param;
        int ret;
@@ -6079,28 +6068,22 @@ void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait)
 
        ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "mac start 11d scan\n");
 
-       if (wait)
-               reinit_completion(&ar->finish_11d_scan);
-
        ret = ath11k_wmi_send_11d_scan_start_cmd(ar, &param);
        if (ret) {
                ath11k_warn(ar->ab, "failed to start 11d scan vdev %d ret: %d\n",
                            vdev_id, ret);
        } else {
                ar->vdev_id_11d_scan = vdev_id;
-               if (wait) {
-                       ar->pending_11d = true;
-                       ret = wait_for_completion_timeout(&ar->finish_11d_scan,
-                                                         5 * HZ);
-                       ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
-                                  "mac 11d scan left time %d\n", ret);
-
-                       if (!ret)
-                               ar->pending_11d = false;
-               }
+               if (ar->state_11d == ATH11K_11D_PREPARING)
+                       ar->state_11d = ATH11K_11D_RUNNING;
        }
 
 fin:
+       if (ar->state_11d == ATH11K_11D_PREPARING) {
+               ar->state_11d = ATH11K_11D_IDLE;
+               complete(&ar->completed_11d_scan);
+       }
+
        mutex_unlock(&ar->ab->vdev_id_11d_lock);
 }
 
@@ -6123,12 +6106,15 @@ void ath11k_mac_11d_scan_stop(struct ath11k *ar)
                vdev_id = ar->vdev_id_11d_scan;
 
                ret = ath11k_wmi_send_11d_scan_stop_cmd(ar, vdev_id);
-               if (ret)
+               if (ret) {
                        ath11k_warn(ar->ab,
                                    "failed to stop 11d scan vdev %d ret: %d\n",
                                    vdev_id, ret);
-               else
+               } else {
                        ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID;
+                       ar->state_11d = ATH11K_11D_IDLE;
+                       complete(&ar->completed_11d_scan);
+               }
        }
        mutex_unlock(&ar->ab->vdev_id_11d_lock);
 }
@@ -6324,8 +6310,10 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
                        goto err_peer_del;
                }
 
-               ath11k_mac_11d_scan_start(ar, arvif->vdev_id, true);
-
+               if (test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ab->wmi_ab.svc_map)) {
+                       reinit_completion(&ar->completed_11d_scan);
+                       ar->state_11d = ATH11K_11D_PREPARING;
+               }
                break;
        case WMI_VDEV_TYPE_MONITOR:
                set_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags);
@@ -7190,7 +7178,7 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
        }
 
        if (arvif->vdev_type == WMI_VDEV_TYPE_STA)
-               ath11k_mac_11d_scan_start(ar, arvif->vdev_id, false);
+               ath11k_mac_11d_scan_start(ar, arvif->vdev_id);
 
        mutex_unlock(&ar->conf_mutex);
 }
@@ -8671,8 +8659,7 @@ int ath11k_mac_allocate(struct ath11k_base *ab)
                ar->monitor_vdev_id = -1;
                clear_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags);
                ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID;
-               init_completion(&ar->finish_11d_scan);
-               init_completion(&ar->finish_11d_ch_list);
+               init_completion(&ar->completed_11d_scan);
        }
 
        return 0;
index 0e6c870b09c887679eff6832f2efc1185ff69b5c..29b523af66dd2d731983001665ce1d767676e2ed 100644 (file)
@@ -130,7 +130,7 @@ extern const struct htt_rx_ring_tlv_filter ath11k_mac_mon_status_filter_default;
 #define ATH11K_SCAN_11D_INTERVAL               600000
 #define ATH11K_11D_INVALID_VDEV_ID             0xFFFF
 
-void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait);
+void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id);
 void ath11k_mac_11d_scan_stop(struct ath11k *ar);
 void ath11k_mac_11d_scan_stop_all(struct ath11k_base *ab);
 
index 81e11cde31d7b24c5470c94837ad88355cc30779..80a6977713932c18811f2258940cd0a6fc68c489 100644 (file)
@@ -102,7 +102,7 @@ ath11k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request)
        ar->regdom_set_by_user = true;
 }
 
-int ath11k_reg_update_chan_list(struct ath11k *ar)
+int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait)
 {
        struct ieee80211_supported_band **bands;
        struct scan_chan_list_params *params;
@@ -111,7 +111,32 @@ int ath11k_reg_update_chan_list(struct ath11k *ar)
        struct channel_param *ch;
        enum nl80211_band band;
        int num_channels = 0;
-       int i, ret;
+       int i, ret, left;
+
+       if (wait && ar->state_11d != ATH11K_11D_IDLE) {
+               left = wait_for_completion_timeout(&ar->completed_11d_scan,
+                                                  ATH11K_SCAN_TIMEOUT_HZ);
+               if (!left) {
+                       ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+                                  "failed to receive 11d scan complete: timed out\n");
+                       ar->state_11d = ATH11K_11D_IDLE;
+               }
+               ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+                          "reg 11d scan wait left time %d\n", left);
+       }
+
+       if (wait &&
+           (ar->scan.state == ATH11K_SCAN_STARTING ||
+           ar->scan.state == ATH11K_SCAN_RUNNING)) {
+               left = wait_for_completion_timeout(&ar->scan.completed,
+                                                  ATH11K_SCAN_TIMEOUT_HZ);
+               if (!left)
+                       ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+                                  "failed to receive hw scan complete: timed out\n");
+
+               ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+                          "reg hw scan wait left time %d\n", left);
+       }
 
        bands = hw->wiphy->bands;
        for (band = 0; band < NUM_NL80211_BANDS; band++) {
@@ -193,11 +218,6 @@ int ath11k_reg_update_chan_list(struct ath11k *ar)
        ret = ath11k_wmi_send_scan_chan_list_cmd(ar, params);
        kfree(params);
 
-       if (ar->pending_11d) {
-               complete(&ar->finish_11d_ch_list);
-               ar->pending_11d = false;
-       }
-
        return ret;
 }
 
@@ -263,15 +283,8 @@ int ath11k_regd_update(struct ath11k *ar)
                goto err;
        }
 
-       if (ar->pending_11d)
-               complete(&ar->finish_11d_scan);
-
        rtnl_lock();
        wiphy_lock(ar->hw->wiphy);
-
-       if (ar->pending_11d)
-               reinit_completion(&ar->finish_11d_ch_list);
-
        ret = regulatory_set_wiphy_regd_sync(ar->hw->wiphy, regd_copy);
        wiphy_unlock(ar->hw->wiphy);
        rtnl_unlock();
@@ -282,7 +295,7 @@ int ath11k_regd_update(struct ath11k *ar)
                goto err;
 
        if (ar->state == ATH11K_STATE_ON) {
-               ret = ath11k_reg_update_chan_list(ar);
+               ret = ath11k_reg_update_chan_list(ar, true);
                if (ret)
                        goto err;
        }
index 5fb9dc03a74e82a2a76048ca35ee236b950f895b..2f284f26378d1f6df1c0afd957bf844b2cdd9126 100644 (file)
@@ -32,5 +32,5 @@ struct ieee80211_regdomain *
 ath11k_reg_build_regd(struct ath11k_base *ab,
                      struct cur_regulatory_info *reg_info, bool intersect);
 int ath11k_regd_update(struct ath11k *ar);
-int ath11k_reg_update_chan_list(struct ath11k *ar);
+int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait);
 #endif
index b4f86c45d81f8a30d72d53a9060add05300ff220..2751fe8814df79e79ae4419f04e329702b64ce1c 100644 (file)
@@ -2015,7 +2015,10 @@ void ath11k_wmi_start_scan_init(struct ath11k *ar,
 {
        /* setup commonly used values */
        arg->scan_req_id = 1;
-       arg->scan_priority = WMI_SCAN_PRIORITY_LOW;
+       if (ar->state_11d == ATH11K_11D_PREPARING)
+               arg->scan_priority = WMI_SCAN_PRIORITY_MEDIUM;
+       else
+               arg->scan_priority = WMI_SCAN_PRIORITY_LOW;
        arg->dwell_time_active = 50;
        arg->dwell_time_active_2g = 0;
        arg->dwell_time_passive = 150;
@@ -6350,8 +6353,10 @@ static void ath11k_wmi_op_ep_tx_credits(struct ath11k_base *ab)
 static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *skb)
 {
        const struct wmi_11d_new_cc_ev *ev;
+       struct ath11k *ar;
+       struct ath11k_pdev *pdev;
        const void **tb;
-       int ret;
+       int ret, i;
 
        tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
        if (IS_ERR(tb)) {
@@ -6377,6 +6382,13 @@ static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *s
 
        kfree(tb);
 
+       for (i = 0; i < ab->num_radios; i++) {
+               pdev = &ab->pdevs[i];
+               ar = pdev->ar;
+               ar->state_11d = ATH11K_11D_IDLE;
+               complete(&ar->completed_11d_scan);
+       }
+
        queue_work(ab->workqueue, &ab->update_11d_work);
 
        return 0;
index 866a33f49915f16b7a4f32187ba654e03b33fb26..3237d4b528b5d9ed74ce74ce7c1b3a237d848fc1 100644 (file)
@@ -371,7 +371,7 @@ void iwl_dbg_tlv_del_timers(struct iwl_trans *trans)
        struct iwl_dbg_tlv_timer_node *node, *tmp;
 
        list_for_each_entry_safe(node, tmp, timer_list, list) {
-               del_timer(&node->timer);
+               del_timer_sync(&node->timer);
                list_del(&node->list);
                kfree(node);
        }
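
del_timer() only deactivates a pending timer; if the callback is already executing on another CPU it keeps running, so the kfree() that follows could free memory the callback is still using. del_timer_sync() additionally waits for a running callback to return. The contrast, in kernel-style shorthand (not standalone code):

```c
del_timer(&node->timer);        /* returns even if the callback is
                                 * still running on another CPU     */
kfree(node);                    /* -> potential use-after-free      */

del_timer_sync(&node->timer);   /* deactivates and waits for any
                                 * running callback to finish       */
kfree(node);                    /* safe                             */
```
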
index 28bfa7b7b73c09ab0e6cad98b9587fc7b4bea39f..e9ec63e0e395ba0615092cf7f28eb268604188c3 100644 (file)
@@ -2202,11 +2202,14 @@ mac80211_hwsim_sta_rc_update(struct ieee80211_hw *hw,
        if (!data->use_chanctx) {
                confbw = data->bw;
        } else {
-               struct ieee80211_chanctx_conf *chanctx_conf =
-                       rcu_dereference(vif->chanctx_conf);
+               struct ieee80211_chanctx_conf *chanctx_conf;
+
+               rcu_read_lock();
+               chanctx_conf = rcu_dereference(vif->chanctx_conf);
 
                if (!WARN_ON(!chanctx_conf))
                        confbw = chanctx_conf->def.width;
+               rcu_read_unlock();
        }
 
        WARN(bw > hwsim_get_chanwidth(confbw),
@@ -2475,11 +2478,13 @@ static void hw_scan_work(struct work_struct *work)
                        if (req->ie_len)
                                skb_put_data(probe, req->ie, req->ie_len);
 
+                       rcu_read_lock();
                        if (!ieee80211_tx_prepare_skb(hwsim->hw,
                                                      hwsim->hw_scan_vif,
                                                      probe,
                                                      hwsim->tmp_chan->band,
                                                      NULL)) {
+                               rcu_read_unlock();
                                kfree_skb(probe);
                                continue;
                        }
@@ -2487,6 +2492,7 @@ static void hw_scan_work(struct work_struct *work)
                        local_bh_disable();
                        mac80211_hwsim_tx_frame(hwsim->hw, probe,
                                                hwsim->tmp_chan);
+                       rcu_read_unlock();
                        local_bh_enable();
                }
        }
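
Both hunks enforce the same rule: rcu_dereference() is only valid inside an RCU read-side critical section, and the pointer it returns must not be used after rcu_read_unlock(), since a grace period may then free the object. The shape being restored (kernel-style, not standalone):

```c
rcu_read_lock();
chanctx_conf = rcu_dereference(vif->chanctx_conf);
if (!WARN_ON(!chanctx_conf))
        confbw = chanctx_conf->def.width;  /* last use of the pointer  */
rcu_read_unlock();                         /* only now may it be freed */
```
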
index e2b4a1893a1321adaa31a92467afe6d7650cb6b5..65ab907aca5ad9e9a7ec1cd67469e7dab333617c 100644 (file)
@@ -78,8 +78,6 @@ struct netfront_cb {
 
 #define RX_COPY_THRESHOLD 256
 
-#define GRANT_INVALID_REF      0
-
 #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
 #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE)
 
@@ -224,7 +222,7 @@ static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
 {
        int i = xennet_rxidx(ri);
        grant_ref_t ref = queue->grant_rx_ref[i];
-       queue->grant_rx_ref[i] = GRANT_INVALID_REF;
+       queue->grant_rx_ref[i] = INVALID_GRANT_REF;
        return ref;
 }
 
@@ -432,7 +430,7 @@ static bool xennet_tx_buf_gc(struct netfront_queue *queue)
                        }
                        gnttab_release_grant_reference(
                                &queue->gref_tx_head, queue->grant_tx_ref[id]);
-                       queue->grant_tx_ref[id] = GRANT_INVALID_REF;
+                       queue->grant_tx_ref[id] = INVALID_GRANT_REF;
                        queue->grant_tx_page[id] = NULL;
                        add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id);
                        dev_kfree_skb_irq(skb);
@@ -868,7 +866,7 @@ static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
 
        spin_lock_irqsave(&queue->rx_cons_lock, flags);
        queue->rx.rsp_cons = val;
-       queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
+       queue->rx_rsp_unconsumed = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
        spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
 }
 
@@ -1021,7 +1019,7 @@ static int xennet_get_responses(struct netfront_queue *queue,
                 * the backend driver. In future this should flag the bad
                 * situation to the system controller to reboot the backend.
                 */
-               if (ref == GRANT_INVALID_REF) {
+               if (ref == INVALID_GRANT_REF) {
                        if (net_ratelimit())
                                dev_warn(dev, "Bad rx response id %d.\n",
                                         rx->id);
@@ -1390,7 +1388,7 @@ static void xennet_release_tx_bufs(struct netfront_queue *queue)
                gnttab_end_foreign_access(queue->grant_tx_ref[i],
                                          (unsigned long)page_address(queue->grant_tx_page[i]));
                queue->grant_tx_page[i] = NULL;
-               queue->grant_tx_ref[i] = GRANT_INVALID_REF;
+               queue->grant_tx_ref[i] = INVALID_GRANT_REF;
                add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i);
                dev_kfree_skb_irq(skb);
        }
@@ -1411,7 +1409,7 @@ static void xennet_release_rx_bufs(struct netfront_queue *queue)
                        continue;
 
                ref = queue->grant_rx_ref[id];
-               if (ref == GRANT_INVALID_REF)
+               if (ref == INVALID_GRANT_REF)
                        continue;
 
                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
@@ -1422,7 +1420,7 @@ static void xennet_release_rx_bufs(struct netfront_queue *queue)
                get_page(page);
                gnttab_end_foreign_access(ref,
                                          (unsigned long)page_address(page));
-               queue->grant_rx_ref[id] = GRANT_INVALID_REF;
+               queue->grant_rx_ref[id] = INVALID_GRANT_REF;
 
                kfree_skb(skb);
        }
@@ -1500,7 +1498,7 @@ static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
                return false;
 
        spin_lock_irqsave(&queue->rx_cons_lock, flags);
-       work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
+       work_queued = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
        if (work_queued > queue->rx_rsp_unconsumed) {
                queue->rx_rsp_unconsumed = work_queued;
                *eoi = 0;
@@ -1761,7 +1759,7 @@ static int netfront_probe(struct xenbus_device *dev,
 static void xennet_end_access(int ref, void *page)
 {
        /* This frees the page as a side-effect */
-       if (ref != GRANT_INVALID_REF)
+       if (ref != INVALID_GRANT_REF)
                gnttab_end_foreign_access(ref, (unsigned long)page);
 }
 
@@ -1798,8 +1796,8 @@ static void xennet_disconnect_backend(struct netfront_info *info)
                xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
                xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
 
-               queue->tx_ring_ref = GRANT_INVALID_REF;
-               queue->rx_ring_ref = GRANT_INVALID_REF;
+               queue->tx_ring_ref = INVALID_GRANT_REF;
+               queue->rx_ring_ref = INVALID_GRANT_REF;
                queue->tx.sring = NULL;
                queue->rx.sring = NULL;
 
@@ -1923,42 +1921,27 @@ static int setup_netfront(struct xenbus_device *dev,
                        struct netfront_queue *queue, unsigned int feature_split_evtchn)
 {
        struct xen_netif_tx_sring *txs;
-       struct xen_netif_rx_sring *rxs = NULL;
-       grant_ref_t gref;
+       struct xen_netif_rx_sring *rxs;
        int err;
 
-       queue->tx_ring_ref = GRANT_INVALID_REF;
-       queue->rx_ring_ref = GRANT_INVALID_REF;
+       queue->tx_ring_ref = INVALID_GRANT_REF;
+       queue->rx_ring_ref = INVALID_GRANT_REF;
        queue->rx.sring = NULL;
        queue->tx.sring = NULL;
 
-       txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
-       if (!txs) {
-               err = -ENOMEM;
-               xenbus_dev_fatal(dev, err, "allocating tx ring page");
+       err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&txs,
+                               1, &queue->tx_ring_ref);
+       if (err)
                goto fail;
-       }
-       SHARED_RING_INIT(txs);
-       FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
 
-       err = xenbus_grant_ring(dev, txs, 1, &gref);
-       if (err < 0)
-               goto fail;
-       queue->tx_ring_ref = gref;
+       XEN_FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
 
-       rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
-       if (!rxs) {
-               err = -ENOMEM;
-               xenbus_dev_fatal(dev, err, "allocating rx ring page");
+       err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&rxs,
+                               1, &queue->rx_ring_ref);
+       if (err)
                goto fail;
-       }
-       SHARED_RING_INIT(rxs);
-       FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
 
-       err = xenbus_grant_ring(dev, rxs, 1, &gref);
-       if (err < 0)
-               goto fail;
-       queue->rx_ring_ref = gref;
+       XEN_FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
 
        if (feature_split_evtchn)
                err = setup_netfront_split(queue);
@@ -1974,24 +1957,10 @@ static int setup_netfront(struct xenbus_device *dev,
 
        return 0;
 
-       /* If we fail to setup netfront, it is safe to just revoke access to
-        * granted pages because backend is not accessing it at this point.
-        */
  fail:
-       if (queue->rx_ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(queue->rx_ring_ref,
-                                         (unsigned long)rxs);
-               queue->rx_ring_ref = GRANT_INVALID_REF;
-       } else {
-               free_page((unsigned long)rxs);
-       }
-       if (queue->tx_ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(queue->tx_ring_ref,
-                                         (unsigned long)txs);
-               queue->tx_ring_ref = GRANT_INVALID_REF;
-       } else {
-               free_page((unsigned long)txs);
-       }
+       xenbus_teardown_ring((void **)&queue->rx.sring, 1, &queue->rx_ring_ref);
+       xenbus_teardown_ring((void **)&queue->tx.sring, 1, &queue->tx_ring_ref);
+
        return err;
 }
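
xenbus_setup_ring() folds the allocate/init/grant sequence (and, it appears, the xenbus_dev_fatal() error reporting) into one call, and xenbus_teardown_ring() handles both the granted and the not-yet-granted case, which is why the hand-rolled failure branches disappear. Usage, with the signatures exactly as the hunk uses them (kernel-style sketch, not standalone):

```c
struct xen_netif_tx_sring *sring;
grant_ref_t ring_ref;
int err;

err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&sring,
                        1, &ring_ref);        /* alloc + grant in one step */
if (err)
        return err;
XEN_FRONT_RING_INIT(&queue->tx, sring, XEN_PAGE_SIZE);

/* ... on any later failure: */
xenbus_teardown_ring((void **)&sring, 1, &ring_ref);  /* safe whether or
                                                       * not the grant
                                                       * was ever made  */
```
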
 
@@ -2020,7 +1989,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
        queue->tx_pend_queue = TX_LINK_NONE;
        for (i = 0; i < NET_TX_RING_SIZE; i++) {
                queue->tx_link[i] = i + 1;
-               queue->grant_tx_ref[i] = GRANT_INVALID_REF;
+               queue->grant_tx_ref[i] = INVALID_GRANT_REF;
                queue->grant_tx_page[i] = NULL;
        }
        queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE;
@@ -2028,7 +1997,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
        /* Clear out rx_skbs */
        for (i = 0; i < NET_RX_RING_SIZE; i++) {
                queue->rx_skbs[i] = NULL;
-               queue->grant_rx_ref[i] = GRANT_INVALID_REF;
+               queue->grant_rx_ref[i] = INVALID_GRANT_REF;
        }
 
        /* A grant for every tx ring slot */
index a491db46e3bd468e0c835b6c706a1252ea110987..d9f6367b9993dd4b1880cce6625b8f6e2d985af1 100644 (file)
@@ -2787,13 +2787,14 @@ void pn53x_common_clean(struct pn533 *priv)
 {
        struct pn533_cmd *cmd, *n;
 
+       /* delete the timer before cleanup the worker */
+       del_timer_sync(&priv->listen_timer);
+
        flush_delayed_work(&priv->poll_work);
        destroy_workqueue(priv->wq);
 
        skb_queue_purge(&priv->resp_q);
 
-       del_timer(&priv->listen_timer);
-
        list_for_each_entry_safe(cmd, n, &priv->cmd_queue, queue) {
                list_del(&cmd->queue);
                kfree(cmd);
index 7d49eb34b348e73fc4552107ae21f1ad39beb9c2..4910543f00ff969515e10c2c8d5d1af378804d1d 100644 (file)
@@ -4,7 +4,6 @@
  * Copyright (c) 2022, Oracle and/or its affiliates
  */
 
-#include <linux/blkdev.h>
 #include "nvme.h"
 
 #ifdef CONFIG_NVME_VERBOSE_ERRORS
@@ -92,6 +91,7 @@ static const char * const nvme_statuses[] = {
        [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
        [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
        [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
+       [NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready",
        [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
        [NVME_SC_LBA_RANGE] = "LBA Out of Range",
        [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
@@ -155,10 +155,13 @@ static const char * const nvme_statuses[] = {
        [NVME_SC_COMPARE_FAILED] = "Compare Failure",
        [NVME_SC_ACCESS_DENIED] = "Access Denied",
        [NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block",
+       [NVME_SC_INTERNAL_PATH_ERROR] = "Internal Pathing Error",
        [NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss",
        [NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible",
        [NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition",
+       [NVME_SC_CTRL_PATH_ERROR] = "Controller Pathing Error",
        [NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error",
+       [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command",
 };
 
 const unsigned char *nvme_get_error_status_str(u16 status)
index e1846d04817f373d89c4e1cf28095ac9eb6346e8..72f7c955c7078548403c49471ead75e21c8c8ab4 100644 (file)
@@ -1207,6 +1207,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
 
        rq->timeout = ctrl->kato * HZ;
        rq->end_io_data = ctrl;
+       rq->rq_flags |= RQF_QUIET;
        blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io);
 }
 
@@ -1426,6 +1427,32 @@ out_free_id:
        return error;
 }
 
+static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
+                       struct nvme_id_ns_cs_indep **id)
+{
+       struct nvme_command c = {
+               .identify.opcode        = nvme_admin_identify,
+               .identify.nsid          = cpu_to_le32(nsid),
+               .identify.cns           = NVME_ID_CNS_NS_CS_INDEP,
+       };
+       int ret;
+
+       *id = kmalloc(sizeof(**id), GFP_KERNEL);
+       if (!*id)
+               return -ENOMEM;
+
+       ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
+       if (ret) {
+               dev_warn(ctrl->device,
+                        "Identify namespace (CS independent) failed (%d)\n",
+                        ret);
+               kfree(*id);
+               return ret;
+       }
+
+       return 0;
+}
+
 static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
                unsigned int dword11, void *buffer, size_t buflen, u32 *result)
 {
@@ -1621,20 +1648,22 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
        u32 size = queue_logical_block_size(queue);
 
        if (ctrl->max_discard_sectors == 0) {
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
+               blk_queue_max_discard_sectors(queue, 0);
                return;
        }
 
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
 
-       queue->limits.discard_alignment = 0;
        queue->limits.discard_granularity = size;
 
        /* If discard is already enabled, don't reset queue limits */
-       if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
+       if (queue->limits.max_discard_sectors)
                return;
 
+       if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
+               ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
+
        blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
        blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
 
@@ -1771,7 +1800,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
                blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
        }
        blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
-       blk_queue_dma_alignment(q, 7);
+       blk_queue_dma_alignment(q, 3);
        blk_queue_write_cache(q, vwc, vwc);
 }
 
@@ -2100,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = {
        .pr_ops         = &nvme_pr_ops,
 };
 
-static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
 {
-       unsigned long timeout =
-               ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+       unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
        u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
        int ret;
 
@@ -2116,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
                usleep_range(1000, 2000);
                if (fatal_signal_pending(current))
                        return -EINTR;
-               if (time_after(jiffies, timeout)) {
+               if (time_after(jiffies, timeout_jiffies)) {
                        dev_err(ctrl->device,
                                "Device not ready; aborting %s, CSTS=0x%x\n",
                                enabled ? "initialisation" : "reset", csts);
@@ -2147,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
        if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
                msleep(NVME_QUIRK_DELAY_AMOUNT);
 
-       return nvme_wait_ready(ctrl, ctrl->cap, false);
+       return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
 }
 EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
 
 int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 {
        unsigned dev_page_min;
+       u32 timeout;
        int ret;
 
        ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
@@ -2174,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
                ctrl->ctrl_config = NVME_CC_CSS_CSI;
        else
                ctrl->ctrl_config = NVME_CC_CSS_NVM;
+
+       if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
+               u32 crto;
+
+               ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
+               if (ret) {
+                       dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
+                               ret);
+                       return ret;
+               }
+
+               if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
+                       ctrl->ctrl_config |= NVME_CC_CRIME;
+                       timeout = NVME_CRTO_CRIMT(crto);
+               } else {
+                       timeout = NVME_CRTO_CRWMT(crto);
+               }
+       } else {
+               timeout = NVME_CAP_TIMEOUT(ctrl->cap);
+       }
+
        ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
        ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
        ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
@@ -2182,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;
-       return nvme_wait_ready(ctrl, ctrl->cap, true);
+       return nvme_wait_ready(ctrl, timeout, true);
 }
 EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
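
CAP.TO and the new CRTO fields are all in 500 ms units, which is where the `(timeout + 1) * HZ / 2` in nvme_wait_ready() comes from. A standalone sketch of the selection logic added above (the field extractors are illustrative; the real masks are the NVME_CAP_TIMEOUT/NVME_CRTO_* macros):

```c
#include <stdio.h>

#define CAP_TO(cap)     (((cap) >> 24) & 0xff)    /* illustrative layout */
#define CRTO_CRWMT(x)   ((x) & 0xffff)
#define CRTO_CRIMT(x)   (((x) >> 16) & 0xffff)

static unsigned int ready_timeout_ms(unsigned long long cap, unsigned int crto,
                                     int crms_crwms, int crms_crims)
{
        unsigned int t;

        if (crms_crwms)         /* controller reports ready timeouts in CRTO */
                t = crms_crims ? CRTO_CRIMT(crto) : CRTO_CRWMT(crto);
        else                    /* legacy: CAP.TO                            */
                t = CAP_TO(cap);

        return (t + 1) * 500;   /* 500 ms units, matching (t + 1) * HZ / 2 */
}

int main(void)
{
        /* e.g. CAP.TO = 0x14 -> (0x14 + 1) * 500 ms = 10500 ms */
        printf("%u ms\n", ready_timeout_ms(0x14ULL << 24, 0, 0, 0));
        return 0;
}
```
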
 
@@ -2894,8 +2944,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
 
        if (id->dmrl)
                ctrl->max_discard_segments = id->dmrl;
-       if (id->dmrsl)
-               ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);
+       ctrl->dmrsl = le32_to_cpu(id->dmrsl);
        if (id->wzsl)
                ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
 
@@ -3080,10 +3129,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
        if (ret)
                return ret;
 
-       ret = nvme_init_non_mdts_limits(ctrl);
-       if (ret < 0)
-               return ret;
-
        ret = nvme_configure_apst(ctrl);
        if (ret < 0)
                return ret;
@@ -3146,6 +3191,7 @@ static const struct file_operations nvme_dev_fops = {
        .release        = nvme_dev_release,
        .unlocked_ioctl = nvme_dev_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
+       .uring_cmd      = nvme_dev_uring_cmd,
 };
 
 static ssize_t nvme_sysfs_reset(struct device *dev,
@@ -3699,6 +3745,7 @@ static const struct file_operations nvme_ns_chr_fops = {
        .release        = nvme_ns_chr_release,
        .unlocked_ioctl = nvme_ns_chr_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
+       .uring_cmd      = nvme_ns_chr_uring_cmd,
 };
 
 static int nvme_add_ns_cdev(struct nvme_ns *ns)
@@ -4090,11 +4137,26 @@ out:
 static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
        struct nvme_ns_ids ids = { };
+       struct nvme_id_ns_cs_indep *id;
        struct nvme_ns *ns;
+       bool ready = true;
 
        if (nvme_identify_ns_descs(ctrl, nsid, &ids))
                return;
 
+       /*
+        * Check if the namespace is ready.  If not, ignore it; we will get
+        * an AEN once it becomes ready and restart the scan.
+        */
+       if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
+           !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
+               ready = id->nstat & NVME_NSTAT_NRDY;
+               kfree(id);
+       }
+
+       if (!ready)
+               return;
+
        ns = nvme_find_get_ns(ctrl, nsid);
        if (ns) {
                nvme_validate_ns(ns, &ids);
@@ -4237,11 +4299,26 @@ static void nvme_scan_work(struct work_struct *work)
 {
        struct nvme_ctrl *ctrl =
                container_of(work, struct nvme_ctrl, scan_work);
+       int ret;
 
        /* No tagset on a live ctrl means IO queues could not be created */
        if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
                return;
 
+       /*
+        * Identify controller limits can change at controller reset due to a
+        * new firmware download, and although this is not common we cannot
+        * ignore the scenario. The controller's non-MDTS limits are reported
+        * in units of logical blocks, which depend on the format of the
+        * attached namespace. Hence re-read the limits at the time of ns
+        * allocation.
+        */
+       ret = nvme_init_non_mdts_limits(ctrl);
+       if (ret < 0) {
+               dev_warn(ctrl->device,
+                       "reading non-mdts-limits failed: %d\n", ret);
+               return;
+       }
+
        if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
                dev_info(ctrl->device, "rescanning namespaces.\n");
                nvme_clear_changed_ns_log(ctrl);
@@ -4839,6 +4916,8 @@ static inline void _nvme_check_size(void)
        BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
+       BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) !=
+                       NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
index 1e3a09cad96113971b78c8ecda5b4a5d7c4ffb8a..46d6e194ac2be5d886f8f9a47e28be69c1295326 100644 (file)
@@ -187,6 +187,14 @@ static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl)
        return ctrl->subsys->subnqn;
 }
 
+static inline void nvmf_complete_timed_out_request(struct request *rq)
+{
+       if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
+               nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
+               blk_mq_complete_request(rq);
+       }
+}
+
 int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
index 080f85f4105f3dc7a1d9215175727414ef9ed8f5..7ae72c7a211b975ecba3009cb337211dbacbee5d 100644 (file)
@@ -3831,6 +3831,9 @@ process_local_list:
        return count;
 }
 
+static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
+
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
 /* Parse the cgroup id from a buf and return the length of cgrpid */
 static int fc_parse_cgrpid(const char *buf, u64 *id)
 {
@@ -3854,12 +3857,10 @@ static int fc_parse_cgrpid(const char *buf, u64 *id)
 }
 
 /*
- * fc_update_appid: Parse and update the appid in the blkcg associated with
- * cgroupid.
- * @buf: buf contains both cgrpid and appid info
- * @count: size of the buffer
+ * Parse and update the appid in the blkcg associated with the cgroupid.
  */
-static int fc_update_appid(const char *buf, size_t count)
+static ssize_t fc_appid_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
 {
        u64 cgrp_id;
        int appid_len = 0;
@@ -3887,23 +3888,14 @@ static int fc_update_appid(const char *buf, size_t count)
                return ret;
        return count;
 }
-
-static ssize_t fc_appid_store(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t count)
-{
-       int ret  = 0;
-
-       ret = fc_update_appid(buf, count);
-       if (ret < 0)
-               return -EINVAL;
-       return count;
-}
-static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
 static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store);
+#endif /* CONFIG_BLK_CGROUP_FC_APPID */
 
 static struct attribute *nvme_fc_attrs[] = {
        &dev_attr_nvme_discovery.attr,
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
        &dev_attr_appid_store.attr,
+#endif
        NULL
 };
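For reference, a hedged userspace sketch of driving the new attribute; the sysfs path and the "<cgroup_id>:<app_id>" format are assumptions read off fc_parse_cgrpid and fc_appid_store above:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* path assumed: the attribute hangs off nvme-fc's class device */
		int fd = open("/sys/class/fc/fc_udev_device/appid_store", O_WRONLY);
		const char buf[] = "100:testapp";	/* "<cgroup_id>:<app_id>" */

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, buf, strlen(buf)) < 0)
			perror("write");
		close(fd);
		return 0;
	}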
 
index 554566371ffa49cdc5629bd27a8dc5dd25318ccb..096b1b47d750e93744fdd26622e52f0d639ff10e 100644 (file)
@@ -5,6 +5,7 @@
  */
 #include <linux/ptrace.h>      /* for force_successful_syscall_return */
 #include <linux/nvme_ioctl.h>
+#include <linux/io_uring.h>
 #include "nvme.h"
 
 /*
@@ -53,10 +54,21 @@ out:
        return ERR_PTR(ret);
 }
 
-static int nvme_submit_user_cmd(struct request_queue *q,
+static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
+               void *meta, unsigned len, int ret)
+{
+       if (!ret && req_op(req) == REQ_OP_DRV_IN &&
+           copy_to_user(ubuf, meta, len))
+               ret = -EFAULT;
+       kfree(meta);
+       return ret;
+}
+
+static struct request *nvme_alloc_user_request(struct request_queue *q,
                struct nvme_command *cmd, void __user *ubuffer,
                unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-               u32 meta_seed, u64 *result, unsigned timeout, bool vec)
+               u32 meta_seed, void **metap, unsigned timeout, bool vec,
+               unsigned int rq_flags, blk_mq_req_flags_t blk_flags)
 {
        bool write = nvme_is_write(cmd);
        struct nvme_ns *ns = q->queuedata;
@@ -66,9 +78,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
        void *meta = NULL;
        int ret;
 
-       req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0);
+       req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
        if (IS_ERR(req))
-               return PTR_ERR(req);
+               return req;
        nvme_init_request(req, cmd);
 
        if (timeout)
@@ -105,26 +117,50 @@ static int nvme_submit_user_cmd(struct request_queue *q,
                                goto out_unmap;
                        }
                        req->cmd_flags |= REQ_INTEGRITY;
+                       *metap = meta;
                }
        }
 
+       return req;
+
+out_unmap:
+       if (bio)
+               blk_rq_unmap_user(bio);
+out:
+       blk_mq_free_request(req);
+       return ERR_PTR(ret);
+}
+
+static int nvme_submit_user_cmd(struct request_queue *q,
+               struct nvme_command *cmd, void __user *ubuffer,
+               unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
+               u32 meta_seed, u64 *result, unsigned timeout, bool vec)
+{
+       struct request *req;
+       void *meta = NULL;
+       struct bio *bio;
+       int ret;
+
+       req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer,
+                       meta_len, meta_seed, &meta, timeout, vec, 0, 0);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+
+       bio = req->bio;
+
        ret = nvme_execute_passthru_rq(req);
+
        if (result)
                *result = le64_to_cpu(nvme_req(req)->result.u64);
-       if (meta && !ret && !write) {
-               if (copy_to_user(meta_buffer, meta, meta_len))
-                       ret = -EFAULT;
-       }
-       kfree(meta);
- out_unmap:
+       if (meta)
+               ret = nvme_finish_user_metadata(req, meta_buffer, meta,
+                                               meta_len, ret);
        if (bio)
                blk_rq_unmap_user(bio);
- out:
        blk_mq_free_request(req);
        return ret;
 }
 
-
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
        struct nvme_user_io io;
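The refactor above splits the old monolithic submit into reusable steps; a hedged outline of the resulting call sequence for a synchronous caller (it mirrors nvme_submit_user_cmd(), with illustrative local names and error handling elided):

	static int example_sync_passthru(struct request_queue *q,
			struct nvme_command *cmd, void __user *ubuf,
			unsigned len, void __user *mbuf, unsigned mlen)
	{
		void *meta = NULL;
		struct request *req;
		struct bio *bio;
		int ret;

		req = nvme_alloc_user_request(q, cmd, ubuf, len, mbuf, mlen,
					      0, &meta, 0, false, 0, 0);
		if (IS_ERR(req))
			return PTR_ERR(req);

		bio = req->bio;	/* saved for unmap after completion */
		ret = nvme_execute_passthru_rq(req);
		if (meta)
			ret = nvme_finish_user_metadata(req, mbuf, meta,
							mlen, ret);
		if (bio)
			blk_rq_unmap_user(bio);
		blk_mq_free_request(req);
		return ret;
	}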
@@ -296,6 +332,139 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        return status;
 }
 
+struct nvme_uring_data {
+       __u64   metadata;
+       __u64   addr;
+       __u32   data_len;
+       __u32   metadata_len;
+       __u32   timeout_ms;
+};
+
+/*
+ * This overlays struct io_uring_cmd pdu.
+ * Expect build errors if this grows larger than that.
+ */
+struct nvme_uring_cmd_pdu {
+       union {
+               struct bio *bio;
+               struct request *req;
+       };
+       void *meta; /* kernel-resident buffer */
+       void __user *meta_buffer;
+       u32 meta_len;
+};
+
+static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
+               struct io_uring_cmd *ioucmd)
+{
+       return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
+}
+
+static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
+{
+       struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+       struct request *req = pdu->req;
+       struct bio *bio = req->bio;
+       int status;
+       u64 result;
+
+       if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+               status = -EINTR;
+       else
+               status = nvme_req(req)->status;
+
+       result = le64_to_cpu(nvme_req(req)->result.u64);
+
+       if (pdu->meta)
+               status = nvme_finish_user_metadata(req, pdu->meta_buffer,
+                                       pdu->meta, pdu->meta_len, status);
+       if (bio)
+               blk_rq_unmap_user(bio);
+       blk_mq_free_request(req);
+
+       io_uring_cmd_done(ioucmd, status, result);
+}
+
+static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err)
+{
+       struct io_uring_cmd *ioucmd = req->end_io_data;
+       struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+       /* extract bio before reusing the same field for request */
+       struct bio *bio = pdu->bio;
+
+       pdu->req = req;
+       req->bio = bio;
+       /* this takes care of moving the rest of the completion work to task context */
+       io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
+}
+
+static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+               struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
+{
+       struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+       const struct nvme_uring_cmd *cmd = ioucmd->cmd;
+       struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
+       struct nvme_uring_data d;
+       struct nvme_command c;
+       struct request *req;
+       unsigned int rq_flags = 0;
+       blk_mq_req_flags_t blk_flags = 0;
+       void *meta = NULL;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       c.common.opcode = READ_ONCE(cmd->opcode);
+       c.common.flags = READ_ONCE(cmd->flags);
+       if (c.common.flags)
+               return -EINVAL;
+
+       c.common.command_id = 0;
+       c.common.nsid = cpu_to_le32(cmd->nsid);
+       if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
+               return -EINVAL;
+
+       c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
+       c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
+       c.common.metadata = 0;
+       c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
+       c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
+       c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
+       c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
+       c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
+       c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
+       c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
+
+       d.metadata = READ_ONCE(cmd->metadata);
+       d.addr = READ_ONCE(cmd->addr);
+       d.data_len = READ_ONCE(cmd->data_len);
+       d.metadata_len = READ_ONCE(cmd->metadata_len);
+       d.timeout_ms = READ_ONCE(cmd->timeout_ms);
+
+       if (issue_flags & IO_URING_F_NONBLOCK) {
+               rq_flags = REQ_NOWAIT;
+               blk_flags = BLK_MQ_REQ_NOWAIT;
+       }
+
+       req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr),
+                       d.data_len, nvme_to_user_ptr(d.metadata),
+                       d.metadata_len, 0, &meta, d.timeout_ms ?
+                       msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags,
+                       blk_flags);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+       req->end_io_data = ioucmd;
+
+       /* to free the bio on completion, as req->bio will be NULL by then */
+       pdu->bio = req->bio;
+       pdu->meta = meta;
+       pdu->meta_buffer = nvme_to_user_ptr(d.metadata);
+       pdu->meta_len = d.metadata_len;
+
+       blk_execute_rq_nowait(req, 0, nvme_uring_cmd_end_io);
+       return -EIOCBQUEUED;
+}
+
 static bool is_ctrl_ioctl(unsigned int cmd)
 {
        if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
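The pdu overlay above is a small pattern worth calling out: the driver keeps per-command state inside the io_uring_cmd pdu area and size-checks it at build time (the BUILD_BUG_ON appears in nvme_ns_uring_cmd() below). A hedged, generic sketch of the same idiom:

	struct example_pdu {
		struct request *req;
		void *private;
	};

	static inline struct example_pdu *example_pdu(struct io_uring_cmd *ioucmd)
	{
		/* must fit in the pdu scratch space of io_uring_cmd */
		BUILD_BUG_ON(sizeof(struct example_pdu) > sizeof(ioucmd->pdu));
		return (struct example_pdu *)&ioucmd->pdu;
	}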
@@ -387,6 +556,53 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        return __nvme_ioctl(ns, cmd, (void __user *)arg);
 }
 
+static int nvme_uring_cmd_checks(unsigned int issue_flags)
+{
+       /* IOPOLL not supported yet */
+       if (issue_flags & IO_URING_F_IOPOLL)
+               return -EOPNOTSUPP;
+
+       /* NVMe passthrough requires big SQE/CQE support */
+       if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
+           (IO_URING_F_SQE128|IO_URING_F_CQE32))
+               return -EOPNOTSUPP;
+       return 0;
+}
+
+static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
+                            unsigned int issue_flags)
+{
+       struct nvme_ctrl *ctrl = ns->ctrl;
+       int ret;
+
+       BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));
+
+       ret = nvme_uring_cmd_checks(issue_flags);
+       if (ret)
+               return ret;
+
+       switch (ioucmd->cmd_op) {
+       case NVME_URING_CMD_IO:
+               ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
+               break;
+       case NVME_URING_CMD_IO_VEC:
+               ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
+               break;
+       default:
+               ret = -ENOTTY;
+       }
+
+       return ret;
+}
+
+int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
+{
+       struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
+                       struct nvme_ns, cdev);
+
+       return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
+}
+
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
                void __user *argp, struct nvme_ns_head *head, int srcu_idx)
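Putting the uring_cmd plumbing together, a hedged userspace sketch of issuing NVME_URING_CMD_IO through liburing; it assumes liburing 2.2+ headers matching this kernel's uapi, a 512-byte LBA format, namespace 1, and a passthrough char device fd such as /dev/ng0n1:

	#include <liburing.h>
	#include <linux/nvme_ioctl.h>
	#include <stdint.h>
	#include <string.h>

	int nvme_uring_read(int fd, void *buf, uint32_t len)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		struct nvme_uring_cmd *cmd;
		int ret;

		/* big SQE/CQE support is mandatory, per nvme_uring_cmd_checks() */
		ret = io_uring_queue_init(8, &ring,
					  IORING_SETUP_SQE128 | IORING_SETUP_CQE32);
		if (ret)
			return ret;

		sqe = io_uring_get_sqe(&ring);
		memset(sqe, 0, 2 * sizeof(*sqe));	/* clear the full 128-byte SQE */
		sqe->opcode = IORING_OP_URING_CMD;
		sqe->fd = fd;
		sqe->cmd_op = NVME_URING_CMD_IO;

		cmd = (struct nvme_uring_cmd *)sqe->cmd;
		cmd->opcode = 0x02;		/* NVMe Read */
		cmd->nsid = 1;			/* assumed namespace */
		cmd->addr = (uint64_t)(uintptr_t)buf;
		cmd->data_len = len;
		cmd->cdw10 = 0;			/* starting LBA, low 32 bits */
		cmd->cdw11 = 0;			/* starting LBA, high 32 bits */
		cmd->cdw12 = len / 512 - 1;	/* NLB is 0-based; assumes 512B LBAs */

		io_uring_submit(&ring);
		ret = io_uring_wait_cqe(&ring, &cqe);
		if (!ret) {
			ret = cqe->res;
			io_uring_cqe_seen(&ring, cqe);
		}
		io_uring_queue_exit(&ring);
		return ret;
	}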
@@ -453,8 +669,46 @@ out_unlock:
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
 }
+
+int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+               unsigned int issue_flags)
+{
+       struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
+       struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
+       int srcu_idx = srcu_read_lock(&head->srcu);
+       struct nvme_ns *ns = nvme_find_path(head);
+       int ret = -EINVAL;
+
+       if (ns)
+               ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
+       srcu_read_unlock(&head->srcu, srcu_idx);
+       return ret;
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
+int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
+{
+       struct nvme_ctrl *ctrl = ioucmd->file->private_data;
+       int ret;
+
+       ret = nvme_uring_cmd_checks(issue_flags);
+       if (ret)
+               return ret;
+
+       switch (ioucmd->cmd_op) {
+       case NVME_URING_CMD_ADMIN:
+               ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
+               break;
+       case NVME_URING_CMD_ADMIN_VEC:
+               ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
+               break;
+       default:
+               ret = -ENOTTY;
+       }
+
+       return ret;
+}
+
 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
 {
        struct nvme_ns *ns;
index d464fdf978fbaa449cd353f8b4972bfd4d62f29c..d3e2440d8abb059b44dcab8c32a184e7710f0778 100644 (file)
@@ -437,6 +437,7 @@ static const struct file_operations nvme_ns_head_chr_fops = {
        .release        = nvme_ns_head_chr_release,
        .unlocked_ioctl = nvme_ns_head_chr_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
+       .uring_cmd      = nvme_ns_head_chr_uring_cmd,
 };
 
 static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
index a2b53ca6333590dd7e4cb754435761549a3b1105..9b72b6ecf33c9cfc66a5d45c36225b15781773b7 100644 (file)
@@ -284,6 +284,7 @@ struct nvme_ctrl {
 #endif
        u16 crdt[3];
        u16 oncs;
+       u32 dmrsl;
        u16 oacs;
        u16 sqsize;
        u32 max_namespaces;
@@ -782,7 +783,12 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg);
 long nvme_dev_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg);
+int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+               unsigned int issue_flags);
+int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+               unsigned int issue_flags);
 int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 
 extern const struct attribute_group *nvme_ns_id_attr_groups[];
 extern const struct pr_ops nvme_pr_ops;
index 3aacf1c0d5a5f8cfedb5a03e599f21dd61a0cb0b..5a98a7de09642d974f6c436182b0cc638005defc 100644 (file)
@@ -1439,6 +1439,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
        nvme_init_request(abort_req, &cmd);
 
        abort_req->end_io_data = NULL;
+       abort_req->rq_flags |= RQF_QUIET;
        blk_execute_rq_nowait(abort_req, false, abort_endio);
 
        /*
@@ -1775,6 +1776,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
                dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
                if (IS_ERR(dev->ctrl.admin_q)) {
                        blk_mq_free_tag_set(&dev->admin_tagset);
+                       dev->ctrl.admin_q = NULL;
                        return -ENOMEM;
                }
                if (!blk_get_queue(dev->ctrl.admin_q)) {
@@ -2486,6 +2488,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
        req->end_io_data = nvmeq;
 
        init_completion(&nvmeq->delete_done);
+       req->rq_flags |= RQF_QUIET;
        blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ?
                        nvme_del_cq_end : nvme_del_queue_end);
        return 0;
@@ -2675,7 +2678,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
        mutex_lock(&dev->shutdown_lock);
-       if (pci_is_enabled(pdev)) {
+       if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) {
                u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
                if (dev->ctrl.state == NVME_CTRL_LIVE ||
index d9f19d90131398f8ab5a12395eb041ff34c0fdd1..b87c8ae41d9be892ded04839c72eb2602466faa9 100644 (file)
@@ -2010,10 +2010,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
        struct nvme_rdma_queue *queue = req->queue;
 
        nvme_rdma_stop_queue(queue);
-       if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
-               nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
-               blk_mq_complete_request(rq);
-       }
+       nvmf_complete_timed_out_request(rq);
 }
 
 static enum blk_eh_timer_return
index ad3a2bf2f1e9b49088dfebde5768f10fe029d675..bb67538d241b657dec9ee4bac33029920c493d9d 100644 (file)
@@ -2318,10 +2318,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
        struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
 
        nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
-       if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
-               nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
-               blk_mq_complete_request(rq);
-       }
+       nvmf_complete_timed_out_request(rq);
 }
 
 static enum blk_eh_timer_return
index d886c2c59554f69fdc438aa4596e6a26c01584e9..27a72504d31ce1c81493963d85c1342e05c12c33 100644 (file)
@@ -360,7 +360,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
        ret = __blkdev_issue_discard(ns->bdev,
                        nvmet_lba_to_sect(ns, range->slba),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
-                       GFP_KERNEL, 0, bio);
+                       GFP_KERNEL, bio);
        if (ret && ret != -EOPNOTSUPP) {
                req->error_slba = le64_to_cpu(range->slba);
                return errno_to_nvme_status(req, ret);
index e34718b095504db20c5922112ce687081f5523bd..82b61acf7a72bd7a052b4db760c1731059bfa042 100644 (file)
@@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct blk_zone *z,
 
 bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
 {
-       struct request_queue *q = ns->bdev->bd_disk->queue;
-       u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
+       u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev));
        struct gendisk *bd_disk = ns->bdev->bd_disk;
        int ret;
 
index ec315b060cd50d26a6a2e559f5427f79646f2726..2f248d0acc04830156adf4ea0207ae7cbc51a84a 100644 (file)
@@ -973,16 +973,24 @@ static void __init early_init_dt_check_for_elfcorehdr(unsigned long node)
 
 static unsigned long chosen_node_offset = -FDT_ERR_NOTFOUND;
 
+/*
+ * The main use of linux,usable-memory-range is for the crash dump kernel.
+ * Originally there was only one usable-memory region; now there may be
+ * two: a low region and a high region.
+ * For compatibility with existing user space and older kdump, the low
+ * region is always the last range of linux,usable-memory-range, if present.
+ */
+#define MAX_USABLE_RANGES              2
+
 /**
  * early_init_dt_check_for_usable_mem_range - Decode usable memory range
  * location from flat tree
  */
 void __init early_init_dt_check_for_usable_mem_range(void)
 {
-       const __be32 *prop;
-       int len;
-       phys_addr_t cap_mem_addr;
-       phys_addr_t cap_mem_size;
+       struct memblock_region rgn[MAX_USABLE_RANGES] = {0};
+       const __be32 *prop, *endp;
+       int len, i;
        unsigned long node = chosen_node_offset;
 
        if ((long)node < 0)
@@ -991,16 +999,21 @@ void __init early_init_dt_check_for_usable_mem_range(void)
        pr_debug("Looking for usable-memory-range property... ");
 
        prop = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
-       if (!prop || (len < (dt_root_addr_cells + dt_root_size_cells)))
+       if (!prop || (len % (dt_root_addr_cells + dt_root_size_cells)))
                return;
 
-       cap_mem_addr = dt_mem_next_cell(dt_root_addr_cells, &prop);
-       cap_mem_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+       endp = prop + (len / sizeof(__be32));
+       for (i = 0; i < MAX_USABLE_RANGES && prop < endp; i++) {
+               rgn[i].base = dt_mem_next_cell(dt_root_addr_cells, &prop);
+               rgn[i].size = dt_mem_next_cell(dt_root_size_cells, &prop);
 
-       pr_debug("cap_mem_start=%pa cap_mem_size=%pa\n", &cap_mem_addr,
-                &cap_mem_size);
+               pr_debug("cap_mem_regions[%d]: base=%pa, size=%pa\n",
+                        i, &rgn[i].base, &rgn[i].size);
+       }
 
-       memblock_cap_memory_range(cap_mem_addr, cap_mem_size);
+       memblock_cap_memory_range(rgn[0].base, rgn[0].size);
+       for (i = 1; i < MAX_USABLE_RANGES && rgn[i].size; i++)
+               memblock_add(rgn[i].base, rgn[i].size);
 }
 
 #ifdef CONFIG_SERIAL_EARLYCON
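A hedged illustration of the property layout the loop above now accepts; the addresses and sizes are invented for the example:

	/*
	 * With dt_root_addr_cells = 2 and dt_root_size_cells = 2, a crash
	 * kernel given both a high and a low region would see something
	 * like (low region deliberately last, per the comment above):
	 *
	 *   linux,usable-memory-range = <0x8 0x80000000 0x0 0x10000000
	 *                                0x0 0x60000000 0x0 0x08000000>;
	 *
	 * memblock is then capped to the first range and the remaining
	 * range(s) are added back with memblock_add().
	 */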
index b9bd1cff179388c10705ac7cd817f241a8023082..8d374cc552be5f2805855d0953b27f81cba7156e 100644 (file)
@@ -386,6 +386,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
                                crashk_res.end - crashk_res.start + 1);
                if (ret)
                        goto out;
+
+               if (crashk_low_res.end) {
+                       ret = fdt_appendprop_addrrange(fdt, 0, chosen_node,
+                                       "linux,usable-memory-range",
+                                       crashk_low_res.start,
+                                       crashk_low_res.end - crashk_low_res.start + 1);
+                       if (ret)
+                               goto out;
+               }
        }
 
        /* add bootargs */
index a16b74f32aa9dfd0be4c4112932662d99752f6e8..55d62b82c650859eeabd75d9f9e7b76cc3dc435d 100644 (file)
@@ -509,6 +509,7 @@ EXPORT_SYMBOL_GPL(of_platform_default_populate);
 
 #ifndef CONFIG_PPC
 static const struct of_device_id reserved_mem_matches[] = {
+       { .compatible = "phram" },
        { .compatible = "qcom,rmtfs-mem" },
        { .compatible = "qcom,cmd-db" },
        { .compatible = "qcom,smem" },
index 440ab5a03df9f810ef7b2c141b4337bb1f7616a6..485ea980bde7d65841297a2c99c0acb51f1011ab 100644 (file)
@@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
  * Returns 0 on success or a proper -EINVAL value in case of error.
  */
 static int __maybe_unused
-_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
+_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
 {
        struct dev_pm_opp *opp;
        unsigned long opp_freq, opp_power;
@@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
  * Returns -EINVAL if the power calculation failed because of missing
  * parameters, 0 otherwise.
  */
-static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz,
-                                    struct device *dev)
+static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
+                                    unsigned long *kHz)
 {
        struct dev_pm_opp *opp;
        struct device_node *np;
index 6ab90891801d83e29cd5b2a4ccd3f524c134ad04..816028c0f6edb0bea64c980d3573143388224d67 100644 (file)
@@ -1550,6 +1550,11 @@ static const struct qcom_pcie_cfg sc7280_cfg = {
        .pipe_clk_need_muxing = true,
 };
 
+static const struct qcom_pcie_cfg sc8180x_cfg = {
+       .ops = &ops_1_9_0,
+       .has_tbu_clk = true,
+};
+
 static const struct dw_pcie_ops dw_pcie_ops = {
        .link_up = qcom_pcie_link_up,
        .start_link = qcom_pcie_start_link,
@@ -1656,7 +1661,7 @@ static const struct of_device_id qcom_pcie_match[] = {
        { .compatible = "qcom,pcie-qcs404", .data = &ipq4019_cfg },
        { .compatible = "qcom,pcie-sdm845", .data = &sdm845_cfg },
        { .compatible = "qcom,pcie-sm8250", .data = &sm8250_cfg },
-       { .compatible = "qcom,pcie-sc8180x", .data = &sm8250_cfg },
+       { .compatible = "qcom,pcie-sc8180x", .data = &sc8180x_cfg },
        { .compatible = "qcom,pcie-sm8450-pcie0", .data = &sm8450_pcie0_cfg },
        { .compatible = "qcom,pcie-sm8450-pcie1", .data = &sm8450_pcie1_cfg },
        { .compatible = "qcom,pcie-sc7280", .data = &sc7280_cfg },
index 09d9bf465d727b29461ed9e4e5e1870fe798f189..ffec82c8a523fe52c756bf30fe4e8e4cbdb52794 100644 (file)
@@ -272,7 +272,6 @@ struct advk_pcie {
                u32 actions;
        } wins[OB_WIN_COUNT];
        u8 wins_count;
-       int irq;
        struct irq_domain *rp_irq_domain;
        struct irq_domain *irq_domain;
        struct irq_chip irq_chip;
@@ -1570,26 +1569,21 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie)
        }
 }
 
-static void advk_pcie_irq_handler(struct irq_desc *desc)
+static irqreturn_t advk_pcie_irq_handler(int irq, void *arg)
 {
-       struct advk_pcie *pcie = irq_desc_get_handler_data(desc);
-       struct irq_chip *chip = irq_desc_get_chip(desc);
-       u32 val, mask, status;
+       struct advk_pcie *pcie = arg;
+       u32 status;
 
-       chained_irq_enter(chip, desc);
+       status = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG);
+       if (!(status & PCIE_IRQ_CORE_INT))
+               return IRQ_NONE;
 
-       val = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG);
-       mask = advk_readl(pcie, HOST_CTRL_INT_MASK_REG);
-       status = val & ((~mask) & PCIE_IRQ_ALL_MASK);
+       advk_pcie_handle_int(pcie);
 
-       if (status & PCIE_IRQ_CORE_INT) {
-               advk_pcie_handle_int(pcie);
+       /* Clear interrupt */
+       advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG);
 
-               /* Clear interrupt */
-               advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG);
-       }
-
-       chained_irq_exit(chip, desc);
+       return IRQ_HANDLED;
 }
 
 static int advk_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
@@ -1669,7 +1663,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
        struct advk_pcie *pcie;
        struct pci_host_bridge *bridge;
        struct resource_entry *entry;
-       int ret;
+       int ret, irq;
 
        bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct advk_pcie));
        if (!bridge)
@@ -1755,9 +1749,17 @@ static int advk_pcie_probe(struct platform_device *pdev)
        if (IS_ERR(pcie->base))
                return PTR_ERR(pcie->base);
 
-       pcie->irq = platform_get_irq(pdev, 0);
-       if (pcie->irq < 0)
-               return pcie->irq;
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0)
+               return irq;
+
+       ret = devm_request_irq(dev, irq, advk_pcie_irq_handler,
+                              IRQF_SHARED | IRQF_NO_THREAD, "advk-pcie",
+                              pcie);
+       if (ret) {
+               dev_err(dev, "Failed to register interrupt\n");
+               return ret;
+       }
 
        pcie->reset_gpio = devm_gpiod_get_from_of_node(dev, dev->of_node,
                                                       "reset-gpios", 0,
@@ -1814,15 +1816,12 @@ static int advk_pcie_probe(struct platform_device *pdev)
                return ret;
        }
 
-       irq_set_chained_handler_and_data(pcie->irq, advk_pcie_irq_handler, pcie);
-
        bridge->sysdata = pcie;
        bridge->ops = &advk_pcie_ops;
        bridge->map_irq = advk_pcie_map_irq;
 
        ret = pci_host_probe(bridge);
        if (ret < 0) {
-               irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
                advk_pcie_remove_rp_irq_domain(pcie);
                advk_pcie_remove_msi_irq_domain(pcie);
                advk_pcie_remove_irq_domain(pcie);
@@ -1871,9 +1870,6 @@ static int advk_pcie_remove(struct platform_device *pdev)
        advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG);
        advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG);
 
-       /* Remove IRQ handler */
-       irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
-
        /* Remove IRQ domains */
        advk_pcie_remove_rp_irq_domain(pcie);
        advk_pcie_remove_msi_irq_domain(pcie);
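The conversion above swaps a chained flow handler for an ordinary shared interrupt, which devres can then tear down automatically; a hedged sketch of the general pattern with illustrative names (example_dev and its helpers are hypothetical):

	/* "not ours" detection is required for IRQF_SHARED to work */
	static irqreturn_t example_irq(int irq, void *arg)
	{
		struct example_dev *edev = arg;	/* hypothetical driver state */

		if (!example_irq_pending(edev))
			return IRQ_NONE;
		example_handle_and_ack(edev);
		return IRQ_HANDLED;
	}

	/* in probe; no matching cleanup needed in error paths or remove */
	ret = devm_request_irq(dev, irq, example_irq,
			       IRQF_SHARED | IRQF_NO_THREAD, "example", edev);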
index 1f15ab7eabf81dca8365f26144afbd5ff3ffa40c..3787876ecb245c69d13c20abdf613504c3c9c2e4 100644 (file)
@@ -1374,6 +1374,9 @@ void pci_acpi_setup(struct device *dev, struct acpi_device *adev)
 
        acpi_pci_wakeup(pci_dev, false);
        acpi_device_power_add_dependent(adev, dev);
+
+       if (pci_is_bridge(pci_dev))
+               acpi_dev_power_up_children_with_adr(adev);
 }
 
 void pci_acpi_cleanup(struct device *dev, struct acpi_device *adev)
index 9ecce435fb3f125abb5ba52455d4a6d2f40fdcdf..d25122fbe98ab57ec472aeb61b2d8f90c28a20ec 100644 (file)
@@ -2920,6 +2920,18 @@ static const struct dmi_system_id bridge_d3_blacklist[] = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
                        DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"),
                },
+       },
+       {
+               /*
+                * Downstream device is not accessible after putting a root port
+                * into D3cold and back into D0 on Elo i2.
+                */
+               .ident = "Elo i2",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"),
+               },
        },
 #endif
        { }
index 3edc1565a27c5fdfd8fe1728ad83ade669ca8a11..689271c4245c2ef99529e62c8d435cdcbf36ad1a 100644 (file)
@@ -709,9 +709,8 @@ static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
        if (pdev == NULL)
                goto out;
 
-       pdev->sh_info =
-           (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
-       if (pdev->sh_info == NULL) {
+       if (xenbus_setup_ring(xdev, GFP_KERNEL, (void **)&pdev->sh_info, 1,
+                             &pdev->gnt_ref)) {
                kfree(pdev);
                pdev = NULL;
                goto out;
@@ -729,7 +728,6 @@ static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
        spin_lock_init(&pdev->sh_info_lock);
 
        pdev->evtchn = INVALID_EVTCHN;
-       pdev->gnt_ref = INVALID_GRANT_REF;
        pdev->irq = -1;
 
        INIT_WORK(&pdev->op_work, pcifront_do_aer);
@@ -754,11 +752,7 @@ static void free_pdev(struct pcifront_device *pdev)
        if (pdev->evtchn != INVALID_EVTCHN)
                xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
 
-       if (pdev->gnt_ref != INVALID_GRANT_REF)
-               gnttab_end_foreign_access(pdev->gnt_ref,
-                                         (unsigned long)pdev->sh_info);
-       else
-               free_page((unsigned long)pdev->sh_info);
+       xenbus_teardown_ring((void **)&pdev->sh_info, 1, &pdev->gnt_ref);
 
        dev_set_drvdata(&pdev->xdev->dev, NULL);
 
@@ -769,13 +763,6 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
 {
        int err = 0;
        struct xenbus_transaction trans;
-       grant_ref_t gref;
-
-       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &gref);
-       if (err < 0)
-               goto out;
-
-       pdev->gnt_ref = gref;
 
        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
        if (err)
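xenbus_setup_ring() now owns both the page allocation and the grant, so the setup and teardown paths pair up symmetrically; a hedged sketch of the lifecycle using the calls from this hunk:

	/* allocate the shared page(s) and grant them to the backend */
	ret = xenbus_setup_ring(xdev, GFP_KERNEL, (void **)&shared, 1, &gref);
	if (ret)
		return ret;

	/* ... use the shared ring ... */

	/* end the grant and free the page(s) in one call */
	xenbus_teardown_ring((void **)&shared, 1, &gref);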
index 9c1d82be7a2feea0f9cecf92741446bf644cf35a..80d8309652a4dafcc8dac6c8bf3eff592b2d5254 100644 (file)
@@ -39,7 +39,7 @@
 #define CMN_CHILD_NODE_ADDR            GENMASK(27, 0)
 #define CMN_CHILD_NODE_EXTERNAL                BIT(31)
 
-#define CMN_MAX_DIMENSION              8
+#define CMN_MAX_DIMENSION              12
 #define CMN_MAX_XPS                    (CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
 #define CMN_MAX_DTMS                   (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
 
 #define CMN_INFO_RSP_VC_NUM            GENMASK_ULL(53, 52)
 #define CMN_INFO_DAT_VC_NUM            GENMASK_ULL(51, 50)
 
+#define CMN_CFGM_INFO_GLOBAL_1         0x908
+#define CMN_INFO_SNP_VC_NUM            GENMASK_ULL(3, 2)
+#define CMN_INFO_REQ_VC_NUM            GENMASK_ULL(1, 0)
+
 /* XPs also have some local topology info which has uses too */
 #define CMN_MXP__CONNECT_INFO_P0       0x0008
 #define CMN_MXP__CONNECT_INFO_P1       0x0010
 #define CMN_MXP__CONNECT_INFO_P3       0x0030
 #define CMN_MXP__CONNECT_INFO_P4       0x0038
 #define CMN_MXP__CONNECT_INFO_P5       0x0040
+#define CMN__CONNECT_INFO_DEVICE_TYPE  GENMASK_ULL(4, 0)
 
 /* PMU registers occupy the 3rd 4KB page of each node's region */
 #define CMN_PMU_OFFSET                 0x2000
 
 /* For most nodes, this is all there is */
 #define CMN_PMU_EVENT_SEL              0x000
-#define CMN_PMU_EVENTn_ID_SHIFT(n)     ((n) * 8)
+#define CMN__PMU_CBUSY_SNTHROTTLE_SEL  GENMASK_ULL(44, 42)
+#define CMN__PMU_CLASS_OCCUP_ID                GENMASK_ULL(36, 35)
+/* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */
+#define CMN__PMU_OCCUP1_ID             GENMASK_ULL(34, 32)
+
+/* HN-Ps are weird... */
+#define CMN_HNP_PMU_EVENT_SEL          0x008
 
 /* DTMs live in the PMU space of XP registers */
 #define CMN_DTM_WPn(n)                 (0x1A0 + (n) * 0x18)
 #define CMN_DTM_WPn_CONFIG(n)          (CMN_DTM_WPn(n) + 0x00)
-#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2 GENMASK_ULL(18,17)
+#define CMN_DTM_WPn_CONFIG_WP_CHN_NUM  GENMASK_ULL(20, 19)
+#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2 GENMASK_ULL(18, 17)
 #define CMN_DTM_WPn_CONFIG_WP_COMBINE  BIT(9)
 #define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE        BIT(8)
 #define CMN600_WPn_CONFIG_WP_COMBINE   BIT(6)
 
 /* Event attributes */
 #define CMN_CONFIG_TYPE                        GENMASK_ULL(15, 0)
-#define CMN_CONFIG_EVENTID             GENMASK_ULL(23, 16)
-#define CMN_CONFIG_OCCUPID             GENMASK_ULL(27, 24)
+#define CMN_CONFIG_EVENTID             GENMASK_ULL(26, 16)
+#define CMN_CONFIG_OCCUPID             GENMASK_ULL(30, 27)
 #define CMN_CONFIG_BYNODEID            BIT_ULL(31)
 #define CMN_CONFIG_NODEID              GENMASK_ULL(47, 32)
 
 
 
 enum cmn_model {
-       CMN_ANY = -1,
        CMN600 = 1,
-       CI700 = 2,
+       CMN650 = 2,
+       CMN700 = 4,
+       CI700 = 8,
+       /* ...and then we can use bitmap tricks for commonality */
+       CMN_ANY = -1,
+       NOT_CMN600 = -2,
+       CMN_650ON = CMN650 | CMN700,
 };
 
 /* CMN-600 r0px shouldn't exist in silicon, thankfully */
@@ -191,6 +208,14 @@ enum cmn_revision {
        CMN600_R2P0,
        CMN600_R3P0,
        CMN600_R3P1,
+       CMN650_R0P0 = 0,
+       CMN650_R1P0,
+       CMN650_R1P1,
+       CMN650_R2P0,
+       CMN650_R1P2,
+       CMN700_R0P0 = 0,
+       CMN700_R1P0,
+       CMN700_R2P0,
        CI700_R0P0 = 0,
        CI700_R1P0,
        CI700_R2P0,
@@ -211,13 +236,26 @@ enum cmn_node_type {
        CMN_TYPE_RND = 0xd,
        CMN_TYPE_RNSAM = 0xf,
        CMN_TYPE_MTSX,
+       CMN_TYPE_HNP,
        CMN_TYPE_CXRA = 0x100,
-       CMN_TYPE_CXHA = 0x101,
-       CMN_TYPE_CXLA = 0x102,
+       CMN_TYPE_CXHA,
+       CMN_TYPE_CXLA,
+       CMN_TYPE_CCRA,
+       CMN_TYPE_CCHA,
+       CMN_TYPE_CCLA,
+       CMN_TYPE_CCLA_RNI,
        /* Not a real node type */
        CMN_TYPE_WP = 0x7770
 };
 
+enum cmn_filter_select {
+       SEL_NONE = -1,
+       SEL_OCCUP1ID,
+       SEL_CLASS_OCCUP_ID,
+       SEL_CBUSY_SNTHROTTLE_SEL,
+       SEL_MAX
+};
+
 struct arm_cmn_node {
        void __iomem *pmu_base;
        u16 id, logid;
@@ -227,15 +265,17 @@ struct arm_cmn_node {
        union {
                /* DN/HN-F/CXHA */
                struct {
-                       u8 occupid_val;
-                       u8 occupid_count;
-               };
+                       u8 val : 4;
+                       u8 count : 4;
+               } occupid[SEL_MAX];
                /* XP */
                u8 dtc;
        };
        union {
                u8 event[4];
                __le32 event_sel;
+               u16 event_w[4];
+               __le64 event_sel_w;
        };
 };
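The occupid array above packs a 4-bit filter value and a 4-bit user count per filter selector, so events sharing a node can share a filter register only when they agree on its value; a hedged sketch of that claim logic (not the driver's exact code):

	static bool example_claim_filter(struct arm_cmn_node *dn,
					 enum cmn_filter_select fsel, u8 val)
	{
		if (fsel == SEL_NONE)
			return true;
		if (dn->occupid[fsel].count && dn->occupid[fsel].val != val)
			return false;	/* busy with a different value */
		dn->occupid[fsel].val = val;
		dn->occupid[fsel].count++;
		return true;
	}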
 
@@ -278,6 +318,8 @@ struct arm_cmn {
        struct {
                unsigned int rsp_vc_num : 2;
                unsigned int dat_vc_num : 2;
+               unsigned int snp_vc_num : 2;
+               unsigned int req_vc_num : 2;
        };
 
        struct arm_cmn_node *xps;
@@ -307,9 +349,7 @@ struct arm_cmn_nodeid {
 
 static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
 {
-       int dim = max(cmn->mesh_x, cmn->mesh_y);
-
-       return dim > 4 ? 3 : 2;
+       return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
 }
 
 static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
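A worked check of the new ID-width math, as a standalone userspace sketch (fls() reimplemented with the kernel's semantics):

	#include <stdio.h>

	static int fls32(unsigned int v)
	{
		int n = 0;

		while (v) {
			n++;
			v >>= 1;
		}
		return n;
	}

	int main(void)
	{
		/* expect 2, 3 and 4 bits: a floor of 2, else enough for max-1 */
		unsigned int dims[][2] = { {4, 4}, {5, 3}, {12, 12} };

		for (int i = 0; i < 3; i++) {
			unsigned int x = dims[i][0], y = dims[i][1];

			printf("%ux%u -> %d bits\n", x, y,
			       fls32((x - 1) | (y - 1) | 2));
		}
		return 0;
	}

The old "dim > 4 ? 3 : 2" rule would have undersized the IDs on the 12x12 meshes that the raised CMN_MAX_DIMENSION now allows.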
@@ -361,7 +401,8 @@ static struct dentry *arm_cmn_debugfs;
 #ifdef CONFIG_DEBUG_FS
 static const char *arm_cmn_device_type(u8 type)
 {
-       switch(type) {
+       switch(FIELD_GET(CMN__CONNECT_INFO_DEVICE_TYPE, type)) {
+               case 0x00: return "        |";
                case 0x01: return "  RN-I  |";
                case 0x02: return "  RN-D  |";
                case 0x04: return " RN-F_B |";
@@ -371,6 +412,7 @@ static const char *arm_cmn_device_type(u8 type)
                case 0x08: return "  HN-T  |";
                case 0x09: return "  HN-I  |";
                case 0x0a: return "  HN-D  |";
+               case 0x0b: return "  HN-P  |";
                case 0x0c: return "  SN-F  |";
                case 0x0d: return "  SBSX  |";
                case 0x0e: return "  HN-F  |";
@@ -383,8 +425,12 @@ static const char *arm_cmn_device_type(u8 type)
                case 0x15: return "RN-F_D_E|";
                case 0x16: return " RN-F_C |";
                case 0x17: return "RN-F_C_E|";
+               case 0x18: return " RN-F_E |";
+               case 0x19: return "RN-F_E_E|";
                case 0x1c: return "  MTSX  |";
-               default:   return "        |";
+               case 0x1d: return "  HN-V  |";
+               case 0x1e: return "  CCG   |";
+               default:   return "  ????  |";
        }
 }
 
@@ -492,11 +538,13 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
 
 struct arm_cmn_hw_event {
        struct arm_cmn_node *dn;
-       u64 dtm_idx[2];
+       u64 dtm_idx[4];
        unsigned int dtc_idx;
        u8 dtcs_used;
        u8 num_dns;
        u8 dtm_offset;
+       bool wide_sel;
+       enum cmn_filter_select filter_sel;
 };
 
 #define for_each_hw_dn(hw, dn, i) \
@@ -522,7 +570,8 @@ struct arm_cmn_event_attr {
        struct device_attribute attr;
        enum cmn_model model;
        enum cmn_node_type type;
-       u8 eventid;
+       enum cmn_filter_select fsel;
+       u16 eventid;
        u8 occupid;
 };
 
@@ -532,23 +581,17 @@ struct arm_cmn_format_attr {
        int config;
 };
 
-#define CMN_EVENT_ATTR(_model, _name, _type, _eventid, _occupid)       \
+#define _CMN_EVENT_ATTR(_model, _name, _type, _eventid, _occupid, _fsel)\
        (&((struct arm_cmn_event_attr[]) {{                             \
                .attr = __ATTR(_name, 0444, arm_cmn_event_show, NULL),  \
                .model = _model,                                        \
                .type = _type,                                          \
                .eventid = _eventid,                                    \
                .occupid = _occupid,                                    \
+               .fsel = _fsel,                                          \
        }})[0].attr.attr)
-
-static bool arm_cmn_is_occup_event(enum cmn_model model,
-                                  enum cmn_node_type type, unsigned int id)
-{
-       if (type == CMN_TYPE_DVM)
-               return (model == CMN600 && id == 0x05) ||
-                      (model == CI700 && id == 0x0c);
-       return type == CMN_TYPE_HNF && id == 0x0f;
-}
+#define CMN_EVENT_ATTR(_model, _name, _type, _eventid)                 \
+       _CMN_EVENT_ATTR(_model, _name, _type, _eventid, 0, SEL_NONE)
 
 static ssize_t arm_cmn_event_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
@@ -565,7 +608,7 @@ static ssize_t arm_cmn_event_show(struct device *dev,
                                  "type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n",
                                  eattr->type, eattr->eventid);
 
-       if (arm_cmn_is_occup_event(eattr->model, eattr->type, eattr->eventid))
+       if (eattr->fsel > SEL_NONE)
                return sysfs_emit(buf, "type=0x%x,eventid=0x%x,occupid=0x%x\n",
                                  eattr->type, eattr->eventid, eattr->occupid);
 
@@ -580,20 +623,25 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
        struct device *dev = kobj_to_dev(kobj);
        struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
        struct arm_cmn_event_attr *eattr;
+       enum cmn_node_type type;
+       u16 eventid;
 
        eattr = container_of(attr, typeof(*eattr), attr.attr);
 
        if (!(eattr->model & cmn->model))
                return 0;
 
+       type = eattr->type;
+       eventid = eattr->eventid;
+
        /* Watchpoints aren't nodes, so avoid confusion */
-       if (eattr->type == CMN_TYPE_WP)
+       if (type == CMN_TYPE_WP)
                return attr->mode;
 
        /* Hide XP events for unused interfaces/channels */
-       if (eattr->type == CMN_TYPE_XP) {
-               unsigned int intf = (eattr->eventid >> 2) & 7;
-               unsigned int chan = eattr->eventid >> 5;
+       if (type == CMN_TYPE_XP) {
+               unsigned int intf = (eventid >> 2) & 7;
+               unsigned int chan = eventid >> 5;
 
                if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3)))
                        return 0;
@@ -602,43 +650,107 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
                        return 0;
 
                if ((chan == 5 && cmn->rsp_vc_num < 2) ||
-                   (chan == 6 && cmn->dat_vc_num < 2))
+                   (chan == 6 && cmn->dat_vc_num < 2) ||
+                   (chan == 7 && cmn->snp_vc_num < 2) ||
+                   (chan == 8 && cmn->req_vc_num < 2))
                        return 0;
        }
 
        /* Revision-specific differences */
-       if (cmn->model == CMN600 && cmn->rev < CMN600_R1P2) {
-               if (eattr->type == CMN_TYPE_HNF && eattr->eventid == 0x1b)
-                       return 0;
+       if (cmn->model == CMN600) {
+               if (cmn->rev < CMN600_R1P3) {
+                       if (type == CMN_TYPE_CXRA && eventid > 0x10)
+                               return 0;
+               }
+               if (cmn->rev < CMN600_R1P2) {
+                       if (type == CMN_TYPE_HNF && eventid == 0x1b)
+                               return 0;
+                       if (type == CMN_TYPE_CXRA || type == CMN_TYPE_CXHA)
+                               return 0;
+               }
+       } else if (cmn->model == CMN650) {
+               if (cmn->rev < CMN650_R2P0 || cmn->rev == CMN650_R1P2) {
+                       if (type == CMN_TYPE_HNF && eventid > 0x22)
+                               return 0;
+                       if (type == CMN_TYPE_SBSX && eventid == 0x17)
+                               return 0;
+                       if (type == CMN_TYPE_RNI && eventid > 0x10)
+                               return 0;
+               }
+       } else if (cmn->model == CMN700) {
+               if (cmn->rev < CMN700_R2P0) {
+                       if (type == CMN_TYPE_HNF && eventid > 0x2c)
+                               return 0;
+                       if (type == CMN_TYPE_CCHA && eventid > 0x74)
+                               return 0;
+                       if (type == CMN_TYPE_CCLA && eventid > 0x27)
+                               return 0;
+               }
+               if (cmn->rev < CMN700_R1P0) {
+                       if (type == CMN_TYPE_HNF && eventid > 0x2b)
+                               return 0;
+               }
        }
 
-       if (!arm_cmn_node(cmn, eattr->type))
+       if (!arm_cmn_node(cmn, type))
                return 0;
 
        return attr->mode;
 }
 
-#define _CMN_EVENT_DVM(_model, _name, _event, _occup)          \
-       CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup)
+#define _CMN_EVENT_DVM(_model, _name, _event, _occup, _fsel)   \
+       _CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup, _fsel)
 #define CMN_EVENT_DTC(_name)                                   \
-       CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0, 0)
-#define _CMN_EVENT_HNF(_model, _name, _event, _occup)          \
-       CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup)
+       CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0)
+#define _CMN_EVENT_HNF(_model, _name, _event, _occup, _fsel)           \
+       _CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup, _fsel)
 #define CMN_EVENT_HNI(_name, _event)                           \
-       CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event, 0)
+       CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event)
+#define CMN_EVENT_HNP(_name, _event)                           \
+       CMN_EVENT_ATTR(CMN_ANY, hnp_##_name, CMN_TYPE_HNP, _event)
 #define __CMN_EVENT_XP(_name, _event)                          \
-       CMN_EVENT_ATTR(CMN_ANY, mxp_##_name, CMN_TYPE_XP, _event, 0)
+       CMN_EVENT_ATTR(CMN_ANY, mxp_##_name, CMN_TYPE_XP, _event)
 #define CMN_EVENT_SBSX(_model, _name, _event)                  \
-       CMN_EVENT_ATTR(_model, sbsx_##_name, CMN_TYPE_SBSX, _event, 0)
+       CMN_EVENT_ATTR(_model, sbsx_##_name, CMN_TYPE_SBSX, _event)
 #define CMN_EVENT_RNID(_model, _name, _event)                  \
-       CMN_EVENT_ATTR(_model, rnid_##_name, CMN_TYPE_RNI, _event, 0)
+       CMN_EVENT_ATTR(_model, rnid_##_name, CMN_TYPE_RNI, _event)
 #define CMN_EVENT_MTSX(_name, _event)                          \
-       CMN_EVENT_ATTR(CMN_ANY, mtsx_##_name, CMN_TYPE_MTSX, _event, 0)
+       CMN_EVENT_ATTR(CMN_ANY, mtsx_##_name, CMN_TYPE_MTSX, _event)
+#define CMN_EVENT_CXRA(_model, _name, _event)                          \
+       CMN_EVENT_ATTR(_model, cxra_##_name, CMN_TYPE_CXRA, _event)
+#define CMN_EVENT_CXHA(_name, _event)                          \
+       CMN_EVENT_ATTR(CMN_ANY, cxha_##_name, CMN_TYPE_CXHA, _event)
+#define CMN_EVENT_CCRA(_name, _event)                          \
+       CMN_EVENT_ATTR(CMN_ANY, ccra_##_name, CMN_TYPE_CCRA, _event)
+#define CMN_EVENT_CCHA(_name, _event)                          \
+       CMN_EVENT_ATTR(CMN_ANY, ccha_##_name, CMN_TYPE_CCHA, _event)
+#define CMN_EVENT_CCLA(_name, _event)                          \
+       CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
+#define CMN_EVENT_CCLA_RNI(_name, _event)                              \
+       CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event)
 
 #define CMN_EVENT_DVM(_model, _name, _event)                   \
-       _CMN_EVENT_DVM(_model, _name, _event, 0)
+       _CMN_EVENT_DVM(_model, _name, _event, 0, SEL_NONE)
+#define CMN_EVENT_DVM_OCC(_model, _name, _event)                       \
+       _CMN_EVENT_DVM(_model, _name##_all, _event, 0, SEL_OCCUP1ID),   \
+       _CMN_EVENT_DVM(_model, _name##_dvmop, _event, 1, SEL_OCCUP1ID), \
+       _CMN_EVENT_DVM(_model, _name##_dvmsync, _event, 2, SEL_OCCUP1ID)
 #define CMN_EVENT_HNF(_model, _name, _event)                   \
-       _CMN_EVENT_HNF(_model, _name, _event, 0)
+       _CMN_EVENT_HNF(_model, _name, _event, 0, SEL_NONE)
+#define CMN_EVENT_HNF_CLS(_model, _name, _event)                       \
+       _CMN_EVENT_HNF(_model, _name##_class0, _event, 0, SEL_CLASS_OCCUP_ID), \
+       _CMN_EVENT_HNF(_model, _name##_class1, _event, 1, SEL_CLASS_OCCUP_ID), \
+       _CMN_EVENT_HNF(_model, _name##_class2, _event, 2, SEL_CLASS_OCCUP_ID), \
+       _CMN_EVENT_HNF(_model, _name##_class3, _event, 3, SEL_CLASS_OCCUP_ID)
+#define CMN_EVENT_HNF_SNT(_model, _name, _event)                       \
+       _CMN_EVENT_HNF(_model, _name##_all, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_HNF(_model, _name##_group0_read, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_HNF(_model, _name##_group0_write, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_HNF(_model, _name##_group1_read, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_HNF(_model, _name##_group1_write, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
+
 #define _CMN_EVENT_XP(_name, _event)                           \
        __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)),         \
        __CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)),         \
@@ -657,7 +769,9 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
        _CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5)),        \
        _CMN_EVENT_XP(pub_##_name, (_event) | (4 << 5)),        \
        _CMN_EVENT_XP(rsp2_##_name, (_event) | (5 << 5)),       \
-       _CMN_EVENT_XP(dat2_##_name, (_event) | (6 << 5))
+       _CMN_EVENT_XP(dat2_##_name, (_event) | (6 << 5)),       \
+       _CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)),       \
+       _CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5))
 
 
 static struct attribute *arm_cmn_event_attrs[] = {
@@ -672,23 +786,27 @@ static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_DVM(CMN600, rxreq_dvmsync,            0x02),
        CMN_EVENT_DVM(CMN600, rxreq_dvmop_vmid_filtered, 0x03),
        CMN_EVENT_DVM(CMN600, rxreq_retried,            0x04),
-       _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_all, 0x05, 0),
-       _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_dvmop, 0x05, 1),
-       _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_dvmsync, 0x05, 2),
-       CMN_EVENT_DVM(CI700, dvmop_tlbi,                0x01),
-       CMN_EVENT_DVM(CI700, dvmop_bpi,                 0x02),
-       CMN_EVENT_DVM(CI700, dvmop_pici,                0x03),
-       CMN_EVENT_DVM(CI700, dvmop_vici,                0x04),
-       CMN_EVENT_DVM(CI700, dvmsync,                   0x05),
-       CMN_EVENT_DVM(CI700, vmid_filtered,             0x06),
-       CMN_EVENT_DVM(CI700, rndop_filtered,            0x07),
-       CMN_EVENT_DVM(CI700, retry,                     0x08),
-       CMN_EVENT_DVM(CI700, txsnp_flitv,               0x09),
-       CMN_EVENT_DVM(CI700, txsnp_stall,               0x0a),
-       CMN_EVENT_DVM(CI700, trkfull,                   0x0b),
-       _CMN_EVENT_DVM(CI700, trk_occupancy_all,        0x0c, 0),
-       _CMN_EVENT_DVM(CI700, trk_occupancy_dvmop,      0x0c, 1),
-       _CMN_EVENT_DVM(CI700, trk_occupancy_dvmsync,    0x0c, 2),
+       CMN_EVENT_DVM_OCC(CMN600, rxreq_trk_occupancy,  0x05),
+       CMN_EVENT_DVM(NOT_CMN600, dvmop_tlbi,           0x01),
+       CMN_EVENT_DVM(NOT_CMN600, dvmop_bpi,            0x02),
+       CMN_EVENT_DVM(NOT_CMN600, dvmop_pici,           0x03),
+       CMN_EVENT_DVM(NOT_CMN600, dvmop_vici,           0x04),
+       CMN_EVENT_DVM(NOT_CMN600, dvmsync,              0x05),
+       CMN_EVENT_DVM(NOT_CMN600, vmid_filtered,        0x06),
+       CMN_EVENT_DVM(NOT_CMN600, rndop_filtered,       0x07),
+       CMN_EVENT_DVM(NOT_CMN600, retry,                0x08),
+       CMN_EVENT_DVM(NOT_CMN600, txsnp_flitv,          0x09),
+       CMN_EVENT_DVM(NOT_CMN600, txsnp_stall,          0x0a),
+       CMN_EVENT_DVM(NOT_CMN600, trkfull,              0x0b),
+       CMN_EVENT_DVM_OCC(NOT_CMN600, trk_occupancy,    0x0c),
+       CMN_EVENT_DVM_OCC(CMN700, trk_occupancy_cxha,   0x0d),
+       CMN_EVENT_DVM_OCC(CMN700, trk_occupancy_pdn,    0x0e),
+       CMN_EVENT_DVM(CMN700, trk_alloc,                0x0f),
+       CMN_EVENT_DVM(CMN700, trk_cxha_alloc,           0x10),
+       CMN_EVENT_DVM(CMN700, trk_pdn_alloc,            0x11),
+       CMN_EVENT_DVM(CMN700, txsnp_stall_limit,        0x12),
+       CMN_EVENT_DVM(CMN700, rxsnp_stall_starv,        0x13),
+       CMN_EVENT_DVM(CMN700, txsnp_sync_stall_op,      0x14),
 
        CMN_EVENT_HNF(CMN_ANY, cache_miss,              0x01),
        CMN_EVENT_HNF(CMN_ANY, slc_sf_cache_access,     0x02),
@@ -704,11 +822,11 @@ static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_HNF(CMN_ANY, mc_retries,              0x0c),
        CMN_EVENT_HNF(CMN_ANY, mc_reqs,                 0x0d),
        CMN_EVENT_HNF(CMN_ANY, qos_hh_retry,            0x0e),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all, 0x0f, 0),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4),
+       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all, 0x0f, 0, SEL_OCCUP1ID),
+       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1, SEL_OCCUP1ID),
+       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2, SEL_OCCUP1ID),
+       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3, SEL_OCCUP1ID),
+       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4, SEL_OCCUP1ID),
        CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz,            0x10),
        CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz,     0x11),
        CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full,      0x12),
@@ -725,9 +843,22 @@ static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_HNF(CMN_ANY, stash_snp_sent,          0x1d),
        CMN_EVENT_HNF(CMN_ANY, stash_data_pull,         0x1e),
        CMN_EVENT_HNF(CMN_ANY, snp_fwded,               0x1f),
-       CMN_EVENT_HNF(CI700, atomic_fwd,                0x20),
-       CMN_EVENT_HNF(CI700, mpam_hardlim,              0x21),
-       CMN_EVENT_HNF(CI700, mpam_softlim,              0x22),
+       CMN_EVENT_HNF(NOT_CMN600, atomic_fwd,           0x20),
+       CMN_EVENT_HNF(NOT_CMN600, mpam_hardlim,         0x21),
+       CMN_EVENT_HNF(NOT_CMN600, mpam_softlim,         0x22),
+       CMN_EVENT_HNF(CMN_650ON, snp_sent_cluster,      0x23),
+       CMN_EVENT_HNF(CMN_650ON, sf_imprecise_evict,    0x24),
+       CMN_EVENT_HNF(CMN_650ON, sf_evict_shared_line,  0x25),
+       CMN_EVENT_HNF_CLS(CMN700, pocq_class_occup,     0x26),
+       CMN_EVENT_HNF_CLS(CMN700, pocq_class_retry,     0x27),
+       CMN_EVENT_HNF_CLS(CMN700, class_mc_reqs,        0x28),
+       CMN_EVENT_HNF_CLS(CMN700, class_cgnt_cmin,      0x29),
+       CMN_EVENT_HNF_SNT(CMN700, sn_throttle,          0x2a),
+       CMN_EVENT_HNF_SNT(CMN700, sn_throttle_min,      0x2b),
+       CMN_EVENT_HNF(CMN700, sf_precise_to_imprecise,  0x2c),
+       CMN_EVENT_HNF(CMN700, snp_intv_cln,             0x2d),
+       CMN_EVENT_HNF(CMN700, nc_excl,                  0x2e),
+       CMN_EVENT_HNF(CMN700, excl_mon_ovfl,            0x2f),
 
        CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl,              0x20),
        CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl,              0x21),
@@ -749,12 +880,33 @@ static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_HNI(nonpcie_serialization,            0x31),
        CMN_EVENT_HNI(pcie_serialization,               0x32),
 
+       /*
+        * HN-P events squat on top of the HN-I similarly to DVM events, except
+        * for being crammed into the same physical node as well. And of course
+        * where would the fun be if the same events were in the same order...
+        */
+       CMN_EVENT_HNP(rrt_wr_occ_cnt_ovfl,              0x01),
+       CMN_EVENT_HNP(rdt_wr_occ_cnt_ovfl,              0x02),
+       CMN_EVENT_HNP(wdb_occ_cnt_ovfl,                 0x03),
+       CMN_EVENT_HNP(rrt_wr_alloc,                     0x04),
+       CMN_EVENT_HNP(rdt_wr_alloc,                     0x05),
+       CMN_EVENT_HNP(wdb_alloc,                        0x06),
+       CMN_EVENT_HNP(awvalid_no_awready,               0x07),
+       CMN_EVENT_HNP(awready_no_awvalid,               0x08),
+       CMN_EVENT_HNP(wvalid_no_wready,                 0x09),
+       CMN_EVENT_HNP(rrt_rd_occ_cnt_ovfl,              0x11),
+       CMN_EVENT_HNP(rdt_rd_occ_cnt_ovfl,              0x12),
+       CMN_EVENT_HNP(rrt_rd_alloc,                     0x13),
+       CMN_EVENT_HNP(rdt_rd_alloc,                     0x14),
+       CMN_EVENT_HNP(arvalid_no_arready,               0x15),
+       CMN_EVENT_HNP(arready_no_arvalid,               0x16),
+
        CMN_EVENT_XP(txflit_valid,                      0x01),
        CMN_EVENT_XP(txflit_stall,                      0x02),
        CMN_EVENT_XP(partial_dat_flit,                  0x03),
        /* We treat watchpoints as a special made-up class of XP events */
-       CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP, 0),
-       CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN, 0),
+       CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP),
+       CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN),
 
        CMN_EVENT_SBSX(CMN_ANY, rd_req,                 0x01),
        CMN_EVENT_SBSX(CMN_ANY, wr_req,                 0x02),
@@ -768,7 +920,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_SBSX(CMN_ANY, wdb_occ_cnt_ovfl,       0x14),
        CMN_EVENT_SBSX(CMN_ANY, rd_axi_trkr_occ_cnt_ovfl, 0x15),
        CMN_EVENT_SBSX(CMN_ANY, cmo_axi_trkr_occ_cnt_ovfl, 0x16),
-       CMN_EVENT_SBSX(CI700, rdb_occ_cnt_ovfl,         0x17),
+       CMN_EVENT_SBSX(NOT_CMN600, rdb_occ_cnt_ovfl,    0x17),
        CMN_EVENT_SBSX(CMN_ANY, arvalid_no_arready,     0x21),
        CMN_EVENT_SBSX(CMN_ANY, awvalid_no_awready,     0x22),
        CMN_EVENT_SBSX(CMN_ANY, wvalid_no_wready,       0x23),
@@ -795,12 +947,25 @@ static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_RNID(CMN600, rdb_replay,              0x12),
        CMN_EVENT_RNID(CMN600, rdb_hybrid,              0x13),
        CMN_EVENT_RNID(CMN600, rdb_ord,                 0x14),
-       CMN_EVENT_RNID(CI700, padb_occ_ovfl,            0x11),
-       CMN_EVENT_RNID(CI700, rpdb_occ_ovfl,            0x12),
-       CMN_EVENT_RNID(CI700, rrt_occup_ovfl_slice1,    0x13),
-       CMN_EVENT_RNID(CI700, rrt_occup_ovfl_slice2,    0x14),
-       CMN_EVENT_RNID(CI700, rrt_occup_ovfl_slice3,    0x15),
-       CMN_EVENT_RNID(CI700, wrt_throttled,            0x16),
+       CMN_EVENT_RNID(NOT_CMN600, padb_occ_ovfl,       0x11),
+       CMN_EVENT_RNID(NOT_CMN600, rpdb_occ_ovfl,       0x12),
+       CMN_EVENT_RNID(NOT_CMN600, rrt_occup_ovfl_slice1, 0x13),
+       CMN_EVENT_RNID(NOT_CMN600, rrt_occup_ovfl_slice2, 0x14),
+       CMN_EVENT_RNID(NOT_CMN600, rrt_occup_ovfl_slice3, 0x15),
+       CMN_EVENT_RNID(NOT_CMN600, wrt_throttled,       0x16),
+       CMN_EVENT_RNID(CMN700, ldb_full,                0x17),
+       CMN_EVENT_RNID(CMN700, rrt_rd_req_occup_ovfl_slice0, 0x18),
+       CMN_EVENT_RNID(CMN700, rrt_rd_req_occup_ovfl_slice1, 0x19),
+       CMN_EVENT_RNID(CMN700, rrt_rd_req_occup_ovfl_slice2, 0x1a),
+       CMN_EVENT_RNID(CMN700, rrt_rd_req_occup_ovfl_slice3, 0x1b),
+       CMN_EVENT_RNID(CMN700, rrt_burst_occup_ovfl_slice0, 0x1c),
+       CMN_EVENT_RNID(CMN700, rrt_burst_occup_ovfl_slice1, 0x1d),
+       CMN_EVENT_RNID(CMN700, rrt_burst_occup_ovfl_slice2, 0x1e),
+       CMN_EVENT_RNID(CMN700, rrt_burst_occup_ovfl_slice3, 0x1f),
+       CMN_EVENT_RNID(CMN700, rrt_burst_alloc,         0x20),
+       CMN_EVENT_RNID(CMN700, awid_hash,               0x21),
+       CMN_EVENT_RNID(CMN700, atomic_alloc,            0x22),
+       CMN_EVENT_RNID(CMN700, atomic_occ_ovfl,         0x23),
 
        CMN_EVENT_MTSX(tc_lookup,                       0x01),
        CMN_EVENT_MTSX(tc_fill,                         0x02),
@@ -815,6 +980,118 @@ static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_MTSX(tcq_occ_cnt_ovfl,                0x0b),
        CMN_EVENT_MTSX(tdb_occ_cnt_ovfl,                0x0c),
 
+       CMN_EVENT_CXRA(CMN_ANY, rht_occ,                0x01),
+       CMN_EVENT_CXRA(CMN_ANY, sht_occ,                0x02),
+       CMN_EVENT_CXRA(CMN_ANY, rdb_occ,                0x03),
+       CMN_EVENT_CXRA(CMN_ANY, wdb_occ,                0x04),
+       CMN_EVENT_CXRA(CMN_ANY, ssb_occ,                0x05),
+       CMN_EVENT_CXRA(CMN_ANY, snp_bcasts,             0x06),
+       CMN_EVENT_CXRA(CMN_ANY, req_chains,             0x07),
+       CMN_EVENT_CXRA(CMN_ANY, req_chain_avglen,       0x08),
+       CMN_EVENT_CXRA(CMN_ANY, chirsp_stalls,          0x09),
+       CMN_EVENT_CXRA(CMN_ANY, chidat_stalls,          0x0a),
+       CMN_EVENT_CXRA(CMN_ANY, cxreq_pcrd_stalls_link0, 0x0b),
+       CMN_EVENT_CXRA(CMN_ANY, cxreq_pcrd_stalls_link1, 0x0c),
+       CMN_EVENT_CXRA(CMN_ANY, cxreq_pcrd_stalls_link2, 0x0d),
+       CMN_EVENT_CXRA(CMN_ANY, cxdat_pcrd_stalls_link0, 0x0e),
+       CMN_EVENT_CXRA(CMN_ANY, cxdat_pcrd_stalls_link1, 0x0f),
+       CMN_EVENT_CXRA(CMN_ANY, cxdat_pcrd_stalls_link2, 0x10),
+       CMN_EVENT_CXRA(CMN_ANY, external_chirsp_stalls, 0x11),
+       CMN_EVENT_CXRA(CMN_ANY, external_chidat_stalls, 0x12),
+       CMN_EVENT_CXRA(NOT_CMN600, cxmisc_pcrd_stalls_link0, 0x13),
+       CMN_EVENT_CXRA(NOT_CMN600, cxmisc_pcrd_stalls_link1, 0x14),
+       CMN_EVENT_CXRA(NOT_CMN600, cxmisc_pcrd_stalls_link2, 0x15),
+
+       CMN_EVENT_CXHA(rddatbyp,                        0x21),
+       CMN_EVENT_CXHA(chirsp_up_stall,                 0x22),
+       CMN_EVENT_CXHA(chidat_up_stall,                 0x23),
+       CMN_EVENT_CXHA(snppcrd_link0_stall,             0x24),
+       CMN_EVENT_CXHA(snppcrd_link1_stall,             0x25),
+       CMN_EVENT_CXHA(snppcrd_link2_stall,             0x26),
+       CMN_EVENT_CXHA(reqtrk_occ,                      0x27),
+       CMN_EVENT_CXHA(rdb_occ,                         0x28),
+       CMN_EVENT_CXHA(rdbyp_occ,                       0x29),
+       CMN_EVENT_CXHA(wdb_occ,                         0x2a),
+       CMN_EVENT_CXHA(snptrk_occ,                      0x2b),
+       CMN_EVENT_CXHA(sdb_occ,                         0x2c),
+       CMN_EVENT_CXHA(snphaz_occ,                      0x2d),
+
+       CMN_EVENT_CCRA(rht_occ,                         0x41),
+       CMN_EVENT_CCRA(sht_occ,                         0x42),
+       CMN_EVENT_CCRA(rdb_occ,                         0x43),
+       CMN_EVENT_CCRA(wdb_occ,                         0x44),
+       CMN_EVENT_CCRA(ssb_occ,                         0x45),
+       CMN_EVENT_CCRA(snp_bcasts,                      0x46),
+       CMN_EVENT_CCRA(req_chains,                      0x47),
+       CMN_EVENT_CCRA(req_chain_avglen,                0x48),
+       CMN_EVENT_CCRA(chirsp_stalls,                   0x49),
+       CMN_EVENT_CCRA(chidat_stalls,                   0x4a),
+       CMN_EVENT_CCRA(cxreq_pcrd_stalls_link0,         0x4b),
+       CMN_EVENT_CCRA(cxreq_pcrd_stalls_link1,         0x4c),
+       CMN_EVENT_CCRA(cxreq_pcrd_stalls_link2,         0x4d),
+       CMN_EVENT_CCRA(cxdat_pcrd_stalls_link0,         0x4e),
+       CMN_EVENT_CCRA(cxdat_pcrd_stalls_link1,         0x4f),
+       CMN_EVENT_CCRA(cxdat_pcrd_stalls_link2,         0x50),
+       CMN_EVENT_CCRA(external_chirsp_stalls,          0x51),
+       CMN_EVENT_CCRA(external_chidat_stalls,          0x52),
+       CMN_EVENT_CCRA(cxmisc_pcrd_stalls_link0,        0x53),
+       CMN_EVENT_CCRA(cxmisc_pcrd_stalls_link1,        0x54),
+       CMN_EVENT_CCRA(cxmisc_pcrd_stalls_link2,        0x55),
+       CMN_EVENT_CCRA(rht_alloc,                       0x56),
+       CMN_EVENT_CCRA(sht_alloc,                       0x57),
+       CMN_EVENT_CCRA(rdb_alloc,                       0x58),
+       CMN_EVENT_CCRA(wdb_alloc,                       0x59),
+       CMN_EVENT_CCRA(ssb_alloc,                       0x5a),
+
+       CMN_EVENT_CCHA(rddatbyp,                        0x61),
+       CMN_EVENT_CCHA(chirsp_up_stall,                 0x62),
+       CMN_EVENT_CCHA(chidat_up_stall,                 0x63),
+       CMN_EVENT_CCHA(snppcrd_link0_stall,             0x64),
+       CMN_EVENT_CCHA(snppcrd_link1_stall,             0x65),
+       CMN_EVENT_CCHA(snppcrd_link2_stall,             0x66),
+       CMN_EVENT_CCHA(reqtrk_occ,                      0x67),
+       CMN_EVENT_CCHA(rdb_occ,                         0x68),
+       CMN_EVENT_CCHA(rdbyp_occ,                       0x69),
+       CMN_EVENT_CCHA(wdb_occ,                         0x6a),
+       CMN_EVENT_CCHA(snptrk_occ,                      0x6b),
+       CMN_EVENT_CCHA(sdb_occ,                         0x6c),
+       CMN_EVENT_CCHA(snphaz_occ,                      0x6d),
+       CMN_EVENT_CCHA(reqtrk_alloc,                    0x6e),
+       CMN_EVENT_CCHA(rdb_alloc,                       0x6f),
+       CMN_EVENT_CCHA(rdbyp_alloc,                     0x70),
+       CMN_EVENT_CCHA(wdb_alloc,                       0x71),
+       CMN_EVENT_CCHA(snptrk_alloc,                    0x72),
+       CMN_EVENT_CCHA(sdb_alloc,                       0x73),
+       CMN_EVENT_CCHA(snphaz_alloc,                    0x74),
+       CMN_EVENT_CCHA(pb_rhu_req_occ,                  0x75),
+       CMN_EVENT_CCHA(pb_rhu_req_alloc,                0x76),
+       CMN_EVENT_CCHA(pb_rhu_pcie_req_occ,             0x77),
+       CMN_EVENT_CCHA(pb_rhu_pcie_req_alloc,           0x78),
+       CMN_EVENT_CCHA(pb_pcie_wr_req_occ,              0x79),
+       CMN_EVENT_CCHA(pb_pcie_wr_req_alloc,            0x7a),
+       CMN_EVENT_CCHA(pb_pcie_reg_req_occ,             0x7b),
+       CMN_EVENT_CCHA(pb_pcie_reg_req_alloc,           0x7c),
+       CMN_EVENT_CCHA(pb_pcie_rsvd_req_occ,            0x7d),
+       CMN_EVENT_CCHA(pb_pcie_rsvd_req_alloc,          0x7e),
+       CMN_EVENT_CCHA(pb_rhu_dat_occ,                  0x7f),
+       CMN_EVENT_CCHA(pb_rhu_dat_alloc,                0x80),
+       CMN_EVENT_CCHA(pb_rhu_pcie_dat_occ,             0x81),
+       CMN_EVENT_CCHA(pb_rhu_pcie_dat_alloc,           0x82),
+       CMN_EVENT_CCHA(pb_pcie_wr_dat_occ,              0x83),
+       CMN_EVENT_CCHA(pb_pcie_wr_dat_alloc,            0x84),
+
+       CMN_EVENT_CCLA(rx_cxs,                          0x21),
+       CMN_EVENT_CCLA(tx_cxs,                          0x22),
+       CMN_EVENT_CCLA(rx_cxs_avg_size,                 0x23),
+       CMN_EVENT_CCLA(tx_cxs_avg_size,                 0x24),
+       CMN_EVENT_CCLA(tx_cxs_lcrd_backpressure,        0x25),
+       CMN_EVENT_CCLA(link_crdbuf_occ,                 0x26),
+       CMN_EVENT_CCLA(link_crdbuf_alloc,               0x27),
+       CMN_EVENT_CCLA(pfwd_rcvr_cxs,                   0x28),
+       CMN_EVENT_CCLA(pfwd_sndr_num_flits,             0x29),
+       CMN_EVENT_CCLA(pfwd_sndr_stalls_static_crd,     0x2a),
+       CMN_EVENT_CCLA(pfwd_sndr_stalls_dynamic_crd,    0x2b),
+
        NULL
 };
 
@@ -1032,6 +1309,42 @@ static void arm_cmn_event_read(struct perf_event *event)
        local64_add(delta, &event->count);
 }
 
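+/*
+ * The filter selectors (occupancy, class, snoop-throttle) share the upper
+ * half of a node's pmu_event_sel register, so each selector value is
+ * refcounted: the first event claims it, and subsequent events may only
+ * share the node if they request the same value.
+ */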
+static int arm_cmn_set_event_sel_hi(struct arm_cmn_node *dn,
+                                   enum cmn_filter_select fsel, u8 occupid)
+{
+       u64 reg;
+
+       if (fsel == SEL_NONE)
+               return 0;
+
+       if (!dn->occupid[fsel].count) {
+               dn->occupid[fsel].val = occupid;
+               reg = FIELD_PREP(CMN__PMU_CBUSY_SNTHROTTLE_SEL,
+                                dn->occupid[SEL_CBUSY_SNTHROTTLE_SEL].val) |
+                     FIELD_PREP(CMN__PMU_CLASS_OCCUP_ID,
+                                dn->occupid[SEL_CLASS_OCCUP_ID].val) |
+                     FIELD_PREP(CMN__PMU_OCCUP1_ID,
+                                dn->occupid[SEL_OCCUP1ID].val);
+               writel_relaxed(reg >> 32, dn->pmu_base + CMN_PMU_EVENT_SEL + 4);
+       } else if (dn->occupid[fsel].val != occupid) {
+               return -EBUSY;
+       }
+       dn->occupid[fsel].count++;
+       return 0;
+}
+
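+/* CMN-700 XPs have a 64-bit ("wide") event selector; everything else is 32-bit */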
+static void arm_cmn_set_event_sel_lo(struct arm_cmn_node *dn, int dtm_idx,
+                                    int eventid, bool wide_sel)
+{
+       if (wide_sel) {
+               dn->event_w[dtm_idx] = eventid;
+               writeq_relaxed(le64_to_cpu(dn->event_sel_w), dn->pmu_base + CMN_PMU_EVENT_SEL);
+       } else {
+               dn->event[dtm_idx] = eventid;
+               writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL);
+       }
+}
+
 static void arm_cmn_event_start(struct perf_event *event, int flags)
 {
        struct arm_cmn *cmn = to_cmn(event->pmu);
@@ -1058,8 +1371,8 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
        } else for_each_hw_dn(hw, dn, i) {
                int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
 
-               dn->event[dtm_idx] = CMN_EVENT_EVENTID(event);
-               writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL);
+               arm_cmn_set_event_sel_lo(dn, dtm_idx, CMN_EVENT_EVENTID(event),
+                                        hw->wide_sel);
        }
 }
 
@@ -1086,8 +1399,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
        } else for_each_hw_dn(hw, dn, i) {
                int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
 
-               dn->event[dtm_idx] = 0;
-               writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL);
+               arm_cmn_set_event_sel_lo(dn, dtm_idx, 0, hw->wide_sel);
        }
 
        arm_cmn_event_read(event);
@@ -1095,7 +1407,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
 
 struct arm_cmn_val {
        u8 dtm_count[CMN_MAX_DTMS];
-       u8 occupid[CMN_MAX_DTMS];
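+       /* Claimed filter value per DTM and selector, stored as value + 1 */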
+       u8 occupid[CMN_MAX_DTMS][SEL_MAX];
        u8 wp[CMN_MAX_DTMS][4];
        int dtc_count;
        bool cycles;
@@ -1108,7 +1420,6 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
        struct arm_cmn_node *dn;
        enum cmn_node_type type;
        int i;
-       u8 occupid;
 
        if (is_software_event(event))
                return;
@@ -1120,16 +1431,14 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
        }
 
        val->dtc_count++;
-       if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event)))
-               occupid = CMN_EVENT_OCCUPID(event) + 1;
-       else
-               occupid = 0;
 
        for_each_hw_dn(hw, dn, i) {
-               int wp_idx, dtm = dn->dtm;
+               int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
 
                val->dtm_count[dtm]++;
-               val->occupid[dtm] = occupid;
+
+               if (sel > SEL_NONE)
+                       val->occupid[dtm][sel] = CMN_EVENT_OCCUPID(event) + 1;
 
                if (type != CMN_TYPE_WP)
                        continue;
@@ -1147,7 +1456,6 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
        enum cmn_node_type type;
        struct arm_cmn_val *val;
        int i, ret = -EINVAL;
-       u8 occupid;
 
        if (leader == event)
                return 0;
@@ -1172,18 +1480,14 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
        if (val->dtc_count == CMN_DT_NUM_COUNTERS)
                goto done;
 
-       if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event)))
-               occupid = CMN_EVENT_OCCUPID(event) + 1;
-       else
-               occupid = 0;
-
        for_each_hw_dn(hw, dn, i) {
-               int wp_idx, wp_cmb, dtm = dn->dtm;
+               int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
 
                if (val->dtm_count[dtm] == CMN_DTM_NUM_COUNTERS)
                        goto done;
 
-               if (occupid && val->occupid[dtm] && occupid != val->occupid[dtm])
+               if (sel > SEL_NONE && val->occupid[dtm][sel] &&
+                   val->occupid[dtm][sel] != CMN_EVENT_OCCUPID(event) + 1)
                        goto done;
 
                if (type != CMN_TYPE_WP)
@@ -1204,6 +1508,22 @@ done:
        return ret;
 }
 
+static enum cmn_filter_select arm_cmn_filter_sel(enum cmn_model model,
+                                                enum cmn_node_type type,
+                                                unsigned int eventid)
+{
+       struct arm_cmn_event_attr *e;
+       int i;
+
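+       /* arm_cmn_event_attrs[] is NULL-terminated, hence the ARRAY_SIZE() - 1 */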
+       for (i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) {
+               e = container_of(arm_cmn_event_attrs[i], typeof(*e), attr.attr);
+               if (e->model & model && e->type == type && e->eventid == eventid)
+                       return e->fsel;
+       }
+       return SEL_NONE;
+}
+
 static int arm_cmn_event_init(struct perf_event *event)
 {
        struct arm_cmn *cmn = to_cmn(event->pmu);
@@ -1228,18 +1548,23 @@ static int arm_cmn_event_init(struct perf_event *event)
        if (type == CMN_TYPE_DTC)
                return 0;
 
+       eventid = CMN_EVENT_EVENTID(event);
        /* For watchpoints we need the actual XP node here */
        if (type == CMN_TYPE_WP) {
                type = CMN_TYPE_XP;
                /* ...and we need a "real" direction */
-               eventid = CMN_EVENT_EVENTID(event);
                if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN)
                        return -EINVAL;
                /* ...but the DTM may depend on which port we're watching */
                if (cmn->multi_dtm)
                        hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
+       } else if (type == CMN_TYPE_XP && cmn->model == CMN700) {
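+               /* CMN-700 XP events use the wide 64-bit event selector */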
+               hw->wide_sel = true;
        }
 
+       /* This is sufficiently annoying to recalculate, so cache it */
+       hw->filter_sel = arm_cmn_filter_sel(cmn->model, type, eventid);
+
        bynodeid = CMN_EVENT_BYNODEID(event);
        nodeid = CMN_EVENT_NODEID(event);
 
@@ -1281,8 +1606,8 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
                if (type == CMN_TYPE_WP)
                        dtm->wp_event[arm_cmn_wp_idx(event)] = -1;
 
-               if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event)))
-                       hw->dn[i].occupid_count--;
+               if (hw->filter_sel > SEL_NONE)
+                       hw->dn[i].occupid[hw->filter_sel].count--;
 
                dtm->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx);
                writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
@@ -1362,18 +1687,8 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
                        input_sel = CMN__PMEVCNT0_INPUT_SEL_DEV + dtm_idx +
                                    (nid.port << 4) + (nid.dev << 2);
 
-                       if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event))) {
-                               u8 occupid = CMN_EVENT_OCCUPID(event);
-
-                               if (dn->occupid_count == 0) {
-                                       dn->occupid_val = occupid;
-                                       writel_relaxed(occupid,
-                                                      dn->pmu_base + CMN_PMU_EVENT_SEL + 4);
-                               } else if (dn->occupid_val != occupid) {
-                                       goto free_dtms;
-                               }
-                               dn->occupid_count++;
-                       }
+                       if (arm_cmn_set_event_sel_hi(dn, hw->filter_sel, CMN_EVENT_OCCUPID(event)))
+                               goto free_dtms;
                }
 
                arm_cmn_set_index(hw->dtm_idx, i, dtm_idx);
@@ -1622,6 +1937,10 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
                /* To the PMU, RN-Ds don't add anything over RN-Is, so smoosh them together */
                if (dn->type == CMN_TYPE_RND)
                        dn->type = CMN_TYPE_RNI;
+
+               /* We split the RN-I off already, so let the CCLA part match CCLA events */
+               if (dn->type == CMN_TYPE_CCLA_RNI)
+                       dn->type = CMN_TYPE_CCLA;
        }
 
        writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL);
@@ -1652,6 +1971,18 @@ static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_c
                        node->type, node->logid, offset);
 }
 
+static enum cmn_node_type arm_cmn_subtype(enum cmn_node_type type)
+{
+       switch (type) {
+       case CMN_TYPE_HNP:
+               return CMN_TYPE_HNI;
+       case CMN_TYPE_CCLA_RNI:
+               return CMN_TYPE_RNI;
+       default:
+               return CMN_TYPE_INVALID;
+       }
+}
+
 static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 {
        void __iomem *cfg_region;
@@ -1676,6 +2007,10 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
        cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg);
        cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg);
 
+       reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL_1);
+       cmn->snp_vc_num = FIELD_GET(CMN_INFO_SNP_VC_NUM, reg);
+       cmn->req_vc_num = FIELD_GET(CMN_INFO_REQ_VC_NUM, reg);
+
        reg = readq_relaxed(cfg_region + CMN_CHILD_INFO);
        child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
        child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg);
@@ -1692,8 +2027,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
                cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg);
        }
 
-       /* Cheeky +1 to help terminate pointer-based iteration later */
-       dn = devm_kcalloc(cmn->dev, cmn->num_dns + 1, sizeof(*dn), GFP_KERNEL);
+       /*
+        * Some nodes effectively have two separate types, which we'll handle
+        * by creating one of each internally. For a (very) safe initial upper
+        * bound, account for double the number of non-XP nodes.
+        */
+       dn = devm_kcalloc(cmn->dev, cmn->num_dns * 2 - cmn->num_xps,
+                         sizeof(*dn), GFP_KERNEL);
        if (!dn)
                return -ENOMEM;
 
@@ -1794,6 +2134,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
                        case CMN_TYPE_MTSX:
                        case CMN_TYPE_CXRA:
                        case CMN_TYPE_CXHA:
+                       case CMN_TYPE_CCRA:
+                       case CMN_TYPE_CCHA:
+                       case CMN_TYPE_CCLA:
                                dn++;
                                break;
                        /* Nothing to see here */
@@ -1802,6 +2145,19 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
                        case CMN_TYPE_RNSAM:
                        case CMN_TYPE_CXLA:
                                break;
+                       /*
+                        * Split "optimised" combination nodes into separate
+                        * types for the different event sets. Offsetting the
+                        * base address lets us handle the second pmu_event_sel
+                        * register via the normal mechanism later.
+                        */
+                       case CMN_TYPE_HNP:
+                       case CMN_TYPE_CCLA_RNI:
+                               dn[1] = dn[0];
+                               dn[0].pmu_base += CMN_HNP_PMU_EVENT_SEL;
+                               dn[1].type = arm_cmn_subtype(dn->type);
+                               dn += 2;
+                               break;
                        /* Something has gone horribly wrong */
                        default:
                                dev_err(cmn->dev, "invalid device node type: 0x%x\n", dn->type);
@@ -1810,9 +2166,10 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
                }
        }
 
-       /* Correct for any nodes we skipped */
+       /* Correct for any nodes we added or skipped */
        cmn->num_dns = dn - cmn->dns;
 
+       /* Cheeky +1 to help terminate pointer-based iteration later */
        sz = (void *)(dn + 1) - (void *)cmn->dns;
        dn = devm_krealloc(cmn->dev, cmn->dns, sz, GFP_KERNEL);
        if (dn)
@@ -1970,6 +2327,8 @@ static int arm_cmn_remove(struct platform_device *pdev)
 #ifdef CONFIG_OF
 static const struct of_device_id arm_cmn_of_match[] = {
        { .compatible = "arm,cmn-600", .data = (void *)CMN600 },
+       { .compatible = "arm,cmn-650", .data = (void *)CMN650 },
+       { .compatible = "arm,cmn-700", .data = (void *)CMN700 },
        { .compatible = "arm,ci-700", .data = (void *)CI700 },
        {}
 };
@@ -1979,6 +2338,8 @@ MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id arm_cmn_acpi_match[] = {
        { "ARMHC600", CMN600 },
+       { "ARMHC650", CMN650 },
+       { "ARMHC700", CMN700 },
        {}
 };
 MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
index f5c7a845cd7bf24d9aec4778d2e155d630df1b6f..96ffadd654ff137b30a1ac5bf5520f959b93e4e8 100644 (file)
@@ -159,7 +159,9 @@ static int arm_pmu_acpi_parse_irqs(void)
                 * them with their PMUs.
                 */
                per_cpu(pmu_irqs, cpu) = irq;
-               armpmu_request_irq(irq, cpu);
+               err = armpmu_request_irq(irq, cpu);
+               if (err)
+                       goto out_err;
        }
 
        return 0;
index d44bcc29d99c8572a23f62aa689efcc396e617a6..db670b26589717a4d06c27e7fc604eefd5b7ffe8 100644 (file)
@@ -1035,6 +1035,9 @@ static void __arm_spe_pmu_dev_probe(void *info)
                fallthrough;
        case 2:
                spe_pmu->counter_sz = 12;
+               break;
+       case 3:
+               spe_pmu->counter_sz = 16;
        }
 
        dev_info(dev,
index 506ed39e326649760f20d8b90db8a693652580cd..6be83517acaa311c66f48ffb64c1c4b7ea7587ef 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
                          hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
-                         hisi_uncore_pa_pmu.o
+                         hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o
 
 obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
new file mode 100644 (file)
index 0000000..a9bb73f
--- /dev/null
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC CPA (Coherency Protocol Agent) hardware event counters support
+ *
+ * Copyright (C) 2022 HiSilicon Limited
+ * Author: Qi Liu <liuqi115@huawei.com>
+ *
+ * This code is based on uncore PMU drivers such as arm-cci and arm-ccn.
+ */
+
+#define pr_fmt(fmt) "cpa pmu: " fmt
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* CPA register definition */
+#define CPA_PERF_CTRL          0x1c00
+#define CPA_EVENT_CTRL         0x1c04
+#define CPA_INT_MASK           0x1c70
+#define CPA_INT_STATUS         0x1c78
+#define CPA_INT_CLEAR          0x1c7c
+#define CPA_EVENT_TYPE0                0x1c80
+#define CPA_VERSION            0x1cf0
+#define CPA_CNT0_LOWER         0x1d00
+#define CPA_CFG_REG            0x0534
+
+/* CPA operation command */
+#define CPA_PERF_CTRL_EN       BIT_ULL(0)
+#define CPA_EVTYPE_MASK                0xffUL
+#define CPA_PM_CTRL            BIT_ULL(9)
+
+/* CPA has 8 counters */
+#define CPA_NR_COUNTERS                0x8
+#define CPA_COUNTER_BITS       64
+#define CPA_NR_EVENTS          0xff
+#define CPA_REG_OFFSET         0x8
+
+static u32 hisi_cpa_pmu_get_counter_offset(int idx)
+{
+       return (CPA_CNT0_LOWER + idx * CPA_REG_OFFSET);
+}
+
+static u64 hisi_cpa_pmu_read_counter(struct hisi_pmu *cpa_pmu,
+                                    struct hw_perf_event *hwc)
+{
+       return readq(cpa_pmu->base + hisi_cpa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_cpa_pmu_write_counter(struct hisi_pmu *cpa_pmu,
+                                      struct hw_perf_event *hwc, u64 val)
+{
+       writeq(val, cpa_pmu->base + hisi_cpa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_cpa_pmu_write_evtype(struct hisi_pmu *cpa_pmu, int idx,
+                                     u32 type)
+{
+       u32 reg, reg_idx, shift, val;
+
+       /*
+        * Select the appropriate event select register (CPA_EVENT_TYPE0/1).
+        * There are 2 event select registers for the 8 hardware counters.
+        * Each event code is 8 bits wide: CPA_EVENT_TYPE0 serves the first
+        * 4 hardware counters and CPA_EVENT_TYPE1 serves the last 4.
+        */
+       reg = CPA_EVENT_TYPE0 + (idx / 4) * 4;
+       reg_idx = idx % 4;
+       shift = CPA_REG_OFFSET * reg_idx;
+
+       /* Write event code to CPA_EVENT_TYPEx Register */
+       val = readl(cpa_pmu->base + reg);
+       val &= ~(CPA_EVTYPE_MASK << shift);
+       val |= type << shift;
+       writel(val, cpa_pmu->base + reg);
+}
+
+static void hisi_cpa_pmu_start_counters(struct hisi_pmu *cpa_pmu)
+{
+       u32 val;
+
+       val = readl(cpa_pmu->base + CPA_PERF_CTRL);
+       val |= CPA_PERF_CTRL_EN;
+       writel(val, cpa_pmu->base + CPA_PERF_CTRL);
+}
+
+static void hisi_cpa_pmu_stop_counters(struct hisi_pmu *cpa_pmu)
+{
+       u32 val;
+
+       val = readl(cpa_pmu->base + CPA_PERF_CTRL);
+       val &= ~(CPA_PERF_CTRL_EN);
+       writel(val, cpa_pmu->base + CPA_PERF_CTRL);
+}
+
+static void hisi_cpa_pmu_disable_pm(struct hisi_pmu *cpa_pmu)
+{
+       u32 val;
+
+       val = readl(cpa_pmu->base + CPA_CFG_REG);
+       val |= CPA_PM_CTRL;
+       writel(val, cpa_pmu->base + CPA_CFG_REG);
+}
+
+static void hisi_cpa_pmu_enable_pm(struct hisi_pmu *cpa_pmu)
+{
+       u32 val;
+
+       val = readl(cpa_pmu->base + CPA_CFG_REG);
+       val &= ~(CPA_PM_CTRL);
+       writel(val, cpa_pmu->base + CPA_CFG_REG);
+}
+
+static void hisi_cpa_pmu_enable_counter(struct hisi_pmu *cpa_pmu,
+                                       struct hw_perf_event *hwc)
+{
+       u32 val;
+
+       /* Enable counter index in CPA_EVENT_CTRL register */
+       val = readl(cpa_pmu->base + CPA_EVENT_CTRL);
+       val |= 1 << hwc->idx;
+       writel(val, cpa_pmu->base + CPA_EVENT_CTRL);
+}
+
+static void hisi_cpa_pmu_disable_counter(struct hisi_pmu *cpa_pmu,
+                                        struct hw_perf_event *hwc)
+{
+       u32 val;
+
+       /* Clear counter index in CPA_EVENT_CTRL register */
+       val = readl(cpa_pmu->base + CPA_EVENT_CTRL);
+       val &= ~(1UL << hwc->idx);
+       writel(val, cpa_pmu->base + CPA_EVENT_CTRL);
+}
+
+static void hisi_cpa_pmu_enable_counter_int(struct hisi_pmu *cpa_pmu,
+                                           struct hw_perf_event *hwc)
+{
+       u32 val;
+
+       /* Write 0 to enable interrupt */
+       val = readl(cpa_pmu->base + CPA_INT_MASK);
+       val &= ~(1UL << hwc->idx);
+       writel(val, cpa_pmu->base + CPA_INT_MASK);
+}
+
+static void hisi_cpa_pmu_disable_counter_int(struct hisi_pmu *cpa_pmu,
+                                            struct hw_perf_event *hwc)
+{
+       u32 val;
+
+       /* Write 1 to mask interrupt */
+       val = readl(cpa_pmu->base + CPA_INT_MASK);
+       val |= 1 << hwc->idx;
+       writel(val, cpa_pmu->base + CPA_INT_MASK);
+}
+
+static u32 hisi_cpa_pmu_get_int_status(struct hisi_pmu *cpa_pmu)
+{
+       return readl(cpa_pmu->base + CPA_INT_STATUS);
+}
+
+static void hisi_cpa_pmu_clear_int_status(struct hisi_pmu *cpa_pmu, int idx)
+{
+       writel(1 << idx, cpa_pmu->base + CPA_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_cpa_pmu_acpi_match[] = {
+       { "HISI0281", },
+       {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match);
+
+static int hisi_cpa_pmu_init_data(struct platform_device *pdev,
+                                 struct hisi_pmu *cpa_pmu)
+{
+       if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+                                    &cpa_pmu->sicl_id)) {
+               dev_err(&pdev->dev, "Cannot read sicl-id\n");
+               return -EINVAL;
+       }
+
+       if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
+                                    &cpa_pmu->index_id)) {
+               dev_err(&pdev->dev, "Cannot read idx-id\n");
+               return -EINVAL;
+       }
+
+       cpa_pmu->ccl_id = -1;
+       cpa_pmu->sccl_id = -1;
+       cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(cpa_pmu->base))
+               return PTR_ERR(cpa_pmu->base);
+
+       cpa_pmu->identifier = readl(cpa_pmu->base + CPA_VERSION);
+
+       return 0;
+}
+
+static struct attribute *hisi_cpa_pmu_format_attr[] = {
+       HISI_PMU_FORMAT_ATTR(event, "config:0-15"),
+       NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_format_group = {
+       .name = "format",
+       .attrs = hisi_cpa_pmu_format_attr,
+};
+
+static struct attribute *hisi_cpa_pmu_events_attr[] = {
+       HISI_PMU_EVENT_ATTR(cpa_cycles,         0x00),
+       HISI_PMU_EVENT_ATTR(cpa_p1_wr_dat,      0x61),
+       HISI_PMU_EVENT_ATTR(cpa_p1_rd_dat,      0x62),
+       HISI_PMU_EVENT_ATTR(cpa_p0_wr_dat,      0xE1),
+       HISI_PMU_EVENT_ATTR(cpa_p0_rd_dat,      0xE2),
+       NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_events_group = {
+       .name = "events",
+       .attrs = hisi_cpa_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = {
+       .attrs = hisi_cpa_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_cpa_pmu_identifier_attr =
+       __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_cpa_pmu_identifier_attrs[] = {
+       &hisi_cpa_pmu_identifier_attr.attr,
+       NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_identifier_group = {
+       .attrs = hisi_cpa_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = {
+       &hisi_cpa_pmu_format_group,
+       &hisi_cpa_pmu_events_group,
+       &hisi_cpa_pmu_cpumask_attr_group,
+       &hisi_cpa_pmu_identifier_group,
+       NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_cpa_pmu_ops = {
+       .write_evtype           = hisi_cpa_pmu_write_evtype,
+       .get_event_idx          = hisi_uncore_pmu_get_event_idx,
+       .start_counters         = hisi_cpa_pmu_start_counters,
+       .stop_counters          = hisi_cpa_pmu_stop_counters,
+       .enable_counter         = hisi_cpa_pmu_enable_counter,
+       .disable_counter        = hisi_cpa_pmu_disable_counter,
+       .enable_counter_int     = hisi_cpa_pmu_enable_counter_int,
+       .disable_counter_int    = hisi_cpa_pmu_disable_counter_int,
+       .write_counter          = hisi_cpa_pmu_write_counter,
+       .read_counter           = hisi_cpa_pmu_read_counter,
+       .get_int_status         = hisi_cpa_pmu_get_int_status,
+       .clear_int_status       = hisi_cpa_pmu_clear_int_status,
+};
+
+static int hisi_cpa_pmu_dev_probe(struct platform_device *pdev,
+                                 struct hisi_pmu *cpa_pmu)
+{
+       int ret;
+
+       ret = hisi_cpa_pmu_init_data(pdev, cpa_pmu);
+       if (ret)
+               return ret;
+
+       ret = hisi_uncore_pmu_init_irq(cpa_pmu, pdev);
+       if (ret)
+               return ret;
+
+       cpa_pmu->counter_bits = CPA_COUNTER_BITS;
+       cpa_pmu->check_event = CPA_NR_EVENTS;
+       cpa_pmu->pmu_events.attr_groups = hisi_cpa_pmu_attr_groups;
+       cpa_pmu->ops = &hisi_uncore_cpa_pmu_ops;
+       cpa_pmu->num_counters = CPA_NR_COUNTERS;
+       cpa_pmu->dev = &pdev->dev;
+       cpa_pmu->on_cpu = -1;
+
+       return 0;
+}
+
+static int hisi_cpa_pmu_probe(struct platform_device *pdev)
+{
+       struct hisi_pmu *cpa_pmu;
+       char *name;
+       int ret;
+
+       cpa_pmu = devm_kzalloc(&pdev->dev, sizeof(*cpa_pmu), GFP_KERNEL);
+       if (!cpa_pmu)
+               return -ENOMEM;
+
+       ret = hisi_cpa_pmu_dev_probe(pdev, cpa_pmu);
+       if (ret)
+               return ret;
+
+       name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u",
+                             cpa_pmu->sicl_id, cpa_pmu->index_id);
+       if (!name)
+               return -ENOMEM;
+
+       cpa_pmu->pmu = (struct pmu) {
+               .name           = name,
+               .module         = THIS_MODULE,
+               .task_ctx_nr    = perf_invalid_context,
+               .event_init     = hisi_uncore_pmu_event_init,
+               .pmu_enable     = hisi_uncore_pmu_enable,
+               .pmu_disable    = hisi_uncore_pmu_disable,
+               .add            = hisi_uncore_pmu_add,
+               .del            = hisi_uncore_pmu_del,
+               .start          = hisi_uncore_pmu_start,
+               .stop           = hisi_uncore_pmu_stop,
+               .read           = hisi_uncore_pmu_read,
+               .attr_groups    = cpa_pmu->pmu_events.attr_groups,
+               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
+       };
+
+       /* Power Management should be disabled before using CPA PMU. */
+       hisi_cpa_pmu_disable_pm(cpa_pmu);
+       ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
+                                      &cpa_pmu->node);
+       if (ret) {
+               dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+               hisi_cpa_pmu_enable_pm(cpa_pmu);
+               return ret;
+       }
+
+       ret = perf_pmu_register(&cpa_pmu->pmu, name, -1);
+       if (ret) {
+               dev_err(cpa_pmu->dev, "PMU register failed\n");
+               cpuhp_state_remove_instance_nocalls(
+                       CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, &cpa_pmu->node);
+               hisi_cpa_pmu_enable_pm(cpa_pmu);
+               return ret;
+       }
+
+       platform_set_drvdata(pdev, cpa_pmu);
+       return ret;
+}
+
+static int hisi_cpa_pmu_remove(struct platform_device *pdev)
+{
+       struct hisi_pmu *cpa_pmu = platform_get_drvdata(pdev);
+
+       perf_pmu_unregister(&cpa_pmu->pmu);
+       cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
+                                           &cpa_pmu->node);
+       hisi_cpa_pmu_enable_pm(cpa_pmu);
+       return 0;
+}
+
+static struct platform_driver hisi_cpa_pmu_driver = {
+       .driver = {
+               .name = "hisi_cpa_pmu",
+               .acpi_match_table = ACPI_PTR(hisi_cpa_pmu_acpi_match),
+               .suppress_bind_attrs = true,
+       },
+       .probe = hisi_cpa_pmu_probe,
+       .remove = hisi_cpa_pmu_remove,
+};
+
+static int __init hisi_cpa_pmu_module_init(void)
+{
+       int ret;
+
+       ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
+                                     "AP_PERF_ARM_HISI_CPA_ONLINE",
+                                     hisi_uncore_pmu_online_cpu,
+                                     hisi_uncore_pmu_offline_cpu);
+       if (ret) {
+               pr_err("setup hotplug failed: %d\n", ret);
+               return ret;
+       }
+
+       ret = platform_driver_register(&hisi_cpa_pmu_driver);
+       if (ret)
+               cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE);
+
+       return ret;
+}
+module_init(hisi_cpa_pmu_module_init);
+
+static void __exit hisi_cpa_pmu_module_exit(void)
+{
+       platform_driver_unregister(&hisi_cpa_pmu_driver);
+       cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE);
+}
+module_exit(hisi_cpa_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
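For context, a counter exposed by an uncore PMU driver like this one is
typically driven from userspace through perf_event_open(). A minimal sketch,
assuming a hypothetical instance named "hisi_sicl0_cpa0" and the cpa_cycles
event (0x00) from the events group above:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int type, fd;
	FILE *f;

	/* The dynamic PMU type is published in sysfs at registration time */
	f = fopen("/sys/bus/event_source/devices/hisi_sicl0_cpa0/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.type = type;
	attr.size = sizeof(attr);
	attr.config = 0x00;	/* cpa_cycles, per the "config:0-15" format */

	/*
	 * Uncore events are system-wide: pid = -1 plus an explicit CPU,
	 * which must come from the PMU's advertised cpumask.
	 */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	sleep(1);
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	read(fd, &count, sizeof(count));
	printf("cpa_cycles: %lld\n", count);
	return 0;
}

In practice the same path is reached via "perf stat -e hisi_sicl0_cpa0/cpa_cycles/",
with the instance name depending on the sicl-id and idx-id properties read in
hisi_cpa_pmu_init_data() above.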
index bad99d1491729af12f58e5b47beb581acaaa0d6b..a0ee84d97c41f99931db808a4bfcb658c538960f 100644 (file)
@@ -258,13 +258,12 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev,
                                   struct hisi_pmu *pa_pmu)
 {
        /*
-        * Use the SCCL_ID and the index ID to identify the PA PMU,
-        * while SCCL_ID is the nearst SCCL_ID from this SICL and
-        * CPU core is chosen from this SCCL to manage this PMU.
+        * As the PA PMU is in a SICL, use the SICL_ID and the index ID
+        * to identify the PA PMU.
         */
        if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
-                                    &pa_pmu->sccl_id)) {
-               dev_err(&pdev->dev, "Cannot read sccl-id!\n");
+                                    &pa_pmu->sicl_id)) {
+               dev_err(&pdev->dev, "Cannot read sicl-id!\n");
                return -EINVAL;
        }
 
@@ -275,6 +274,7 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev,
        }
 
        pa_pmu->ccl_id = -1;
+       pa_pmu->sccl_id = -1;
 
        pa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pa_pmu->base)) {
@@ -399,13 +399,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
        ret = hisi_pa_pmu_dev_probe(pdev, pa_pmu);
        if (ret)
                return ret;
-       /*
-        * PA is attached in SICL and the CPU core is chosen to manage this
-        * PMU which is the nearest SCCL, while its SCCL_ID is greater than
-        * one with the SICL_ID.
-        */
+
        name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u",
-                             pa_pmu->sccl_id - 1, pa_pmu->index_id);
+                             pa_pmu->sicl_id, pa_pmu->index_id);
        if (!name)
                return -ENOMEM;
 
index 358e4e284a62905e64bfba49e58260005afb8761..980b9ee6eb149556d1fdee21171e47759346d2dc 100644 (file)
@@ -458,6 +458,10 @@ static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
 {
        int sccl_id, ccl_id;
 
+       /* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */
+       if (hisi_pmu->sccl_id == -1)
+               return true;
+
        if (hisi_pmu->ccl_id == -1) {
                /* If CCL_ID is -1, the PMU only shares the same SCCL */
                hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
index 7f5841d6f59246d72f08677466cf6096c8fa978a..96eeddad55ffce10a6e5781efa22d79816a78691 100644 (file)
@@ -81,6 +81,7 @@ struct hisi_pmu {
        struct device *dev;
        struct hlist_node node;
        int sccl_id;
+       int sicl_id;
        int ccl_id;
        void __iomem *base;
        /* the ID of the PMU modules */
index ee67305f822d08a08e29dcd6a7c69a2dfd82fa1a..282d3a071a67c661de6986d53a31dfd34a0186f2 100644 (file)
@@ -146,12 +146,12 @@ static int tad_pmu_event_init(struct perf_event *event)
 {
        struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
 
-       if (!event->attr.disabled)
-               return -EINVAL;
-
        if (event->attr.type != event->pmu->type)
                return -ENOENT;
 
+       if (!event->attr.disabled)
+               return -EINVAL;
+
        if (event->state != PERF_EVENT_STATE_OFF)
                return -EINVAL;
 
index a1317a483512788bdafb2bab70c1f611e472f5ea..dca3537a8dccea33ed61e070138e31a8588b9adc 100644 (file)
@@ -35,7 +35,7 @@ union sbi_pmu_ctr_info {
        };
 };
 
-/**
+/*
  * RISC-V doesn't have heterogeneous harts yet. This needs to be part of
  * per_cpu in case of harts with different pmu counters
  */
@@ -477,7 +477,7 @@ static int pmu_sbi_get_ctrinfo(int nctr)
 
 static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
 {
-       /**
+       /*
         * No need to check the error because we are disabling all the counters
         * which may include counters that are not enabled yet.
         */
@@ -494,7 +494,7 @@ static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
                  cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
 }
 
-/**
+/*
  * This function starts all the used counters in a two-step approach.
  * Any counter that did not overflow can be started in a single step
  * while the overflowed counters need to be started with updated initialization
@@ -563,7 +563,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
        /* Overflow status register should only be read after counter are stopped */
        overflow = csr_read(CSR_SSCOUNTOVF);
 
-       /**
+       /*
         * Overflow interrupt pending bit should only be cleared after stopping
         * all the counters to avoid any race condition.
         */
index a3fa03bcd9a305775a270603faf5310326a0629d..80838dc54b3abb1eccbb15a3e749f7f94a526a12 100644 (file)
@@ -1236,18 +1236,17 @@ FUNC_GROUP_DECL(SALT8, AA12);
 FUNC_GROUP_DECL(WDTRST4, AA12);
 
 #define AE12 196
-SIG_EXPR_LIST_DECL_SEMG(AE12, FWSPIDQ2, FWQSPID, FWSPID,
-                       SIG_DESC_SET(SCU438, 4));
+SIG_EXPR_LIST_DECL_SESG(AE12, FWSPIQ2, FWQSPI, SIG_DESC_SET(SCU438, 4));
 SIG_EXPR_LIST_DECL_SESG(AE12, GPIOY4, GPIOY4);
-PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIDQ2),
+PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIQ2),
          SIG_EXPR_LIST_PTR(AE12, GPIOY4));
 
 #define AF12 197
-SIG_EXPR_LIST_DECL_SEMG(AF12, FWSPIDQ3, FWQSPID, FWSPID,
-                       SIG_DESC_SET(SCU438, 5));
+SIG_EXPR_LIST_DECL_SESG(AF12, FWSPIQ3, FWQSPI, SIG_DESC_SET(SCU438, 5));
 SIG_EXPR_LIST_DECL_SESG(AF12, GPIOY5, GPIOY5);
-PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIDQ3),
+PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIQ3),
          SIG_EXPR_LIST_PTR(AF12, GPIOY5));
+FUNC_GROUP_DECL(FWQSPI, AE12, AF12);
 
 #define AC12 198
 SSSF_PIN_DECL(AC12, GPIOY6, FWSPIABR, SIG_DESC_SET(SCU438, 6));
@@ -1520,9 +1519,8 @@ SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3));
 PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7);
 
 GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4);
-GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12);
 GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4);
-FUNC_DECL_2(FWSPID, FWSPID, FWQSPID);
+FUNC_DECL_1(FWSPID, FWSPID);
 FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4);
 FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8);
 /*
@@ -1918,7 +1916,7 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = {
        ASPEED_PINCTRL_GROUP(FSI2),
        ASPEED_PINCTRL_GROUP(FWSPIABR),
        ASPEED_PINCTRL_GROUP(FWSPID),
-       ASPEED_PINCTRL_GROUP(FWQSPID),
+       ASPEED_PINCTRL_GROUP(FWQSPI),
        ASPEED_PINCTRL_GROUP(FWSPIWP),
        ASPEED_PINCTRL_GROUP(GPIT0),
        ASPEED_PINCTRL_GROUP(GPIT1),
@@ -2160,6 +2158,7 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = {
        ASPEED_PINCTRL_FUNC(FSI2),
        ASPEED_PINCTRL_FUNC(FWSPIABR),
        ASPEED_PINCTRL_FUNC(FWSPID),
+       ASPEED_PINCTRL_FUNC(FWQSPI),
        ASPEED_PINCTRL_FUNC(FWSPIWP),
        ASPEED_PINCTRL_FUNC(GPIT0),
        ASPEED_PINCTRL_FUNC(GPIT1),
index 727c65221aef9dc912a4adf828902cf0229eacd1..57f37a294063c5999ff73efacd3e27daa7da6463 100644 (file)
@@ -259,7 +259,7 @@ static const struct mtk_pin_ies_smt_set mt8365_ies_set[] = {
        MTK_PIN_IES_SMT_SPEC(104, 104, 0x420, 13),
        MTK_PIN_IES_SMT_SPEC(105, 109, 0x420, 14),
        MTK_PIN_IES_SMT_SPEC(110, 113, 0x420, 15),
-       MTK_PIN_IES_SMT_SPEC(114, 112, 0x420, 16),
+       MTK_PIN_IES_SMT_SPEC(114, 116, 0x420, 16),
        MTK_PIN_IES_SMT_SPEC(117, 119, 0x420, 17),
        MTK_PIN_IES_SMT_SPEC(120, 122, 0x420, 18),
        MTK_PIN_IES_SMT_SPEC(123, 125, 0x420, 19),
index 1a7d686494ffbd26f2eefde41d69630bd496c27b..0645c2c24f508b48ccb9909843ea206a1d2c1406 100644 (file)
@@ -387,6 +387,8 @@ static void amd_gpio_irq_enable(struct irq_data *d)
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
        struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
 
+       gpiochip_enable_irq(gc, d->hwirq);
+
        raw_spin_lock_irqsave(&gpio_dev->lock, flags);
        pin_reg = readl(gpio_dev->base + (d->hwirq)*4);
        pin_reg |= BIT(INTERRUPT_ENABLE_OFF);
@@ -408,6 +410,8 @@ static void amd_gpio_irq_disable(struct irq_data *d)
        pin_reg &= ~BIT(INTERRUPT_MASK_OFF);
        writel(pin_reg, gpio_dev->base + (d->hwirq)*4);
        raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+
+       gpiochip_disable_irq(gc, d->hwirq);
 }
 
 static void amd_gpio_irq_mask(struct irq_data *d)
@@ -577,7 +581,7 @@ static void amd_irq_ack(struct irq_data *d)
        */
 }
 
-static struct irq_chip amd_gpio_irqchip = {
+static const struct irq_chip amd_gpio_irqchip = {
        .name         = "amd_gpio",
        .irq_ack      = amd_irq_ack,
        .irq_enable   = amd_gpio_irq_enable,
@@ -593,7 +597,8 @@ static struct irq_chip amd_gpio_irqchip = {
         * the wake event. Otherwise the wake event will never clear and
         * prevent the system from suspending.
         */
-       .flags        = IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND,
+       .flags        = IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND | IRQCHIP_IMMUTABLE,
+       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 #define PIN_IRQ_PENDING        (BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF))
@@ -1026,7 +1031,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
        amd_gpio_irq_init(gpio_dev);
 
        girq = &gpio_dev->gc.irq;
-       girq->chip = &amd_gpio_irqchip;
+       gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip);
        /* This will let us handle the parent IRQ in the driver */
        girq->parent_handler = NULL;
        girq->num_parents = 0;
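The AMD hunk above (and the Apple and MSM ones that follow) are instances of
the same 5.19-era conversion: the driver's irq_chip becomes a const,
IRQCHIP_IMMUTABLE structure whose callbacks tell gpiolib explicitly when a
line is in use as an interrupt, instead of letting gpiolib patch the chip at
runtime. A distilled sketch of the pattern, with hypothetical "foo" driver
names and the hardware accesses elided:

#include <linux/gpio/driver.h>
#include <linux/irq.h>

static void foo_gpio_irq_mask(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

	/* ...mask the interrupt in hardware here... */
	gpiochip_disable_irq(gc, irqd_to_hwirq(d));
}

static void foo_gpio_irq_unmask(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

	gpiochip_enable_irq(gc, irqd_to_hwirq(d));
	/* ...unmask the interrupt in hardware here... */
}

static const struct irq_chip foo_gpio_irqchip = {
	.name		= "foo-gpio",
	.irq_mask	= foo_gpio_irq_mask,
	.irq_unmask	= foo_gpio_irq_unmask,
	.flags		= IRQCHIP_IMMUTABLE,
	/* Wires up irq_request/release_resources for immutable chips */
	GPIOCHIP_IRQ_RESOURCE_HELPERS,
};

The chip is then attached with gpio_irq_chip_set_chip(girq, &foo_gpio_irqchip)
rather than by assigning girq->chip directly, as the hunks here do.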
index 72f4dd2466e119466735294cb3cb16708d7c6bd8..5e610849dfc3ebf5295adcf0b8d33c29d57dc8ed 100644 (file)
@@ -36,7 +36,6 @@ struct apple_gpio_pinctrl {
 
        struct pinctrl_desc pinctrl_desc;
        struct gpio_chip gpio_chip;
-       struct irq_chip irq_chip;
        u8 irqgrps[];
 };
 
@@ -275,17 +274,21 @@ static unsigned int apple_gpio_irq_type(unsigned int type)
 
 static void apple_gpio_irq_mask(struct irq_data *data)
 {
-       struct apple_gpio_pinctrl *pctl = gpiochip_get_data(irq_data_get_irq_chip_data(data));
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+       struct apple_gpio_pinctrl *pctl = gpiochip_get_data(gc);
 
        apple_gpio_set_reg(pctl, data->hwirq, REG_GPIOx_MODE,
                           FIELD_PREP(REG_GPIOx_MODE, REG_GPIOx_IN_IRQ_OFF));
+       gpiochip_disable_irq(gc, data->hwirq);
 }
 
 static void apple_gpio_irq_unmask(struct irq_data *data)
 {
-       struct apple_gpio_pinctrl *pctl = gpiochip_get_data(irq_data_get_irq_chip_data(data));
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+       struct apple_gpio_pinctrl *pctl = gpiochip_get_data(gc);
        unsigned int irqtype = apple_gpio_irq_type(irqd_get_trigger_type(data));
 
+       gpiochip_enable_irq(gc, data->hwirq);
        apple_gpio_set_reg(pctl, data->hwirq, REG_GPIOx_MODE,
                           FIELD_PREP(REG_GPIOx_MODE, irqtype));
 }
@@ -343,13 +346,15 @@ static void apple_gpio_irq_handler(struct irq_desc *desc)
        chained_irq_exit(chip, desc);
 }
 
-static struct irq_chip apple_gpio_irqchip = {
-       .name           = "Apple-GPIO",
-       .irq_startup    = apple_gpio_irq_startup,
-       .irq_ack        = apple_gpio_irq_ack,
-       .irq_mask       = apple_gpio_irq_mask,
-       .irq_unmask     = apple_gpio_irq_unmask,
-       .irq_set_type   = apple_gpio_irq_set_type,
+static const struct irq_chip apple_gpio_irqchip = {
+       .name                   = "Apple-GPIO",
+       .irq_startup            = apple_gpio_irq_startup,
+       .irq_ack                = apple_gpio_irq_ack,
+       .irq_mask               = apple_gpio_irq_mask,
+       .irq_unmask             = apple_gpio_irq_unmask,
+       .irq_set_type           = apple_gpio_irq_set_type,
+       .flags                  = IRQCHIP_IMMUTABLE,
+       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 /* Probe & register */
@@ -360,8 +365,6 @@ static int apple_gpio_register(struct apple_gpio_pinctrl *pctl)
        void **irq_data = NULL;
        int ret;
 
-       pctl->irq_chip = apple_gpio_irqchip;
-
        pctl->gpio_chip.label = dev_name(pctl->dev);
        pctl->gpio_chip.request = gpiochip_generic_request;
        pctl->gpio_chip.free = gpiochip_generic_free;
@@ -377,7 +380,7 @@ static int apple_gpio_register(struct apple_gpio_pinctrl *pctl)
        if (girq->num_parents) {
                int i;
 
-               girq->chip = &pctl->irq_chip;
+               gpio_irq_chip_set_chip(girq, &apple_gpio_irqchip);
                girq->parent_handler = apple_gpio_irq_handler;
 
                girq->parents = kmalloc_array(girq->num_parents,
index 003fb0e341537bd3efce6fe993bf0359f2b13174..6a956ee94494f50696fca8fb0b3d981b1b1dfb98 100644 (file)
@@ -129,6 +129,7 @@ enum {
        FUNC_PTP1,
        FUNC_PTP2,
        FUNC_PTP3,
+       FUNC_PTPSYNC_0,
        FUNC_PTPSYNC_1,
        FUNC_PTPSYNC_2,
        FUNC_PTPSYNC_3,
@@ -252,6 +253,7 @@ static const char *const ocelot_function_names[] = {
        [FUNC_PTP1]             = "ptp1",
        [FUNC_PTP2]             = "ptp2",
        [FUNC_PTP3]             = "ptp3",
+       [FUNC_PTPSYNC_0]        = "ptpsync_0",
        [FUNC_PTPSYNC_1]        = "ptpsync_1",
        [FUNC_PTPSYNC_2]        = "ptpsync_2",
        [FUNC_PTPSYNC_3]        = "ptpsync_3",
@@ -983,7 +985,7 @@ LAN966X_P(31,   GPIO,   FC3_c,     CAN1,      NONE,   OB_TRG,   RECO_b,      NON
 LAN966X_P(32,   GPIO,   FC3_c,     NONE,   SGPIO_a,     NONE,  MIIM_Sa,      NONE,        R);
 LAN966X_P(33,   GPIO,   FC1_b,     NONE,   SGPIO_a,     NONE,  MIIM_Sa,    MIIM_b,        R);
 LAN966X_P(34,   GPIO,   FC1_b,     NONE,   SGPIO_a,     NONE,  MIIM_Sa,    MIIM_b,        R);
-LAN966X_P(35,   GPIO,   FC1_b,     NONE,   SGPIO_a,   CAN0_b,     NONE,      NONE,        R);
+LAN966X_P(35,   GPIO,   FC1_b,  PTPSYNC_0, SGPIO_a,   CAN0_b,     NONE,      NONE,        R);
 LAN966X_P(36,   GPIO,    NONE,  PTPSYNC_1,    NONE,   CAN0_b,     NONE,      NONE,        R);
 LAN966X_P(37,   GPIO, FC_SHRD0, PTPSYNC_2, TWI_SLC_GATE_AD, NONE, NONE,      NONE,        R);
 LAN966X_P(38,   GPIO,    NONE,  PTPSYNC_3,    NONE,     NONE,     NONE,      NONE,        R);
index 966ea6622ff3ce860dff48c8c6f4fa18dcafa620..a2abfe987ab123c148c9b792ecadefa149989cf6 100644 (file)
@@ -42,7 +42,6 @@
  * @chip:           gpiochip handle.
  * @desc:           pin controller descriptor
  * @restart_nb:     restart notifier block.
- * @irq_chip:       irq chip information
  * @irq:            parent irq for the TLMM irq_chip.
  * @intr_target_use_scm: route irq to application cpu using scm calls
  * @lock:           Spinlock to protect register resources as well
@@ -63,7 +62,6 @@ struct msm_pinctrl {
        struct pinctrl_desc desc;
        struct notifier_block restart_nb;
 
-       struct irq_chip irq_chip;
        int irq;
 
        bool intr_target_use_scm;
@@ -868,6 +866,8 @@ static void msm_gpio_irq_enable(struct irq_data *d)
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
        struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
 
+       gpiochip_enable_irq(gc, d->hwirq);
+
        if (d->parent_data)
                irq_chip_enable_parent(d);
 
@@ -885,6 +885,8 @@ static void msm_gpio_irq_disable(struct irq_data *d)
 
        if (!test_bit(d->hwirq, pctrl->skip_wake_irqs))
                msm_gpio_irq_mask(d);
+
+       gpiochip_disable_irq(gc, d->hwirq);
 }
 
 /**
@@ -958,6 +960,14 @@ static void msm_gpio_irq_ack(struct irq_data *d)
        raw_spin_unlock_irqrestore(&pctrl->lock, flags);
 }
 
+static void msm_gpio_irq_eoi(struct irq_data *d)
+{
+       d = d->parent_data;
+
+       if (d)
+               d->chip->irq_eoi(d);
+}
+
 static bool msm_gpio_needs_dual_edge_parent_workaround(struct irq_data *d,
                                                       unsigned int type)
 {
@@ -1255,6 +1265,26 @@ static bool msm_gpio_needs_valid_mask(struct msm_pinctrl *pctrl)
        return device_property_count_u16(pctrl->dev, "gpios") > 0;
 }
 
+static const struct irq_chip msm_gpio_irq_chip = {
+       .name                   = "msmgpio",
+       .irq_enable             = msm_gpio_irq_enable,
+       .irq_disable            = msm_gpio_irq_disable,
+       .irq_mask               = msm_gpio_irq_mask,
+       .irq_unmask             = msm_gpio_irq_unmask,
+       .irq_ack                = msm_gpio_irq_ack,
+       .irq_eoi                = msm_gpio_irq_eoi,
+       .irq_set_type           = msm_gpio_irq_set_type,
+       .irq_set_wake           = msm_gpio_irq_set_wake,
+       .irq_request_resources  = msm_gpio_irq_reqres,
+       .irq_release_resources  = msm_gpio_irq_relres,
+       .irq_set_affinity       = msm_gpio_irq_set_affinity,
+       .irq_set_vcpu_affinity  = msm_gpio_irq_set_vcpu_affinity,
+       .flags                  = (IRQCHIP_MASK_ON_SUSPEND |
+                                  IRQCHIP_SET_TYPE_MASKED |
+                                  IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND |
+                                  IRQCHIP_IMMUTABLE),
+};
+
 static int msm_gpio_init(struct msm_pinctrl *pctrl)
 {
        struct gpio_chip *chip;
@@ -1276,22 +1306,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
        if (msm_gpio_needs_valid_mask(pctrl))
                chip->init_valid_mask = msm_gpio_init_valid_mask;
 
-       pctrl->irq_chip.name = "msmgpio";
-       pctrl->irq_chip.irq_enable = msm_gpio_irq_enable;
-       pctrl->irq_chip.irq_disable = msm_gpio_irq_disable;
-       pctrl->irq_chip.irq_mask = msm_gpio_irq_mask;
-       pctrl->irq_chip.irq_unmask = msm_gpio_irq_unmask;
-       pctrl->irq_chip.irq_ack = msm_gpio_irq_ack;
-       pctrl->irq_chip.irq_set_type = msm_gpio_irq_set_type;
-       pctrl->irq_chip.irq_set_wake = msm_gpio_irq_set_wake;
-       pctrl->irq_chip.irq_request_resources = msm_gpio_irq_reqres;
-       pctrl->irq_chip.irq_release_resources = msm_gpio_irq_relres;
-       pctrl->irq_chip.irq_set_affinity = msm_gpio_irq_set_affinity;
-       pctrl->irq_chip.irq_set_vcpu_affinity = msm_gpio_irq_set_vcpu_affinity;
-       pctrl->irq_chip.flags = IRQCHIP_MASK_ON_SUSPEND |
-                               IRQCHIP_SET_TYPE_MASKED |
-                               IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND;
-
        np = of_parse_phandle(pctrl->dev->of_node, "wakeup-parent", 0);
        if (np) {
                chip->irq.parent_domain = irq_find_matching_host(np,
@@ -1300,7 +1314,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
                if (!chip->irq.parent_domain)
                        return -EPROBE_DEFER;
                chip->irq.child_to_parent_hwirq = msm_gpio_wakeirq;
-               pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent;
                /*
                 * Let's skip handling the GPIOs, if the parent irqchip
                 * is handling the direct connect IRQ of the GPIO.
@@ -1313,7 +1326,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
        }
 
        girq = &chip->irq;
-       girq->chip = &pctrl->irq_chip;
+       gpio_irq_chip_set_chip(girq, &msm_gpio_irq_chip);
        girq->parent_handler = msm_gpio_irq_handler;
        girq->fwnode = pctrl->dev->fwnode;
        girq->num_parents = 1;
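
The hunks above convert the pinctrl-msm driver to the immutable irqchip
model: the per-instance irq_chip that used to be patched field by field at
init time becomes a shared const template flagged IRQCHIP_IMMUTABLE, the
EOI callback now forwards to the parent domain explicitly, and the GPIO
core learns about the chip via gpio_irq_chip_set_chip(). A minimal sketch
of the pattern, with hypothetical example_* helpers (a real conversion,
like the one above, would also call gpiochip_disable_irq()/
gpiochip_enable_irq() in its mask/unmask paths):

	/* Hypothetical callbacks; a real driver masks/unmasks hardware here. */
	static void example_irq_mask(struct irq_data *d)   { /* mask hw irq */ }
	static void example_irq_unmask(struct irq_data *d) { /* unmask hw irq */ }

	/* Shared template; never modified at runtime. */
	static const struct irq_chip example_irq_chip = {
		.name		= "example",
		.irq_mask	= example_irq_mask,
		.irq_unmask	= example_irq_unmask,
		.flags		= IRQCHIP_IMMUTABLE,
		/* Default request/release helpers from <linux/gpio/driver.h>. */
		GPIOCHIP_IRQ_RESOURCE_HELPERS,
	};

	static void example_setup(struct gpio_chip *chip)
	{
		/* Replaces the old "girq->chip = &priv->irq_chip" assignment. */
		gpio_irq_chip_set_chip(&chip->irq, &example_irq_chip);
	}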
index 2801ca706273254768393fe1136916e0c38aa17a..b8fc88a23cf4b6800c5eb7d5ad87668d60ccb6c4 100644 (file)
@@ -51,7 +51,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = {
                  SUNXI_FUNCTION(0x3, "pwm0"),          /* PWM0 */
                  SUNXI_FUNCTION(0x4, "i2s"),           /* IN */
                  SUNXI_FUNCTION(0x5, "uart1"),         /* RX */
-                 SUNXI_FUNCTION(0x6, "spi1")),         /* MOSI */
+                 SUNXI_FUNCTION(0x6, "spi1")),         /* CLK */
        SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 3),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
                  SUNXI_FUNCTION(0x1, "gpio_out"),
@@ -204,7 +204,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = {
                  SUNXI_FUNCTION(0x0, "gpio_in"),
                  SUNXI_FUNCTION(0x1, "gpio_out"),
                  SUNXI_FUNCTION(0x2, "lcd"),           /* D20 */
-                 SUNXI_FUNCTION(0x3, "lvds1"),         /* RX */
+                 SUNXI_FUNCTION(0x3, "uart2"),         /* RX */
                  SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)),
        SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 15),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
index d4c5c170bca095935161caa3e46af3cfb5ec61cb..72df4b8f4dd8b5a09e4943e49d7c4167a104b36c 100644 (file)
@@ -78,4 +78,21 @@ config MLXBF_PMC
          to performance monitoring counters within various blocks in the
          Mellanox BlueField SoC via a sysfs interface.
 
+config NVSW_SN2201
+       tristate "Nvidia SN2201 platform driver support"
+       depends on REGMAP
+       depends on HWMON
+       depends on I2C
+       depends on REGMAP_I2C
+       help
+         This driver provides support for the Nvidia SN2201 platform.
+         The SN2201 is a highly integrated one-rack-unit system with
+         L3 management switches. It has 48 x 1Gbps RJ45 ports plus
+         4 x 100G QSFP28 ports in a compact 1RU form factor. The system
+         also includes a serial port (RS-232 interface), an OOB port
+         (1G/100M MDI interface) and USB ports for management functions.
+         The processor used on the SN2201 is the Intel Atom® C3338R,
+         part of the Denverton product family. The system is equipped
+         with an Nvidia® Spectrum-1 32x100GbE Ethernet switch.
+
 endif # MELLANOX_PLATFORM
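
A hedged build note: with the entry above, the driver can be selected as a
module through a config fragment such as the following (the
CONFIG_MELLANOX_PLATFORM guard is implied by the endif; where the fragment
lives is up to the builder):

	CONFIG_MELLANOX_PLATFORM=y
	CONFIG_NVSW_SN2201=m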
index a4868366ff180ff04df82660e3819669e22877fa..04703c0416b18cb3fb2b4f9748e134c2ea3155af 100644 (file)
@@ -9,3 +9,4 @@ obj-$(CONFIG_MLXBF_TMFIFO)      += mlxbf-tmfifo.o
 obj-$(CONFIG_MLXREG_HOTPLUG)   += mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
 obj-$(CONFIG_MLXREG_LC) += mlxreg-lc.o
+obj-$(CONFIG_NVSW_SN2201) += nvsw-sn2201.o
diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c
new file mode 100644 (file)
index 0000000..0bcdc7c
--- /dev/null
@@ -0,0 +1,1261 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Nvidia sn2201 driver
+ *
+ * Copyright (C) 2022 Nvidia Technologies Ltd.
+ */
+
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/gpio.h>
+#include <linux/module.h>
+#include <linux/platform_data/mlxcpld.h>
+#include <linux/platform_data/mlxreg.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+/* SN2201 CPLD register offset. */
+#define NVSW_SN2201_CPLD_LPC_I2C_BASE_ADRR          0x2000
+#define NVSW_SN2201_CPLD_LPC_IO_RANGE               0x100
+#define NVSW_SN2201_HW_VER_ID_OFFSET                0x00
+#define NVSW_SN2201_BOARD_ID_OFFSET                 0x01
+#define NVSW_SN2201_CPLD_VER_OFFSET                 0x02
+#define NVSW_SN2201_CPLD_MVER_OFFSET                0x03
+#define NVSW_SN2201_CPLD_ID_OFFSET                  0x04
+#define NVSW_SN2201_CPLD_PN_OFFSET                  0x05
+#define NVSW_SN2201_CPLD_PN1_OFFSET                 0x06
+#define NVSW_SN2201_PSU_CTRL_OFFSET                 0x0a
+#define NVSW_SN2201_QSFP28_STATUS_OFFSET            0x0b
+#define NVSW_SN2201_QSFP28_INT_STATUS_OFFSET        0x0c
+#define NVSW_SN2201_QSFP28_LP_STATUS_OFFSET         0x0d
+#define NVSW_SN2201_QSFP28_RST_STATUS_OFFSET        0x0e
+#define NVSW_SN2201_SYS_STATUS_OFFSET               0x0f
+#define NVSW_SN2201_FRONT_SYS_LED_CTRL_OFFSET       0x10
+#define NVSW_SN2201_FRONT_PSU_LED_CTRL_OFFSET       0x12
+#define NVSW_SN2201_FRONT_UID_LED_CTRL_OFFSET       0x13
+#define NVSW_SN2201_QSFP28_LED_TEST_STATUS_OFFSET   0x14
+#define NVSW_SN2201_SYS_RST_STATUS_OFFSET           0x15
+#define NVSW_SN2201_SYS_INT_STATUS_OFFSET           0x21
+#define NVSW_SN2201_SYS_INT_MASK_OFFSET             0x22
+#define NVSW_SN2201_ASIC_STATUS_OFFSET              0x24
+#define NVSW_SN2201_ASIC_EVENT_OFFSET               0x25
+#define NVSW_SN2201_ASIC_MAKS_OFFSET                0x26
+#define NVSW_SN2201_THML_STATUS_OFFSET              0x27
+#define NVSW_SN2201_THML_EVENT_OFFSET               0x28
+#define NVSW_SN2201_THML_MASK_OFFSET                0x29
+#define NVSW_SN2201_PS_ALT_STATUS_OFFSET            0x2a
+#define NVSW_SN2201_PS_ALT_EVENT_OFFSET             0x2b
+#define NVSW_SN2201_PS_ALT_MASK_OFFSET              0x2c
+#define NVSW_SN2201_PS_PRSNT_STATUS_OFFSET          0x30
+#define NVSW_SN2201_PS_PRSNT_EVENT_OFFSET           0x31
+#define NVSW_SN2201_PS_PRSNT_MASK_OFFSET            0x32
+#define NVSW_SN2201_PS_DC_OK_STATUS_OFFSET          0x33
+#define NVSW_SN2201_PS_DC_OK_EVENT_OFFSET           0x34
+#define NVSW_SN2201_PS_DC_OK_MASK_OFFSET            0x35
+#define NVSW_SN2201_RST_CAUSE1_OFFSET               0x36
+#define NVSW_SN2201_RST_CAUSE2_OFFSET               0x37
+#define NVSW_SN2201_RST_SW_CTRL_OFFSET              0x38
+#define NVSW_SN2201_FAN_PRSNT_STATUS_OFFSET         0x3a
+#define NVSW_SN2201_FAN_PRSNT_EVENT_OFFSET          0x3b
+#define NVSW_SN2201_FAN_PRSNT_MASK_OFFSET           0x3c
+#define NVSW_SN2201_WD_TMR_OFFSET_LSB               0x40
+#define NVSW_SN2201_WD_TMR_OFFSET_MSB               0x41
+#define NVSW_SN2201_WD_ACT_OFFSET                   0x42
+#define NVSW_SN2201_FAN_LED1_CTRL_OFFSET            0x50
+#define NVSW_SN2201_FAN_LED2_CTRL_OFFSET            0x51
+#define NVSW_SN2201_REG_MAX                         0x52
+
+/* Number of physical I2C busses. */
+#define NVSW_SN2201_PHY_I2C_BUS_NUM            2
+/* Number of main mux channels. */
+#define NVSW_SN2201_MAIN_MUX_CHNL_NUM          8
+
+#define NVSW_SN2201_MAIN_NR                    0
+#define NVSW_SN2201_MAIN_MUX_NR                        1
+#define NVSW_SN2201_MAIN_MUX_DEFER_NR          (NVSW_SN2201_PHY_I2C_BUS_NUM + \
+                                                NVSW_SN2201_MAIN_MUX_CHNL_NUM - 1)
+
+#define NVSW_SN2201_MAIN_MUX_CH0_NR    NVSW_SN2201_PHY_I2C_BUS_NUM
+#define NVSW_SN2201_MAIN_MUX_CH1_NR    (NVSW_SN2201_MAIN_MUX_CH0_NR + 1)
+#define NVSW_SN2201_MAIN_MUX_CH2_NR    (NVSW_SN2201_MAIN_MUX_CH0_NR + 2)
+#define NVSW_SN2201_MAIN_MUX_CH3_NR    (NVSW_SN2201_MAIN_MUX_CH0_NR + 3)
+#define NVSW_SN2201_MAIN_MUX_CH5_NR    (NVSW_SN2201_MAIN_MUX_CH0_NR + 5)
+#define NVSW_SN2201_MAIN_MUX_CH6_NR    (NVSW_SN2201_MAIN_MUX_CH0_NR + 6)
+#define NVSW_SN2201_MAIN_MUX_CH7_NR    (NVSW_SN2201_MAIN_MUX_CH0_NR + 7)
+
+#define NVSW_SN2201_CPLD_NR            NVSW_SN2201_MAIN_MUX_CH0_NR
+#define NVSW_SN2201_NR_NONE            -1
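+
+/*
+ * Worked example of the numbering: with 2 physical busses (adapters 0
+ * and 1) and an 8-channel main mux, the mux channels occupy adapter
+ * numbers 2..9. The deferred number resolves to 2 + 8 - 1 = 9, i.e.
+ * the last mux channel, which is the last adapter that must appear
+ * before device creation proceeds.
+ */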
+
+/* Masks for aggregation, PSU presence and power, and ASIC events
+ * in CPLD-related registers.
+ */
+#define NVSW_SN2201_CPLD_AGGR_ASIC_MASK_DEF    0xe0
+#define NVSW_SN2201_CPLD_AGGR_PSU_MASK_DEF     0x04
+#define NVSW_SN2201_CPLD_AGGR_PWR_MASK_DEF     0x02
+#define NVSW_SN2201_CPLD_AGGR_FAN_MASK_DEF     0x10
+#define NVSW_SN2201_CPLD_AGGR_MASK_DEF      \
+       (NVSW_SN2201_CPLD_AGGR_ASIC_MASK_DEF \
+       | NVSW_SN2201_CPLD_AGGR_PSU_MASK_DEF \
+       | NVSW_SN2201_CPLD_AGGR_PWR_MASK_DEF \
+       | NVSW_SN2201_CPLD_AGGR_FAN_MASK_DEF)
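+/* With the values above this evaluates to 0xe0 | 0x04 | 0x02 | 0x10 = 0xf6. */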
+
+#define NVSW_SN2201_CPLD_ASIC_MASK             GENMASK(3, 1)
+#define NVSW_SN2201_CPLD_PSU_MASK              GENMASK(1, 0)
+#define NVSW_SN2201_CPLD_PWR_MASK              GENMASK(1, 0)
+#define NVSW_SN2201_CPLD_FAN_MASK              GENMASK(3, 0)
+
+#define NVSW_SN2201_CPLD_SYSIRQ                        26
+#define NVSW_SN2201_LPC_SYSIRQ                 28
+#define NVSW_SN2201_CPLD_I2CADDR               0x41
+
+#define NVSW_SN2201_WD_DFLT_TIMEOUT            600
+
+/* nvsw_sn2201 - device private data
+ * @dev: platform device;
+ * @io_data: register access platform data;
+ * @led_data: LED platform data;
+ * @wd_data: watchdog platform data;
+ * @hotplug_data: hotplug platform data;
+ * @i2c_data: I2C controller platform data;
+ * @led: LED device;
+ * @wd: watchdog device;
+ * @io_regs: register access device;
+ * @pdev_hotplug: hotplug device;
+ * @pdev_i2c: I2C controller device;
+ * @sn2201_devs: I2C devices for sn2201 devices;
+ * @sn2201_devs_num: number of I2C devices for sn2201 device;
+ * @main_mux_devs: I2C devices for main mux;
+ * @main_mux_devs_num: number of I2C devices for main mux;
+ * @cpld_devs: I2C devices for cpld;
+ * @cpld_devs_num: number of I2C devices for cpld;
+ * @main_mux_deferred_nr: I2C adapter number that must exist before device creation runs;
+ */
+struct nvsw_sn2201 {
+       struct device *dev;
+       struct mlxreg_core_platform_data *io_data;
+       struct mlxreg_core_platform_data *led_data;
+       struct mlxreg_core_platform_data *wd_data;
+       struct mlxreg_core_hotplug_platform_data *hotplug_data;
+       struct mlxreg_core_hotplug_platform_data *i2c_data;
+       struct platform_device *led;
+       struct platform_device *wd;
+       struct platform_device *io_regs;
+       struct platform_device *pdev_hotplug;
+       struct platform_device *pdev_i2c;
+       struct mlxreg_hotplug_device *sn2201_devs;
+       int sn2201_devs_num;
+       struct mlxreg_hotplug_device *main_mux_devs;
+       int main_mux_devs_num;
+       struct mlxreg_hotplug_device *cpld_devs;
+       int cpld_devs_num;
+       int main_mux_deferred_nr;
+};
+
+static bool nvsw_sn2201_writeable_reg(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case NVSW_SN2201_PSU_CTRL_OFFSET:
+       case NVSW_SN2201_QSFP28_LP_STATUS_OFFSET:
+       case NVSW_SN2201_QSFP28_RST_STATUS_OFFSET:
+       case NVSW_SN2201_FRONT_SYS_LED_CTRL_OFFSET:
+       case NVSW_SN2201_FRONT_PSU_LED_CTRL_OFFSET:
+       case NVSW_SN2201_FRONT_UID_LED_CTRL_OFFSET:
+       case NVSW_SN2201_QSFP28_LED_TEST_STATUS_OFFSET:
+       case NVSW_SN2201_SYS_RST_STATUS_OFFSET:
+       case NVSW_SN2201_SYS_INT_MASK_OFFSET:
+       case NVSW_SN2201_ASIC_EVENT_OFFSET:
+       case NVSW_SN2201_ASIC_MAKS_OFFSET:
+       case NVSW_SN2201_THML_EVENT_OFFSET:
+       case NVSW_SN2201_THML_MASK_OFFSET:
+       case NVSW_SN2201_PS_ALT_EVENT_OFFSET:
+       case NVSW_SN2201_PS_ALT_MASK_OFFSET:
+       case NVSW_SN2201_PS_PRSNT_EVENT_OFFSET:
+       case NVSW_SN2201_PS_PRSNT_MASK_OFFSET:
+       case NVSW_SN2201_PS_DC_OK_EVENT_OFFSET:
+       case NVSW_SN2201_PS_DC_OK_MASK_OFFSET:
+       case NVSW_SN2201_RST_SW_CTRL_OFFSET:
+       case NVSW_SN2201_FAN_PRSNT_EVENT_OFFSET:
+       case NVSW_SN2201_FAN_PRSNT_MASK_OFFSET:
+       case NVSW_SN2201_WD_TMR_OFFSET_LSB:
+       case NVSW_SN2201_WD_TMR_OFFSET_MSB:
+       case NVSW_SN2201_WD_ACT_OFFSET:
+       case NVSW_SN2201_FAN_LED1_CTRL_OFFSET:
+       case NVSW_SN2201_FAN_LED2_CTRL_OFFSET:
+               return true;
+       }
+       return false;
+}
+
+static bool nvsw_sn2201_readable_reg(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case NVSW_SN2201_HW_VER_ID_OFFSET:
+       case NVSW_SN2201_BOARD_ID_OFFSET:
+       case NVSW_SN2201_CPLD_VER_OFFSET:
+       case NVSW_SN2201_CPLD_MVER_OFFSET:
+       case NVSW_SN2201_CPLD_ID_OFFSET:
+       case NVSW_SN2201_CPLD_PN_OFFSET:
+       case NVSW_SN2201_CPLD_PN1_OFFSET:
+       case NVSW_SN2201_PSU_CTRL_OFFSET:
+       case NVSW_SN2201_QSFP28_STATUS_OFFSET:
+       case NVSW_SN2201_QSFP28_INT_STATUS_OFFSET:
+       case NVSW_SN2201_QSFP28_LP_STATUS_OFFSET:
+       case NVSW_SN2201_QSFP28_RST_STATUS_OFFSET:
+       case NVSW_SN2201_SYS_STATUS_OFFSET:
+       case NVSW_SN2201_FRONT_SYS_LED_CTRL_OFFSET:
+       case NVSW_SN2201_FRONT_PSU_LED_CTRL_OFFSET:
+       case NVSW_SN2201_FRONT_UID_LED_CTRL_OFFSET:
+       case NVSW_SN2201_QSFP28_LED_TEST_STATUS_OFFSET:
+       case NVSW_SN2201_SYS_RST_STATUS_OFFSET:
+       case NVSW_SN2201_RST_CAUSE1_OFFSET:
+       case NVSW_SN2201_RST_CAUSE2_OFFSET:
+       case NVSW_SN2201_SYS_INT_STATUS_OFFSET:
+       case NVSW_SN2201_SYS_INT_MASK_OFFSET:
+       case NVSW_SN2201_ASIC_STATUS_OFFSET:
+       case NVSW_SN2201_ASIC_EVENT_OFFSET:
+       case NVSW_SN2201_ASIC_MAKS_OFFSET:
+       case NVSW_SN2201_THML_STATUS_OFFSET:
+       case NVSW_SN2201_THML_EVENT_OFFSET:
+       case NVSW_SN2201_THML_MASK_OFFSET:
+       case NVSW_SN2201_PS_ALT_STATUS_OFFSET:
+       case NVSW_SN2201_PS_ALT_EVENT_OFFSET:
+       case NVSW_SN2201_PS_ALT_MASK_OFFSET:
+       case NVSW_SN2201_PS_PRSNT_STATUS_OFFSET:
+       case NVSW_SN2201_PS_PRSNT_EVENT_OFFSET:
+       case NVSW_SN2201_PS_PRSNT_MASK_OFFSET:
+       case NVSW_SN2201_PS_DC_OK_STATUS_OFFSET:
+       case NVSW_SN2201_PS_DC_OK_EVENT_OFFSET:
+       case NVSW_SN2201_PS_DC_OK_MASK_OFFSET:
+       case NVSW_SN2201_RST_SW_CTRL_OFFSET:
+       case NVSW_SN2201_FAN_PRSNT_STATUS_OFFSET:
+       case NVSW_SN2201_FAN_PRSNT_EVENT_OFFSET:
+       case NVSW_SN2201_FAN_PRSNT_MASK_OFFSET:
+       case NVSW_SN2201_WD_TMR_OFFSET_LSB:
+       case NVSW_SN2201_WD_TMR_OFFSET_MSB:
+       case NVSW_SN2201_WD_ACT_OFFSET:
+       case NVSW_SN2201_FAN_LED1_CTRL_OFFSET:
+       case NVSW_SN2201_FAN_LED2_CTRL_OFFSET:
+               return true;
+       }
+       return false;
+}
+
+static bool nvsw_sn2201_volatile_reg(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case NVSW_SN2201_HW_VER_ID_OFFSET:
+       case NVSW_SN2201_BOARD_ID_OFFSET:
+       case NVSW_SN2201_CPLD_VER_OFFSET:
+       case NVSW_SN2201_CPLD_MVER_OFFSET:
+       case NVSW_SN2201_CPLD_ID_OFFSET:
+       case NVSW_SN2201_CPLD_PN_OFFSET:
+       case NVSW_SN2201_CPLD_PN1_OFFSET:
+       case NVSW_SN2201_PSU_CTRL_OFFSET:
+       case NVSW_SN2201_QSFP28_STATUS_OFFSET:
+       case NVSW_SN2201_QSFP28_INT_STATUS_OFFSET:
+       case NVSW_SN2201_QSFP28_LP_STATUS_OFFSET:
+       case NVSW_SN2201_QSFP28_RST_STATUS_OFFSET:
+       case NVSW_SN2201_SYS_STATUS_OFFSET:
+       case NVSW_SN2201_FRONT_SYS_LED_CTRL_OFFSET:
+       case NVSW_SN2201_FRONT_PSU_LED_CTRL_OFFSET:
+       case NVSW_SN2201_FRONT_UID_LED_CTRL_OFFSET:
+       case NVSW_SN2201_QSFP28_LED_TEST_STATUS_OFFSET:
+       case NVSW_SN2201_SYS_RST_STATUS_OFFSET:
+       case NVSW_SN2201_RST_CAUSE1_OFFSET:
+       case NVSW_SN2201_RST_CAUSE2_OFFSET:
+       case NVSW_SN2201_SYS_INT_STATUS_OFFSET:
+       case NVSW_SN2201_SYS_INT_MASK_OFFSET:
+       case NVSW_SN2201_ASIC_STATUS_OFFSET:
+       case NVSW_SN2201_ASIC_EVENT_OFFSET:
+       case NVSW_SN2201_ASIC_MAKS_OFFSET:
+       case NVSW_SN2201_THML_STATUS_OFFSET:
+       case NVSW_SN2201_THML_EVENT_OFFSET:
+       case NVSW_SN2201_THML_MASK_OFFSET:
+       case NVSW_SN2201_PS_ALT_STATUS_OFFSET:
+       case NVSW_SN2201_PS_ALT_EVENT_OFFSET:
+       case NVSW_SN2201_PS_ALT_MASK_OFFSET:
+       case NVSW_SN2201_PS_PRSNT_STATUS_OFFSET:
+       case NVSW_SN2201_PS_PRSNT_EVENT_OFFSET:
+       case NVSW_SN2201_PS_PRSNT_MASK_OFFSET:
+       case NVSW_SN2201_PS_DC_OK_STATUS_OFFSET:
+       case NVSW_SN2201_PS_DC_OK_EVENT_OFFSET:
+       case NVSW_SN2201_PS_DC_OK_MASK_OFFSET:
+       case NVSW_SN2201_RST_SW_CTRL_OFFSET:
+       case NVSW_SN2201_FAN_PRSNT_STATUS_OFFSET:
+       case NVSW_SN2201_FAN_PRSNT_EVENT_OFFSET:
+       case NVSW_SN2201_FAN_PRSNT_MASK_OFFSET:
+       case NVSW_SN2201_WD_TMR_OFFSET_LSB:
+       case NVSW_SN2201_WD_TMR_OFFSET_MSB:
+       case NVSW_SN2201_FAN_LED1_CTRL_OFFSET:
+       case NVSW_SN2201_FAN_LED2_CTRL_OFFSET:
+               return true;
+       }
+       return false;
+}
+
+static const struct reg_default nvsw_sn2201_regmap_default[] = {
+       { NVSW_SN2201_QSFP28_LED_TEST_STATUS_OFFSET, 0x00 },
+       { NVSW_SN2201_WD_ACT_OFFSET, 0x00 },
+};
+
+/* Configuration for the register map of a device with a 1-byte address space. */
+static const struct regmap_config nvsw_sn2201_regmap_conf = {
+       .reg_bits = 8,
+       .val_bits = 8,
+       .max_register = NVSW_SN2201_REG_MAX,
+       .cache_type = REGCACHE_FLAT,
+       .writeable_reg = nvsw_sn2201_writeable_reg,
+       .readable_reg = nvsw_sn2201_readable_reg,
+       .volatile_reg = nvsw_sn2201_volatile_reg,
+       .reg_defaults = nvsw_sn2201_regmap_default,
+       .num_reg_defaults = ARRAY_SIZE(nvsw_sn2201_regmap_default),
+};
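+
+/*
+ * Behavior sketch: registers marked volatile are always read from the
+ * hardware, while non-volatile ones are served from the flat cache and
+ * can be replayed by regcache_sync(); the two reg_defaults entries
+ * seed that cache. Typical accesses through this map:
+ *
+ *	unsigned int val;
+ *
+ *	regmap_read(regmap, NVSW_SN2201_SYS_STATUS_OFFSET, &val);
+ *	regmap_update_bits(regmap, NVSW_SN2201_PSU_CTRL_OFFSET,
+ *			   BIT(0), BIT(0));
+ */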
+
+/* Regions for LPC I2C controller and LPC base register space. */
+static const struct resource nvsw_sn2201_lpc_io_resources[] = {
+       [0] = DEFINE_RES_NAMED(NVSW_SN2201_CPLD_LPC_I2C_BASE_ADRR,
+                              NVSW_SN2201_CPLD_LPC_IO_RANGE,
+                              "mlxplat_cpld_lpc_i2c_ctrl", IORESOURCE_IO),
+};
+
+static struct resource nvsw_sn2201_cpld_res[] = {
+       [0] = DEFINE_RES_IRQ_NAMED(NVSW_SN2201_CPLD_SYSIRQ, "mlxreg-hotplug"),
+};
+
+static struct resource nvsw_sn2201_lpc_res[] = {
+       [0] = DEFINE_RES_IRQ_NAMED(NVSW_SN2201_LPC_SYSIRQ, "i2c-mlxcpld"),
+};
+
+/* SN2201 I2C platform data. */
+static struct mlxreg_core_hotplug_platform_data nvsw_sn2201_i2c_data = {
+       .irq = NVSW_SN2201_CPLD_SYSIRQ,
+};
+
+/* SN2201 CPLD device. */
+static struct i2c_board_info nvsw_sn2201_cpld_devices[] = {
+       {
+               I2C_BOARD_INFO("nvsw-sn2201", 0x41),
+       },
+};
+
+/* SN2201 CPLD board info. */
+static struct mlxreg_hotplug_device nvsw_sn2201_cpld_brdinfo[] = {
+       {
+               .brdinfo = &nvsw_sn2201_cpld_devices[0],
+               .nr = NVSW_SN2201_CPLD_NR,
+       },
+};
+
+/* SN2201 main mux device. */
+static struct i2c_board_info nvsw_sn2201_main_mux_devices[] = {
+       {
+               I2C_BOARD_INFO("pca9548", 0x70),
+       },
+};
+
+/* SN2201 main mux board info. */
+static struct mlxreg_hotplug_device nvsw_sn2201_main_mux_brdinfo[] = {
+       {
+               .brdinfo = &nvsw_sn2201_main_mux_devices[0],
+               .nr = NVSW_SN2201_MAIN_MUX_NR,
+       },
+};
+
+/* SN2201 power devices. */
+static struct i2c_board_info nvsw_sn2201_pwr_devices[] = {
+       {
+               I2C_BOARD_INFO("pmbus", 0x58),
+       },
+       {
+               I2C_BOARD_INFO("pmbus", 0x58),
+       },
+};
+
+/* SN2201 fan devices. */
+static struct i2c_board_info nvsw_sn2201_fan_devices[] = {
+       {
+               I2C_BOARD_INFO("24c02", 0x50),
+       },
+       {
+               I2C_BOARD_INFO("24c02", 0x51),
+       },
+       {
+               I2C_BOARD_INFO("24c02", 0x52),
+       },
+       {
+               I2C_BOARD_INFO("24c02", 0x53),
+       },
+};
+
+/* SN2201 hotplug default data. */
+static struct mlxreg_core_data nvsw_sn2201_psu_items_data[] = {
+       {
+               .label = "psu1",
+               .reg = NVSW_SN2201_PS_PRSNT_STATUS_OFFSET,
+               .mask = BIT(0),
+               .hpdev.nr = NVSW_SN2201_NR_NONE,
+       },
+       {
+               .label = "psu2",
+               .reg = NVSW_SN2201_PS_PRSNT_STATUS_OFFSET,
+               .mask = BIT(1),
+               .hpdev.nr = NVSW_SN2201_NR_NONE,
+       },
+};
+
+static struct mlxreg_core_data nvsw_sn2201_pwr_items_data[] = {
+       {
+               .label = "pwr1",
+               .reg = NVSW_SN2201_PS_DC_OK_STATUS_OFFSET,
+               .mask = BIT(0),
+               .hpdev.brdinfo = &nvsw_sn2201_pwr_devices[0],
+               .hpdev.nr = NVSW_SN2201_MAIN_MUX_CH1_NR,
+       },
+       {
+               .label = "pwr2",
+               .reg = NVSW_SN2201_PS_DC_OK_STATUS_OFFSET,
+               .mask = BIT(1),
+               .hpdev.brdinfo = &nvsw_sn2201_pwr_devices[1],
+               .hpdev.nr = NVSW_SN2201_MAIN_MUX_CH2_NR,
+       },
+};
+
+static struct mlxreg_core_data nvsw_sn2201_fan_items_data[] = {
+       {
+               .label = "fan1",
+               .reg = NVSW_SN2201_FAN_PRSNT_STATUS_OFFSET,
+               .mask = BIT(0),
+               .hpdev.brdinfo = &nvsw_sn2201_fan_devices[0],
+               .hpdev.nr = NVSW_SN2201_NR_NONE,
+       },
+       {
+               .label = "fan2",
+               .reg = NVSW_SN2201_FAN_PRSNT_STATUS_OFFSET,
+               .mask = BIT(1),
+               .hpdev.brdinfo = &nvsw_sn2201_fan_devices[1],
+               .hpdev.nr = NVSW_SN2201_NR_NONE,
+       },
+       {
+               .label = "fan3",
+               .reg = NVSW_SN2201_FAN_PRSNT_STATUS_OFFSET,
+               .mask = BIT(2),
+               .hpdev.brdinfo = &nvsw_sn2201_fan_devices[2],
+               .hpdev.nr = NVSW_SN2201_NR_NONE,
+       },
+       {
+               .label = "fan4",
+               .reg = NVSW_SN2201_FAN_PRSNT_STATUS_OFFSET,
+               .mask = BIT(3),
+               .hpdev.brdinfo = &nvsw_sn2201_fan_devices[3],
+               .hpdev.nr = NVSW_SN2201_NR_NONE,
+       },
+};
+
+static struct mlxreg_core_data nvsw_sn2201_sys_items_data[] = {
+       {
+               .label = "nic_smb_alert",
+               .reg = NVSW_SN2201_ASIC_STATUS_OFFSET,
+               .mask = BIT(1),
+               .hpdev.nr = NVSW_SN2201_NR_NONE,
+       },
+       {
+               .label = "cpu_sd",
+               .reg = NVSW_SN2201_ASIC_STATUS_OFFSET,
+               .mask = BIT(2),
+               .hpdev.nr = NVSW_SN2201_NR_NONE,
+       },
+       {
+               .label = "mac_health",
+               .reg = NVSW_SN2201_ASIC_STATUS_OFFSET,
+               .mask = BIT(3),
+               .hpdev.nr = NVSW_SN2201_NR_NONE,
+       },
+};
+
+static struct mlxreg_core_item nvsw_sn2201_items[] = {
+       {
+               .data = nvsw_sn2201_psu_items_data,
+               .aggr_mask = NVSW_SN2201_CPLD_AGGR_PSU_MASK_DEF,
+               .reg = NVSW_SN2201_PS_PRSNT_STATUS_OFFSET,
+               .mask = NVSW_SN2201_CPLD_PSU_MASK,
+               .count = ARRAY_SIZE(nvsw_sn2201_psu_items_data),
+               .inversed = 1,
+               .health = false,
+       },
+       {
+               .data = nvsw_sn2201_pwr_items_data,
+               .aggr_mask = NVSW_SN2201_CPLD_AGGR_PWR_MASK_DEF,
+               .reg = NVSW_SN2201_PS_DC_OK_STATUS_OFFSET,
+               .mask = NVSW_SN2201_CPLD_PWR_MASK,
+               .count = ARRAY_SIZE(nvsw_sn2201_pwr_items_data),
+               .inversed = 0,
+               .health = false,
+       },
+       {
+               .data = nvsw_sn2201_fan_items_data,
+               .aggr_mask = NVSW_SN2201_CPLD_AGGR_FAN_MASK_DEF,
+               .reg = NVSW_SN2201_FAN_PRSNT_STATUS_OFFSET,
+               .mask = NVSW_SN2201_CPLD_FAN_MASK,
+               .count = ARRAY_SIZE(nvsw_sn2201_fan_items_data),
+               .inversed = 1,
+               .health = false,
+       },
+       {
+               .data = nvsw_sn2201_sys_items_data,
+               .aggr_mask = NVSW_SN2201_CPLD_AGGR_ASIC_MASK_DEF,
+               .reg = NVSW_SN2201_ASIC_STATUS_OFFSET,
+               .mask = NVSW_SN2201_CPLD_ASIC_MASK,
+               .count = ARRAY_SIZE(nvsw_sn2201_sys_items_data),
+               .inversed = 1,
+               .health = false,
+       },
+};
+
+static struct mlxreg_core_hotplug_platform_data nvsw_sn2201_hotplug = {
+       .items = nvsw_sn2201_items,
+       .counter = ARRAY_SIZE(nvsw_sn2201_items),
+       .cell = NVSW_SN2201_SYS_INT_STATUS_OFFSET,
+       .mask = NVSW_SN2201_CPLD_AGGR_MASK_DEF,
+};
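+
+/*
+ * Hotplug flow sketch: mlxreg-hotplug reads the aggregation register
+ * at .cell masked by .mask; a set bit selects the item above whose
+ * .aggr_mask matches, and that item's own .reg/.mask pair is then read
+ * to identify the exact PSU, power, fan or ASIC signal that changed.
+ * .inversed flips the polarity used for presence detection.
+ */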
+
+/* SN2201 static devices. */
+static struct i2c_board_info nvsw_sn2201_static_devices[] = {
+       {
+               I2C_BOARD_INFO("24c02", 0x57),
+       },
+       {
+               I2C_BOARD_INFO("lm75", 0x4b),
+       },
+       {
+               I2C_BOARD_INFO("24c64", 0x56),
+       },
+       {
+               I2C_BOARD_INFO("ads1015", 0x49),
+       },
+       {
+               I2C_BOARD_INFO("pca9546", 0x71),
+       },
+       {
+               I2C_BOARD_INFO("emc2305", 0x4d),
+       },
+       {
+               I2C_BOARD_INFO("lm75", 0x49),
+       },
+       {
+               I2C_BOARD_INFO("pca9555", 0x27),
+       },
+       {
+               I2C_BOARD_INFO("powr1014", 0x37),
+       },
+       {
+               I2C_BOARD_INFO("lm75", 0x4f),
+       },
+       {
+               I2C_BOARD_INFO("pmbus", 0x40),
+       },
+};
+
+/* SN2201 default static board info. */
+static struct mlxreg_hotplug_device nvsw_sn2201_static_brdinfo[] = {
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[0],
+               .nr = NVSW_SN2201_MAIN_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[1],
+               .nr = NVSW_SN2201_MAIN_MUX_CH0_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[2],
+               .nr = NVSW_SN2201_MAIN_MUX_CH0_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[3],
+               .nr = NVSW_SN2201_MAIN_MUX_CH0_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[4],
+               .nr = NVSW_SN2201_MAIN_MUX_CH3_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[5],
+               .nr = NVSW_SN2201_MAIN_MUX_CH5_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[6],
+               .nr = NVSW_SN2201_MAIN_MUX_CH5_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[7],
+               .nr = NVSW_SN2201_MAIN_MUX_CH5_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[8],
+               .nr = NVSW_SN2201_MAIN_MUX_CH6_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[9],
+               .nr = NVSW_SN2201_MAIN_MUX_CH6_NR,
+       },
+       {
+               .brdinfo = &nvsw_sn2201_static_devices[10],
+               .nr = NVSW_SN2201_MAIN_MUX_CH7_NR,
+       },
+};
+
+/* LED default data. */
+static struct mlxreg_core_data nvsw_sn2201_led_data[] = {
+       {
+               .label = "status:green",
+               .reg = NVSW_SN2201_FRONT_SYS_LED_CTRL_OFFSET,
+               .mask = GENMASK(7, 4),
+       },
+       {
+               .label = "status:orange",
+               .reg = NVSW_SN2201_FRONT_SYS_LED_CTRL_OFFSET,
+               .mask = GENMASK(7, 4),
+       },
+       {
+               .label = "psu:green",
+               .reg = NVSW_SN2201_FRONT_PSU_LED_CTRL_OFFSET,
+               .mask = GENMASK(7, 4),
+       },
+       {
+               .label = "psu:orange",
+               .reg = NVSW_SN2201_FRONT_PSU_LED_CTRL_OFFSET,
+               .mask = GENMASK(7, 4),
+       },
+       {
+               .label = "uid:blue",
+               .reg = NVSW_SN2201_FRONT_UID_LED_CTRL_OFFSET,
+               .mask = GENMASK(7, 4),
+       },
+       {
+               .label = "fan1:green",
+               .reg = NVSW_SN2201_FAN_LED1_CTRL_OFFSET,
+               .mask = GENMASK(7, 4),
+       },
+       {
+               .label = "fan1:orange",
+               .reg = NVSW_SN2201_FAN_LED1_CTRL_OFFSET,
+               .mask = GENMASK(7, 4),
+       },
+       {
+               .label = "fan2:green",
+               .reg = NVSW_SN2201_FAN_LED1_CTRL_OFFSET,
+               .mask = GENMASK(3, 0),
+       },
+       {
+               .label = "fan2:orange",
+               .reg = NVSW_SN2201_FAN_LED1_CTRL_OFFSET,
+               .mask = GENMASK(3, 0),
+       },
+       {
+               .label = "fan3:green",
+               .reg = NVSW_SN2201_FAN_LED2_CTRL_OFFSET,
+               .mask = GENMASK(7, 4),
+       },
+       {
+               .label = "fan3:orange",
+               .reg = NVSW_SN2201_FAN_LED2_CTRL_OFFSET,
+               .mask = GENMASK(7, 4),
+       },
+       {
+               .label = "fan4:green",
+               .reg = NVSW_SN2201_FAN_LED2_CTRL_OFFSET,
+               .mask = GENMASK(3, 0),
+       },
+       {
+               .label = "fan4:orange",
+               .reg = NVSW_SN2201_FAN_LED2_CTRL_OFFSET,
+               .mask = GENMASK(3, 0),
+       },
+};
+
+static struct mlxreg_core_platform_data nvsw_sn2201_led = {
+       .data = nvsw_sn2201_led_data,
+       .counter = ARRAY_SIZE(nvsw_sn2201_led_data),
+};
+
+/* Default register access data. */
+static struct mlxreg_core_data nvsw_sn2201_io_data[] = {
+       {
+               .label = "cpld1_version",
+               .reg = NVSW_SN2201_CPLD_VER_OFFSET,
+               .bit = GENMASK(7, 0),
+               .mode = 0444,
+       },
+       {
+               .label = "cpld1_version_min",
+               .reg = NVSW_SN2201_CPLD_MVER_OFFSET,
+               .bit = GENMASK(7, 0),
+               .mode = 0444,
+       },
+       {
+               .label = "cpld1_pn",
+               .reg = NVSW_SN2201_CPLD_PN_OFFSET,
+               .bit = GENMASK(15, 0),
+               .mode = 0444,
+               .regnum = 2,
+       },
+       {
+               .label = "psu1_on",
+               .reg = NVSW_SN2201_PSU_CTRL_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(0),
+               .mode = 0644,
+       },
+       {
+               .label = "psu2_on",
+               .reg = NVSW_SN2201_PSU_CTRL_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(1),
+               .mode = 0644,
+       },
+       {
+               .label = "pwr_cycle",
+               .reg = NVSW_SN2201_PSU_CTRL_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(2),
+               .mode = 0644,
+       },
+       {
+               .label = "asic_health",
+               .reg = NVSW_SN2201_SYS_STATUS_OFFSET,
+               .mask = GENMASK(4, 3),
+               .bit = 4,
+               .mode = 0444,
+       },
+       {
+               .label = "qsfp_pwr_good",
+               .reg = NVSW_SN2201_SYS_STATUS_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(0),
+               .mode = 0444,
+       },
+       {
+               .label = "phy_reset",
+               .reg = NVSW_SN2201_SYS_RST_STATUS_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(3),
+               .mode = 0644,
+       },
+       {
+               .label = "mac_reset",
+               .reg = NVSW_SN2201_SYS_RST_STATUS_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(2),
+               .mode = 0644,
+       },
+       {
+               .label = "pwr_down",
+               .reg = NVSW_SN2201_RST_SW_CTRL_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(0),
+               .mode = 0644,
+       },
+       {
+               .label = "reset_long_pb",
+               .reg = NVSW_SN2201_RST_CAUSE1_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(0),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_short_pb",
+               .reg = NVSW_SN2201_RST_CAUSE1_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(1),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_aux_pwr_or_fu",
+               .reg = NVSW_SN2201_RST_CAUSE1_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(2),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_swb_dc_dc_pwr_fail",
+               .reg = NVSW_SN2201_RST_CAUSE1_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(3),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_sw_reset",
+               .reg = NVSW_SN2201_RST_CAUSE1_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(4),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_fw_reset",
+               .reg = NVSW_SN2201_RST_CAUSE1_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(5),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_swb_wd",
+               .reg = NVSW_SN2201_RST_CAUSE1_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(6),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_asic_thermal",
+               .reg = NVSW_SN2201_RST_CAUSE1_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(7),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_system",
+               .reg = NVSW_SN2201_RST_CAUSE2_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(1),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_sw_pwr_off",
+               .reg = NVSW_SN2201_RST_CAUSE2_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(2),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_cpu_pwr_fail_thermal",
+               .reg = NVSW_SN2201_RST_CAUSE2_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(4),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_reload_bios",
+               .reg = NVSW_SN2201_RST_CAUSE2_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(5),
+               .mode = 0444,
+       },
+       {
+               .label = "reset_ac_pwr_fail",
+               .reg = NVSW_SN2201_RST_CAUSE2_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(6),
+               .mode = 0444,
+       },
+       {
+               .label = "psu1",
+               .reg = NVSW_SN2201_PS_PRSNT_STATUS_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(0),
+               .mode = 0444,
+       },
+       {
+               .label = "psu2",
+               .reg = NVSW_SN2201_PS_PRSNT_STATUS_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(1),
+               .mode = 0444,
+       },
+};
+
+static struct mlxreg_core_platform_data nvsw_sn2201_regs_io = {
+       .data = nvsw_sn2201_io_data,
+       .counter = ARRAY_SIZE(nvsw_sn2201_io_data),
+};
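+
+/*
+ * Each entry above is exposed by mlxreg-io as a sysfs attribute named
+ * after .label; .mask leaves only the bit(s) of interest visible, and
+ * .mode makes the attribute read-only (0444) or read-write (0644).
+ */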
+
+/* Default watchdog data. */
+static struct mlxreg_core_data nvsw_sn2201_wd_data[] = {
+       {
+               .label = "action",
+               .reg = NVSW_SN2201_WD_ACT_OFFSET,
+               .mask = GENMASK(7, 1),
+               .bit = 0,
+       },
+       {
+               .label = "timeout",
+               .reg = NVSW_SN2201_WD_TMR_OFFSET_LSB,
+               .mask = 0,
+               .health_cntr = NVSW_SN2201_WD_DFLT_TIMEOUT,
+       },
+       {
+               .label = "timeleft",
+               .reg = NVSW_SN2201_WD_TMR_OFFSET_LSB,
+               .mask = 0,
+       },
+       {
+               .label = "ping",
+               .reg = NVSW_SN2201_WD_ACT_OFFSET,
+               .mask = GENMASK(7, 1),
+               .bit = 0,
+       },
+       {
+               .label = "reset",
+               .reg = NVSW_SN2201_RST_CAUSE1_OFFSET,
+               .mask = GENMASK(7, 0) & ~BIT(6),
+               .bit = 6,
+       },
+};
+
+static struct mlxreg_core_platform_data nvsw_sn2201_wd = {
+       .data = nvsw_sn2201_wd_data,
+       .counter = ARRAY_SIZE(nvsw_sn2201_wd_data),
+       .version = MLX_WDT_TYPE3,
+       .identity = "mlx-wdt-main",
+};
+
+static int
+nvsw_sn2201_create_static_devices(struct nvsw_sn2201 *nvsw_sn2201,
+                                 struct mlxreg_hotplug_device *devs,
+                                 int size)
+{
+       struct mlxreg_hotplug_device *dev = devs;
+       int i, err;
+
+       /* Create I2C static devices. */
+       for (i = 0; i < size; i++, dev++) {
+               dev->client = i2c_new_client_device(dev->adapter, dev->brdinfo);
+               if (IS_ERR(dev->client)) {
+                       dev_err(nvsw_sn2201->dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
+                               dev->brdinfo->type,
+                               dev->nr, dev->brdinfo->addr);
+
+                       dev->adapter = NULL;
+                       goto fail_create_static_devices;
+               }
+       }
+
+       return 0;
+
+fail_create_static_devices:
+       /* Capture the error before the cleanup loop clears dev->client;
+        * IS_ERR() would only return a boolean, not an error code.
+        */
+       err = PTR_ERR(dev->client);
+       while (--i >= 0) {
+               dev = devs + i;
+               i2c_unregister_device(dev->client);
+               dev->client = NULL;
+               dev->adapter = NULL;
+       }
+       return err;
+}
+
+static void nvsw_sn2201_destroy_static_devices(struct nvsw_sn2201 *nvsw_sn2201,
+                                              struct mlxreg_hotplug_device *devs, int size)
+{
+       struct mlxreg_hotplug_device *dev = devs;
+       int i;
+
+       /* Destroy the static I2C devices. */
+       for (i = 0; i < size; i++, dev++) {
+               if (dev->client) {
+                       i2c_unregister_device(dev->client);
+                       dev->client = NULL;
+                       i2c_put_adapter(dev->adapter);
+                       dev->adapter = NULL;
+               }
+       }
+}
+
+static int nvsw_sn2201_config_post_init(struct nvsw_sn2201 *nvsw_sn2201)
+{
+       struct mlxreg_hotplug_device *sn2201_dev;
+       struct i2c_adapter *adap;
+       struct device *dev;
+       int i, err;
+
+       dev = nvsw_sn2201->dev;
+       adap = i2c_get_adapter(nvsw_sn2201->main_mux_deferred_nr);
+       if (!adap) {
+               dev_err(dev, "Failed to get adapter for bus %d\n",
+                       nvsw_sn2201->main_mux_deferred_nr);
+               return -ENODEV;
+       }
+       i2c_put_adapter(adap);
+
+       /* Update board info. */
+       sn2201_dev = nvsw_sn2201->sn2201_devs;
+       for (i = 0; i < nvsw_sn2201->sn2201_devs_num; i++, sn2201_dev++) {
+               sn2201_dev->adapter = i2c_get_adapter(sn2201_dev->nr);
+               if (!sn2201_dev->adapter)
+                       return -ENODEV;
+               i2c_put_adapter(sn2201_dev->adapter);
+       }
+
+       err = nvsw_sn2201_create_static_devices(nvsw_sn2201, nvsw_sn2201->sn2201_devs,
+                                               nvsw_sn2201->sn2201_devs_num);
+       if (err)
+               dev_err(dev, "Failed to create static devices\n");
+
+       return err;
+}
+
+static int nvsw_sn2201_config_init(struct nvsw_sn2201 *nvsw_sn2201, void *regmap)
+{
+       struct device *dev = nvsw_sn2201->dev;
+       int err;
+
+       nvsw_sn2201->io_data = &nvsw_sn2201_regs_io;
+       nvsw_sn2201->led_data = &nvsw_sn2201_led;
+       nvsw_sn2201->wd_data = &nvsw_sn2201_wd;
+       nvsw_sn2201->hotplug_data = &nvsw_sn2201_hotplug;
+
+       /* Register IO access driver. */
+       if (nvsw_sn2201->io_data) {
+               nvsw_sn2201->io_data->regmap = regmap;
+               nvsw_sn2201->io_regs =
+               platform_device_register_resndata(dev, "mlxreg-io", PLATFORM_DEVID_NONE, NULL, 0,
+                                                 nvsw_sn2201->io_data,
+                                                 sizeof(*nvsw_sn2201->io_data));
+               if (IS_ERR(nvsw_sn2201->io_regs)) {
+                       err = PTR_ERR(nvsw_sn2201->io_regs);
+                       goto fail_register_io;
+               }
+       }
+
+       /* Register LED driver. */
+       if (nvsw_sn2201->led_data) {
+               nvsw_sn2201->led_data->regmap = regmap;
+               nvsw_sn2201->led =
+               platform_device_register_resndata(dev, "leds-mlxreg", PLATFORM_DEVID_NONE, NULL, 0,
+                                                 nvsw_sn2201->led_data,
+                                                 sizeof(*nvsw_sn2201->led_data));
+               if (IS_ERR(nvsw_sn2201->led)) {
+                       err = PTR_ERR(nvsw_sn2201->led);
+                       goto fail_register_led;
+               }
+       }
+
+       /* Register WD driver. */
+       if (nvsw_sn2201->wd_data) {
+               nvsw_sn2201->wd_data->regmap = regmap;
+               nvsw_sn2201->wd =
+               platform_device_register_resndata(dev, "mlx-wdt", PLATFORM_DEVID_NONE, NULL, 0,
+                                                 nvsw_sn2201->wd_data,
+                                                 sizeof(*nvsw_sn2201->wd_data));
+               if (IS_ERR(nvsw_sn2201->wd)) {
+                       err = PTR_ERR(nvsw_sn2201->wd);
+                       goto fail_register_wd;
+               }
+       }
+
+       /* Register hotplug driver. */
+       if (nvsw_sn2201->hotplug_data) {
+               nvsw_sn2201->hotplug_data->regmap = regmap;
+               nvsw_sn2201->pdev_hotplug =
+               platform_device_register_resndata(dev, "mlxreg-hotplug", PLATFORM_DEVID_NONE,
+                                                 nvsw_sn2201_cpld_res,
+                                                 ARRAY_SIZE(nvsw_sn2201_cpld_res),
+                                                 nvsw_sn2201->hotplug_data,
+                                                 sizeof(*nvsw_sn2201->hotplug_data));
+               if (IS_ERR(nvsw_sn2201->pdev_hotplug)) {
+                       err = PTR_ERR(nvsw_sn2201->pdev_hotplug);
+                       goto fail_register_hotplug;
+               }
+       }
+
+       return nvsw_sn2201_config_post_init(nvsw_sn2201);
+
+fail_register_hotplug:
+       if (nvsw_sn2201->wd)
+               platform_device_unregister(nvsw_sn2201->wd);
+fail_register_wd:
+       if (nvsw_sn2201->led)
+               platform_device_unregister(nvsw_sn2201->led);
+fail_register_led:
+       if (nvsw_sn2201->io_regs)
+               platform_device_unregister(nvsw_sn2201->io_regs);
+fail_register_io:
+
+       return err;
+}
+
+static void nvsw_sn2201_config_exit(struct nvsw_sn2201 *nvsw_sn2201)
+{
+       /* Unregister hotplug driver. */
+       if (nvsw_sn2201->pdev_hotplug)
+               platform_device_unregister(nvsw_sn2201->pdev_hotplug);
+       /* Unregister WD driver. */
+       if (nvsw_sn2201->wd)
+               platform_device_unregister(nvsw_sn2201->wd);
+       /* Unregister LED driver. */
+       if (nvsw_sn2201->led)
+               platform_device_unregister(nvsw_sn2201->led);
+       /* Unregister IO access driver. */
+       if (nvsw_sn2201->io_regs)
+               platform_device_unregister(nvsw_sn2201->io_regs);
+}
+
+/*
+ * Initialization is divided into two parts:
+ * - I2C main bus init;
+ * - mux creation and attaching devices to the mux, which assumes that
+ *   the main bus has already been created.
+ * This separation is required to synchronize the two parts, which is
+ * achieved through the completion notify callback below.
+ */
+static int nvsw_sn2201_i2c_completion_notify(void *handle, int id)
+{
+       struct nvsw_sn2201 *nvsw_sn2201 = handle;
+       void *regmap;
+       int i, err;
+
+       /* Create main mux. */
+       nvsw_sn2201->main_mux_devs->adapter = i2c_get_adapter(nvsw_sn2201->main_mux_devs->nr);
+       if (!nvsw_sn2201->main_mux_devs->adapter) {
+               err = -ENODEV;
+               dev_err(nvsw_sn2201->dev, "Failed to get adapter for bus %d\n",
+                       nvsw_sn2201->main_mux_devs->nr);
+               goto i2c_get_adapter_main_fail;
+       }
+
+       nvsw_sn2201->main_mux_devs_num = ARRAY_SIZE(nvsw_sn2201_main_mux_brdinfo);
+       err = nvsw_sn2201_create_static_devices(nvsw_sn2201, nvsw_sn2201->main_mux_devs,
+                                               nvsw_sn2201->main_mux_devs_num);
+       if (err) {
+               dev_err(nvsw_sn2201->dev, "Failed to create main mux devices\n");
+               goto nvsw_sn2201_create_static_devices_fail;
+       }
+
+       nvsw_sn2201->cpld_devs->adapter = i2c_get_adapter(nvsw_sn2201->cpld_devs->nr);
+       if (!nvsw_sn2201->cpld_devs->adapter) {
+               err = -ENODEV;
+               dev_err(nvsw_sn2201->dev, "Failed to get adapter for bus %d\n",
+                       nvsw_sn2201->cpld_devs->nr);
+               goto i2c_get_adapter_fail;
+       }
+
+       /* Create CPLD device. */
+       nvsw_sn2201->cpld_devs->client = i2c_new_dummy_device(nvsw_sn2201->cpld_devs->adapter,
+                                                             NVSW_SN2201_CPLD_I2CADDR);
+       if (IS_ERR(nvsw_sn2201->cpld_devs->client)) {
+               err = PTR_ERR(nvsw_sn2201->cpld_devs->client);
+               dev_err(nvsw_sn2201->dev, "Failed to create %s cpld device at bus %d at addr 0x%02x\n",
+                       nvsw_sn2201->cpld_devs->brdinfo->type, nvsw_sn2201->cpld_devs->nr,
+                       nvsw_sn2201->cpld_devs->brdinfo->addr);
+               goto i2c_new_dummy_fail;
+       }
+
+       regmap = devm_regmap_init_i2c(nvsw_sn2201->cpld_devs->client, &nvsw_sn2201_regmap_conf);
+       if (IS_ERR(regmap)) {
+               err = PTR_ERR(regmap);
+               dev_err(nvsw_sn2201->dev, "Failed to initialise managed register map\n");
+               goto devm_regmap_init_i2c_fail;
+       }
+
+       /* Set default registers. */
+       for (i = 0; i < nvsw_sn2201_regmap_conf.num_reg_defaults; i++) {
+               err = regmap_write(regmap, nvsw_sn2201_regmap_default[i].reg,
+                                  nvsw_sn2201_regmap_default[i].def);
+               if (err) {
+                       dev_err(nvsw_sn2201->dev, "Failed to set register at offset 0x%02x to default value: 0x%02x\n",
+                               nvsw_sn2201_regmap_default[i].reg,
+                               nvsw_sn2201_regmap_default[i].def);
+                       goto regmap_write_fail;
+               }
+       }
+
+       /* Sync registers with hardware. */
+       regcache_mark_dirty(regmap);
+       err = regcache_sync(regmap);
+       if (err) {
+               dev_err(nvsw_sn2201->dev, "Failed to Sync registers with hardware\n");
+               goto regcache_sync_fail;
+       }
+
+       /* Configure SN2201 board. */
+       err = nvsw_sn2201_config_init(nvsw_sn2201, regmap);
+       if (err) {
+               dev_err(nvsw_sn2201->dev, "Failed to configure board\n");
+               goto nvsw_sn2201_config_init_fail;
+       }
+
+       return 0;
+
+nvsw_sn2201_config_init_fail:
+       nvsw_sn2201_config_exit(nvsw_sn2201);
+regcache_sync_fail:
+regmap_write_fail:
+devm_regmap_init_i2c_fail:
+i2c_new_dummy_fail:
+       i2c_put_adapter(nvsw_sn2201->cpld_devs->adapter);
+       nvsw_sn2201->cpld_devs->adapter = NULL;
+i2c_get_adapter_fail:
+       /* Destroy SN2201 static I2C devices. */
+       nvsw_sn2201_destroy_static_devices(nvsw_sn2201, nvsw_sn2201->sn2201_devs,
+                                          nvsw_sn2201->sn2201_devs_num);
+       /* Destroy main mux device. */
+       nvsw_sn2201_destroy_static_devices(nvsw_sn2201, nvsw_sn2201->main_mux_devs,
+                                          nvsw_sn2201->main_mux_devs_num);
+nvsw_sn2201_create_static_devices_fail:
+       i2c_put_adapter(nvsw_sn2201->main_mux_devs->adapter);
+i2c_get_adapter_main_fail:
+       return err;
+}
+
+static int nvsw_sn2201_config_pre_init(struct nvsw_sn2201 *nvsw_sn2201)
+{
+       nvsw_sn2201->i2c_data = &nvsw_sn2201_i2c_data;
+
+       /* Register I2C controller. */
+       nvsw_sn2201->i2c_data->handle = nvsw_sn2201;
+       nvsw_sn2201->i2c_data->completion_notify = nvsw_sn2201_i2c_completion_notify;
+       nvsw_sn2201->pdev_i2c = platform_device_register_resndata(nvsw_sn2201->dev, "i2c_mlxcpld",
+                                                                 NVSW_SN2201_MAIN_MUX_NR,
+                                                                 nvsw_sn2201_lpc_res,
+                                                                 ARRAY_SIZE(nvsw_sn2201_lpc_res),
+                                                                 nvsw_sn2201->i2c_data,
+                                                                 sizeof(*nvsw_sn2201->i2c_data));
+       if (IS_ERR(nvsw_sn2201->pdev_i2c))
+               return PTR_ERR(nvsw_sn2201->pdev_i2c);
+
+       return 0;
+}
+
+static int nvsw_sn2201_probe(struct platform_device *pdev)
+{
+       struct nvsw_sn2201 *nvsw_sn2201;
+
+       nvsw_sn2201 = devm_kzalloc(&pdev->dev, sizeof(*nvsw_sn2201), GFP_KERNEL);
+       if (!nvsw_sn2201)
+               return -ENOMEM;
+
+       nvsw_sn2201->dev = &pdev->dev;
+       platform_set_drvdata(pdev, nvsw_sn2201);
+       platform_device_add_resources(pdev, nvsw_sn2201_lpc_io_resources,
+                                     ARRAY_SIZE(nvsw_sn2201_lpc_io_resources));
+
+       nvsw_sn2201->main_mux_deferred_nr = NVSW_SN2201_MAIN_MUX_DEFER_NR;
+       nvsw_sn2201->main_mux_devs = nvsw_sn2201_main_mux_brdinfo;
+       nvsw_sn2201->cpld_devs = nvsw_sn2201_cpld_brdinfo;
+       nvsw_sn2201->sn2201_devs = nvsw_sn2201_static_brdinfo;
+       nvsw_sn2201->sn2201_devs_num = ARRAY_SIZE(nvsw_sn2201_static_brdinfo);
+
+       return nvsw_sn2201_config_pre_init(nvsw_sn2201);
+}
+
+static int nvsw_sn2201_remove(struct platform_device *pdev)
+{
+       struct nvsw_sn2201 *nvsw_sn2201 = platform_get_drvdata(pdev);
+
+       /* Unregister underlying drivers. */
+       nvsw_sn2201_config_exit(nvsw_sn2201);
+
+       /* Destroy SN2201 static I2C devices. */
+       nvsw_sn2201_destroy_static_devices(nvsw_sn2201,
+                                          nvsw_sn2201->sn2201_devs,
+                                          nvsw_sn2201->sn2201_devs_num);
+
+       i2c_put_adapter(nvsw_sn2201->cpld_devs->adapter);
+       nvsw_sn2201->cpld_devs->adapter = NULL;
+       /* Destroy main mux device. */
+       nvsw_sn2201_destroy_static_devices(nvsw_sn2201,
+                                          nvsw_sn2201->main_mux_devs,
+                                          nvsw_sn2201->main_mux_devs_num);
+
+       /* Unregister I2C controller. */
+       if (nvsw_sn2201->pdev_i2c)
+               platform_device_unregister(nvsw_sn2201->pdev_i2c);
+
+       return 0;
+}
+
+static const struct acpi_device_id nvsw_sn2201_acpi_ids[] = {
+       {"NVSN2201", 0},
+       {}
+};
+
+MODULE_DEVICE_TABLE(acpi, nvsw_sn2201_acpi_ids);
+
+static struct platform_driver nvsw_sn2201_driver = {
+       .probe = nvsw_sn2201_probe,
+       .remove = nvsw_sn2201_remove,
+       .driver = {
+               .name = "nvsw-sn2201",
+               .acpi_match_table = nvsw_sn2201_acpi_ids,
+       },
+};
+
+module_platform_driver(nvsw_sn2201_driver);
+
+MODULE_AUTHOR("Nvidia");
+MODULE_DESCRIPTION("Nvidia sn2201 platform driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("platform:nvsw-sn2201");
index d384d36098c270c93dae5a93330ae286492b7e5e..a62c5dfe42d64395a97bafba04e88d632615034d 100644 (file)
@@ -817,7 +817,7 @@ err_cpkg:
 err_bus:
        return status;
 }
-module_init(ssam_core_init);
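+/*
+ * Register at subsys initcall time so the aggregator bus exists before
+ * its client drivers initialize when everything is built in.
+ */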
+subsys_initcall(ssam_core_init);
 
 static void __exit ssam_core_exit(void)
 {
index c1775db29efb67f0c192490ac3542e7492b313fa..ec66fde28e75a873cd728664b6955ca2508952b9 100644 (file)
@@ -99,6 +99,14 @@ static const struct dmi_system_id dmi_lid_device_table[] = {
                },
                .driver_data = (void *)lid_device_props_l4D,
        },
+       {
+               .ident = "Surface Pro 8",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 8"),
+               },
+               .driver_data = (void *)lid_device_props_l4B,
+       },
        {
                .ident = "Surface Book 1",
                .matches = {
index 5d9dd70e4e0f52db0ea16edb05f0655a0020e9d3..f08ad85683cb0de9ae987a763bb18556d6b4ab1d 100644 (file)
@@ -1152,6 +1152,14 @@ config SIEMENS_SIMATIC_IPC
          To compile this driver as a module, choose M here: the module
          will be called simatic-ipc.
 
+config WINMATE_FM07_KEYS
+       tristate "Winmate FM07/FM07P front-panel keys driver"
+       depends on INPUT
+       help
+         Winmate FM07 and FM07P in-vehicle computers have a row of five
+         buttons below the display. This module adds an input device
+         that delivers key events when these buttons are pressed.
+
 endif # X86_PLATFORM_DEVICES
 
 config PMC_ATOM
index fe4d4c8970efa8072713a2b6b5830ac879bbe7af..4a59f47a46e269803acaf758417e2d0da4b33d0a 100644 (file)
@@ -130,3 +130,6 @@ obj-$(CONFIG_PMC_ATOM)                      += pmc_atom.o
 
 # Siemens Simatic Industrial PCs
 obj-$(CONFIG_SIEMENS_SIMATIC_IPC)      += simatic-ipc.o
+
+# Winmate
+obj-$(CONFIG_WINMATE_FM07_KEYS)                += winmate-fm07-keys.o
index fa4123dbdf7ff6a407f488c86c130fba1bcf771b..f11d18beac1876343c85489d901373ad99dc8d1d 100644 (file)
@@ -192,26 +192,6 @@ struct smu_metrics {
        u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX];
 } __packed;
 
-static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev)
-{
-       int rc;
-       u32 val;
-
-       rc = amd_pmc_send_cmd(dev, 0, &val, SMU_MSG_GETSMUVERSION, 1);
-       if (rc)
-               return rc;
-
-       dev->smu_program = (val >> 24) & GENMASK(7, 0);
-       dev->major = (val >> 16) & GENMASK(7, 0);
-       dev->minor = (val >> 8) & GENMASK(7, 0);
-       dev->rev = (val >> 0) & GENMASK(7, 0);
-
-       dev_dbg(dev->dev, "SMU program %u version is %u.%u.%u\n",
-               dev->smu_program, dev->major, dev->minor, dev->rev);
-
-       return 0;
-}
-
 static int amd_pmc_stb_debugfs_open(struct inode *inode, struct file *filp)
 {
        struct amd_pmc_dev *dev = filp->f_inode->i_private;
@@ -294,6 +274,40 @@ static const struct file_operations amd_pmc_stb_debugfs_fops_v2 = {
        .release = amd_pmc_stb_debugfs_release_v2,
 };
 
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_DEBUG_FS)
+static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev)
+{
+       if (dev->cpu_id == AMD_CPU_ID_PCO) {
+               dev_warn_once(dev->dev, "SMU debugging info not supported on this platform\n");
+               return -EINVAL;
+       }
+
+       /* Get Active devices list from SMU */
+       if (!dev->active_ips)
+               amd_pmc_send_cmd(dev, 0, &dev->active_ips, SMU_MSG_GET_SUP_CONSTRAINTS, 1);
+
+       /* Get dram address */
+       if (!dev->smu_virt_addr) {
+               u32 phys_addr_low, phys_addr_hi;
+               u64 smu_phys_addr;
+
+               amd_pmc_send_cmd(dev, 0, &phys_addr_low, SMU_MSG_LOG_GETDRAM_ADDR_LO, 1);
+               amd_pmc_send_cmd(dev, 0, &phys_addr_hi, SMU_MSG_LOG_GETDRAM_ADDR_HI, 1);
+               smu_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low);
+
+               dev->smu_virt_addr = devm_ioremap(dev->dev, smu_phys_addr,
+                                                 sizeof(struct smu_metrics));
+               if (!dev->smu_virt_addr)
+                       return -ENOMEM;
+       }
+
+       /* Start the logging */
+       amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_RESET, 0);
+       amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0);
+
+       return 0;
+}
+
 static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
                                 struct seq_file *s)
 {
@@ -321,11 +335,19 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
 
 static int get_metrics_table(struct amd_pmc_dev *pdev, struct smu_metrics *table)
 {
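+       /*
+        * Set up SMU logging lazily on the first metrics read; the
+        * helper's guards make repeated calls harmless.
+        */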
+       if (!pdev->smu_virt_addr) {
+               int ret = amd_pmc_setup_smu_logging(pdev);
+
+               if (ret)
+                       return ret;
+       }
+
        if (pdev->cpu_id == AMD_CPU_ID_PCO)
                return -ENODEV;
        memcpy_fromio(table, pdev->smu_virt_addr, sizeof(struct smu_metrics));
        return 0;
 }
+#endif /* CONFIG_SUSPEND || CONFIG_DEBUG_FS */
 
 #ifdef CONFIG_SUSPEND
 static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev)
@@ -379,6 +401,17 @@ static int s0ix_stats_show(struct seq_file *s, void *unused)
        struct amd_pmc_dev *dev = s->private;
        u64 entry_time, exit_time, residency;
 
+       /* Use FCH registers to get the S0ix stats */
+       if (!dev->fch_virt_addr) {
+               u32 base_addr_lo = FCH_BASE_PHY_ADDR_LOW;
+               u32 base_addr_hi = FCH_BASE_PHY_ADDR_HIGH;
+               u64 fch_phys_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
+
+               dev->fch_virt_addr = devm_ioremap(dev->dev, fch_phys_addr, FCH_SSC_MAPPING_SIZE);
+               if (!dev->fch_virt_addr)
+                       return -ENOMEM;
+       }
+
        entry_time = ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_H_OFFSET);
        entry_time = entry_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_L_OFFSET);
 
@@ -398,11 +431,38 @@ static int s0ix_stats_show(struct seq_file *s, void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(s0ix_stats);
 
+static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev)
+{
+       int rc;
+       u32 val;
+
+       rc = amd_pmc_send_cmd(dev, 0, &val, SMU_MSG_GETSMUVERSION, 1);
+       if (rc)
+               return rc;
+
+       dev->smu_program = (val >> 24) & GENMASK(7, 0);
+       dev->major = (val >> 16) & GENMASK(7, 0);
+       dev->minor = (val >> 8) & GENMASK(7, 0);
+       dev->rev = (val >> 0) & GENMASK(7, 0);
+
+       dev_dbg(dev->dev, "SMU program %u version is %u.%u.%u\n",
+               dev->smu_program, dev->major, dev->minor, dev->rev);
+
+       return 0;
+}
+
 static int amd_pmc_idlemask_show(struct seq_file *s, void *unused)
 {
        struct amd_pmc_dev *dev = s->private;
        int rc;
 
+       /* We haven't read the SMU version yet. */
+       if (!dev->major) {
+               rc = amd_pmc_get_smu_version(dev);
+               if (rc)
+                       return rc;
+       }
+
        if (dev->major > 56 || (dev->major >= 55 && dev->minor >= 37)) {
                rc = amd_pmc_idlemask_read(dev, NULL, s);
                if (rc)
@@ -449,32 +509,6 @@ static inline void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
 }
 #endif /* CONFIG_DEBUG_FS */
 
-static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev)
-{
-       u32 phys_addr_low, phys_addr_hi;
-       u64 smu_phys_addr;
-
-       if (dev->cpu_id == AMD_CPU_ID_PCO)
-               return -EINVAL;
-
-       /* Get Active devices list from SMU */
-       amd_pmc_send_cmd(dev, 0, &dev->active_ips, SMU_MSG_GET_SUP_CONSTRAINTS, 1);
-
-       /* Get dram address */
-       amd_pmc_send_cmd(dev, 0, &phys_addr_low, SMU_MSG_LOG_GETDRAM_ADDR_LO, 1);
-       amd_pmc_send_cmd(dev, 0, &phys_addr_hi, SMU_MSG_LOG_GETDRAM_ADDR_HI, 1);
-       smu_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low);
-
-       dev->smu_virt_addr = devm_ioremap(dev->dev, smu_phys_addr, sizeof(struct smu_metrics));
-       if (!dev->smu_virt_addr)
-               return -ENOMEM;
-
-       /* Start the logging */
-       amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0);
-
-       return 0;
-}
-
 static void amd_pmc_dump_registers(struct amd_pmc_dev *dev)
 {
        u32 value, message, argument, response;
@@ -639,8 +673,7 @@ static void amd_pmc_s2idle_prepare(void)
        u32 arg = 1;
 
        /* Reset and Start SMU logging - to monitor the s0i3 stats */
-       amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0);
-       amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0);
+       amd_pmc_setup_smu_logging(pdev);
 
        /* Activate CZN specific RTC functionality */
        if (pdev->cpu_id == AMD_CPU_ID_CZN) {
@@ -790,7 +823,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
        struct amd_pmc_dev *dev = &pmc;
        struct pci_dev *rdev;
        u32 base_addr_lo, base_addr_hi;
-       u64 base_addr, fch_phys_addr;
+       u64 base_addr;
        int err;
        u32 val;
 
@@ -844,28 +877,12 @@ static int amd_pmc_probe(struct platform_device *pdev)
 
        mutex_init(&dev->lock);
 
-       /* Use FCH registers to get the S0ix stats */
-       base_addr_lo = FCH_BASE_PHY_ADDR_LOW;
-       base_addr_hi = FCH_BASE_PHY_ADDR_HIGH;
-       fch_phys_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
-       dev->fch_virt_addr = devm_ioremap(dev->dev, fch_phys_addr, FCH_SSC_MAPPING_SIZE);
-       if (!dev->fch_virt_addr) {
-               err = -ENOMEM;
-               goto err_pci_dev_put;
-       }
-
-       /* Use SMU to get the s0i3 debug stats */
-       err = amd_pmc_setup_smu_logging(dev);
-       if (err)
-               dev_err(dev->dev, "SMU debugging info not supported on this platform\n");
-
        if (enable_stb && dev->cpu_id == AMD_CPU_ID_YC) {
                err = amd_pmc_s2d_init(dev);
                if (err)
                        return err;
        }
 
-       amd_pmc_get_smu_version(dev);
        platform_set_drvdata(pdev, dev);
 #ifdef CONFIG_SUSPEND
        err = acpi_register_lps0_dev(&amd_pmc_s2idle_dev_ops);
index a81dc4b191b779dee0d7af5d9a1bcf622e6ea7e1..57a07db659cbaaf154d16c857809492b6c38fa7b 100644 (file)
@@ -553,6 +553,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
        { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */
        { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */
        { KE_KEY, 0x82, { KEY_CAMERA } },
+       { KE_KEY, 0x86, { KEY_PROG1 } }, /* MyASUS Key */
        { KE_KEY, 0x88, { KEY_RFKILL  } }, /* Radio Toggle Key */
        { KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */
        { KE_KEY, 0x8C, { KEY_SWITCHVIDEOMODE } }, /* SDSP DVI only */
index 0e7fbed8a50d6ff50ab4b69e2b326182450728f8..62ce198a34631d2d5c3fa4d21675261fa8a557c4 100644 (file)
@@ -2534,7 +2534,7 @@ static struct attribute *asus_fan_curve_attr[] = {
 static umode_t asus_fan_curve_is_visible(struct kobject *kobj,
                                         struct attribute *attr, int idx)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
+       struct device *dev = kobj_to_dev(kobj);
        struct asus_wmi *asus = dev_get_drvdata(dev->parent);
 
        /*
@@ -3114,7 +3114,7 @@ static void asus_wmi_handle_event_code(int code, struct asus_wmi *asus)
 
        if (!sparse_keymap_report_event(asus->inputdev, code,
                                        key_value, autorelease))
-               pr_info("Unknown key %x pressed\n", code);
+               pr_info("Unknown key code 0x%x\n", code);
 }
 
 static void asus_wmi_notify(u32 value, void *context)
index db3633fafbd51493e1b7a266cf3d4c02fc766ae0..42beafbc54b2ac53b9cc2853dc5fc509c593d251 100644 (file)
 
 static struct platform_device *dcdbas_pdev;
 
-static u8 *smi_data_buf;
-static dma_addr_t smi_data_buf_handle;
-static unsigned long smi_data_buf_size;
 static unsigned long max_smi_data_buf_size = MAX_SMI_DATA_BUF_SIZE;
-static u32 smi_data_buf_phys_addr;
 static DEFINE_MUTEX(smi_data_lock);
 static u8 *bios_buffer;
+static struct smi_buffer smi_buf;
 
 static unsigned int host_control_action;
 static unsigned int host_control_smi_type;
@@ -54,23 +51,49 @@ static unsigned int host_control_on_shutdown;
 
 static bool wsmt_enabled;
 
+int dcdbas_smi_alloc(struct smi_buffer *smi_buffer, unsigned long size)
+{
+       smi_buffer->virt = dma_alloc_coherent(&dcdbas_pdev->dev, size,
+                                             &smi_buffer->dma, GFP_KERNEL);
+       if (!smi_buffer->virt) {
+               dev_dbg(&dcdbas_pdev->dev,
+                       "%s: failed to allocate memory size %lu\n",
+                       __func__, size);
+               return -ENOMEM;
+       }
+       smi_buffer->size = size;
+
+       dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
+               __func__, (u32)smi_buffer->dma, smi_buffer->size);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dcdbas_smi_alloc);
+
+void dcdbas_smi_free(struct smi_buffer *smi_buffer)
+{
+       if (!smi_buffer->virt)
+               return;
+
+       dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
+               __func__, (u32)smi_buffer->dma, smi_buffer->size);
+       dma_free_coherent(&dcdbas_pdev->dev, smi_buffer->size,
+                         smi_buffer->virt, smi_buffer->dma);
+       smi_buffer->virt = NULL;
+       smi_buffer->dma = 0;
+       smi_buffer->size = 0;
+}
+EXPORT_SYMBOL_GPL(dcdbas_smi_free);
+
 /**
  * smi_data_buf_free: free SMI data buffer
  */
 static void smi_data_buf_free(void)
 {
-       if (!smi_data_buf || wsmt_enabled)
+       if (!smi_buf.virt || wsmt_enabled)
                return;
 
-       dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
-               __func__, smi_data_buf_phys_addr, smi_data_buf_size);
-
-       dma_free_coherent(&dcdbas_pdev->dev, smi_data_buf_size, smi_data_buf,
-                         smi_data_buf_handle);
-       smi_data_buf = NULL;
-       smi_data_buf_handle = 0;
-       smi_data_buf_phys_addr = 0;
-       smi_data_buf_size = 0;
+       dcdbas_smi_free(&smi_buf);
 }
 
 /**
@@ -78,39 +101,29 @@ static void smi_data_buf_free(void)
  */
 static int smi_data_buf_realloc(unsigned long size)
 {
-       void *buf;
-       dma_addr_t handle;
+       struct smi_buffer tmp;
+       int ret;
 
-       if (smi_data_buf_size >= size)
+       if (smi_buf.size >= size)
                return 0;
 
        if (size > max_smi_data_buf_size)
                return -EINVAL;
 
        /* new buffer is needed */
-       buf = dma_alloc_coherent(&dcdbas_pdev->dev, size, &handle, GFP_KERNEL);
-       if (!buf) {
-               dev_dbg(&dcdbas_pdev->dev,
-                       "%s: failed to allocate memory size %lu\n",
-                       __func__, size);
-               return -ENOMEM;
-       }
-       /* memory zeroed by dma_alloc_coherent */
+       ret = dcdbas_smi_alloc(&tmp, size);
+       if (ret)
+               return ret;
 
-       if (smi_data_buf)
-               memcpy(buf, smi_data_buf, smi_data_buf_size);
+       /* memory zeroed by dma_alloc_coherent */
+       if (smi_buf.virt)
+               memcpy(tmp.virt, smi_buf.virt, smi_buf.size);
 
        /* free any existing buffer */
        smi_data_buf_free();
 
        /* set up new buffer for use */
-       smi_data_buf = buf;
-       smi_data_buf_handle = handle;
-       smi_data_buf_phys_addr = (u32) virt_to_phys(buf);
-       smi_data_buf_size = size;
-
-       dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
-               __func__, smi_data_buf_phys_addr, smi_data_buf_size);
+       smi_buf = tmp;
 
        return 0;
 }
@@ -119,14 +132,14 @@ static ssize_t smi_data_buf_phys_addr_show(struct device *dev,
                                           struct device_attribute *attr,
                                           char *buf)
 {
-       return sprintf(buf, "%x\n", smi_data_buf_phys_addr);
+       return sprintf(buf, "%x\n", (u32)smi_buf.dma);
 }
 
 static ssize_t smi_data_buf_size_show(struct device *dev,
                                      struct device_attribute *attr,
                                      char *buf)
 {
-       return sprintf(buf, "%lu\n", smi_data_buf_size);
+       return sprintf(buf, "%lu\n", smi_buf.size);
 }
 
 static ssize_t smi_data_buf_size_store(struct device *dev,
@@ -155,8 +168,8 @@ static ssize_t smi_data_read(struct file *filp, struct kobject *kobj,
        ssize_t ret;
 
        mutex_lock(&smi_data_lock);
-       ret = memory_read_from_buffer(buf, count, &pos, smi_data_buf,
-                                       smi_data_buf_size);
+       ret = memory_read_from_buffer(buf, count, &pos, smi_buf.virt,
+                                       smi_buf.size);
        mutex_unlock(&smi_data_lock);
        return ret;
 }
@@ -176,7 +189,7 @@ static ssize_t smi_data_write(struct file *filp, struct kobject *kobj,
        if (ret)
                goto out;
 
-       memcpy(smi_data_buf + pos, buf, count);
+       memcpy(smi_buf.virt + pos, buf, count);
        ret = count;
 out:
        mutex_unlock(&smi_data_lock);
@@ -307,11 +320,11 @@ static ssize_t smi_request_store(struct device *dev,
 
        mutex_lock(&smi_data_lock);
 
-       if (smi_data_buf_size < sizeof(struct smi_cmd)) {
+       if (smi_buf.size < sizeof(struct smi_cmd)) {
                ret = -ENODEV;
                goto out;
        }
-       smi_cmd = (struct smi_cmd *)smi_data_buf;
+       smi_cmd = (struct smi_cmd *)smi_buf.virt;
 
        switch (val) {
        case 2:
@@ -327,20 +340,20 @@ static ssize_t smi_request_store(struct device *dev,
                 * Provide physical address of command buffer field within
                 * the struct smi_cmd to BIOS.
                 *
-                * Because the address that smi_cmd (smi_data_buf) points to
+                * Because the address that smi_cmd (smi_buf.virt) points to
                 * will be from memremap() of a non-memory address if WSMT
                 * is present, we can't use virt_to_phys() on smi_cmd, so
                 * we have to use the physical address that was saved when
                 * the virtual address for smi_cmd was received.
                 */
-               smi_cmd->ebx = smi_data_buf_phys_addr +
+               smi_cmd->ebx = (u32)smi_buf.dma +
                                offsetof(struct smi_cmd, command_buffer);
                ret = dcdbas_smi_request(smi_cmd);
                if (!ret)
                        ret = count;
                break;
        case 0:
-               memset(smi_data_buf, 0, smi_data_buf_size);
+               memset(smi_buf.virt, 0, smi_buf.size);
                ret = count;
                break;
        default:
@@ -356,7 +369,7 @@ out:
 /**
  * host_control_smi: generate host control SMI
  *
- * Caller must set up the host control command in smi_data_buf.
+ * Caller must set up the host control command in smi_buf.virt.
  */
 static int host_control_smi(void)
 {
@@ -367,14 +380,14 @@ static int host_control_smi(void)
        s8 cmd_status;
        u8 index;
 
-       apm_cmd = (struct apm_cmd *)smi_data_buf;
+       apm_cmd = (struct apm_cmd *)smi_buf.virt;
        apm_cmd->status = ESM_STATUS_CMD_UNSUCCESSFUL;
 
        switch (host_control_smi_type) {
        case HC_SMITYPE_TYPE1:
                spin_lock_irqsave(&rtc_lock, flags);
                /* write SMI data buffer physical address */
-               data = (u8 *)&smi_data_buf_phys_addr;
+               data = (u8 *)&smi_buf.dma;
                for (index = PE1300_CMOS_CMD_STRUCT_PTR;
                     index < (PE1300_CMOS_CMD_STRUCT_PTR + 4);
                     index++, data++) {
@@ -405,7 +418,7 @@ static int host_control_smi(void)
        case HC_SMITYPE_TYPE3:
                spin_lock_irqsave(&rtc_lock, flags);
                /* write SMI data buffer physical address */
-               data = (u8 *)&smi_data_buf_phys_addr;
+               data = (u8 *)&smi_buf.dma;
                for (index = PE1400_CMOS_CMD_STRUCT_PTR;
                     index < (PE1400_CMOS_CMD_STRUCT_PTR + 4);
                     index++, data++) {
@@ -450,7 +463,7 @@ static int host_control_smi(void)
  * This function is called by the driver after the system has
  * finished shutting down if the user application specified a
  * host control action to perform on shutdown.  It is safe to
- * use smi_data_buf at this point because the system has finished
+ * use smi_buf.virt at this point because the system has finished
  * shutting down and no userspace apps are running.
  */
 static void dcdbas_host_control(void)
@@ -464,18 +477,18 @@ static void dcdbas_host_control(void)
        action = host_control_action;
        host_control_action = HC_ACTION_NONE;
 
-       if (!smi_data_buf) {
+       if (!smi_buf.virt) {
                dev_dbg(&dcdbas_pdev->dev, "%s: no SMI buffer\n", __func__);
                return;
        }
 
-       if (smi_data_buf_size < sizeof(struct apm_cmd)) {
+       if (smi_buf.size < sizeof(struct apm_cmd)) {
                dev_dbg(&dcdbas_pdev->dev, "%s: SMI buffer too small\n",
                        __func__);
                return;
        }
 
-       apm_cmd = (struct apm_cmd *)smi_data_buf;
+       apm_cmd = (struct apm_cmd *)smi_buf.virt;
 
        /* power off takes precedence */
        if (action & HC_ACTION_HOST_CONTROL_POWEROFF) {
@@ -583,11 +596,11 @@ remap:
                return -ENOMEM;
        }
 
-       /* First 8 bytes is for a semaphore, not part of the smi_data_buf */
-       smi_data_buf_phys_addr = bios_buf_paddr + 8;
-       smi_data_buf = bios_buffer + 8;
-       smi_data_buf_size = remap_size - 8;
-       max_smi_data_buf_size = smi_data_buf_size;
+       /* First 8 bytes are for a semaphore, not part of smi_buf.virt */
+       smi_buf.dma = bios_buf_paddr + 8;
+       smi_buf.virt = bios_buffer + 8;
+       smi_buf.size = remap_size - 8;
+       max_smi_data_buf_size = smi_buf.size;
        wsmt_enabled = true;
        dev_info(&dcdbas_pdev->dev,
                 "WSMT found, using firmware-provided SMI buffer.\n");
index c3cca54335256e9bb872503c18a16d811eb488fa..942a23ddded0592d2279da2dfee05113e8f4dbff 100644 (file)
@@ -105,5 +105,14 @@ struct smm_eps_table {
        u64 num_of_4k_pages;
 } __packed;
 
+struct smi_buffer {
+       u8 *virt;
+       unsigned long size;
+       dma_addr_t dma;
+};
+
+int dcdbas_smi_alloc(struct smi_buffer *smi_buffer, unsigned long size);
+void dcdbas_smi_free(struct smi_buffer *smi_buffer);
+
 #endif /* _DCDBAS_H_ */
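A brief sketch of the intended lifecycle for the new helpers; the caller below is hypothetical and only illustrates the alloc/use/free pattern the dcdbas users follow in the hunks above and below:

	/* Hypothetical consumer of the new smi_buffer helpers. */
	static struct smi_buffer my_smi_buf;

	static int my_init(void)
	{
		int ret = dcdbas_smi_alloc(&my_smi_buf, PAGE_SIZE);

		if (ret)
			return ret;
		/* .virt is zeroed by dma_alloc_coherent; .dma is handed to SMI */
		return 0;
	}

	static void my_exit(void)
	{
		/* Safe even if the allocation never succeeded: free checks .virt */
		dcdbas_smi_free(&my_smi_buf);
	}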
 
index 320c032418ac43c7969fbf24acc25ede2a1f39a3..4d375985c85f8203546a70d30aba8e3f3621b68a 100644 (file)
@@ -20,6 +20,7 @@
 
 static int da_command_address;
 static int da_command_code;
+static struct smi_buffer smi_buf;
 static struct calling_interface_buffer *buffer;
 static struct platform_device *platform_device;
 static DEFINE_MUTEX(smm_mutex);
@@ -57,7 +58,7 @@ static int dell_smbios_smm_call(struct calling_interface_buffer *input)
        command.magic = SMI_CMD_MAGIC;
        command.command_address = da_command_address;
        command.command_code = da_command_code;
-       command.ebx = virt_to_phys(buffer);
+       command.ebx = smi_buf.dma;
        command.ecx = 0x42534931;
 
        mutex_lock(&smm_mutex);
@@ -101,9 +102,10 @@ int init_dell_smbios_smm(void)
         * Allocate buffer below 4GB for SMI data--only 32-bit physical addr
         * is passed to SMI handler.
         */
-       buffer = (void *)__get_free_page(GFP_KERNEL | GFP_DMA32);
-       if (!buffer)
-               return -ENOMEM;
+       ret = dcdbas_smi_alloc(&smi_buf, PAGE_SIZE);
+       if (ret)
+               return ret;
+       buffer = (void *)smi_buf.virt;
 
        dmi_walk(find_cmd_address, NULL);
 
@@ -138,7 +140,7 @@ fail_platform_device_add:
 
 fail_wsmt:
 fail_platform_device_alloc:
-       free_page((unsigned long)buffer);
+       dcdbas_smi_free(&smi_buf);
        return ret;
 }
 
@@ -147,6 +149,6 @@ void exit_dell_smbios_smm(void)
        if (platform_device) {
                dell_smbios_unregister_device(&platform_device->dev);
                platform_device_unregister(platform_device);
-               free_page((unsigned long)buffer);
+               dcdbas_smi_free(&smi_buf);
        }
 }
index e87a931eab1e72c1c864fd0edf038eb03077e8bd..1ef606e3ef80d2ab9b31a7086f4459f3d1f7c6c1 100644 (file)
@@ -150,7 +150,9 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
+       DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z490 AORUS ELITE AC"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
+       DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE WIFI"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"),
index 0e9a25b56e0e4188e78ae550def324343a5936b2..667f94bba905b26a337b9b857fdb1f3e265fd069 100644 (file)
@@ -605,6 +605,7 @@ static int hp_wmi_rfkill2_refresh(void)
        for (i = 0; i < rfkill2_count; i++) {
                int num = rfkill2[i].num;
                struct bios_rfkill2_device_state *devstate;
+
                devstate = &state.device[num];
 
                if (num >= state.count ||
@@ -625,6 +626,7 @@ static ssize_t display_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
 {
        int value = hp_wmi_read_int(HPWMI_DISPLAY_QUERY);
+
        if (value < 0)
                return value;
        return sprintf(buf, "%d\n", value);
@@ -634,6 +636,7 @@ static ssize_t hddtemp_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
 {
        int value = hp_wmi_read_int(HPWMI_HDDTEMP_QUERY);
+
        if (value < 0)
                return value;
        return sprintf(buf, "%d\n", value);
@@ -643,6 +646,7 @@ static ssize_t als_show(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        int value = hp_wmi_read_int(HPWMI_ALS_QUERY);
+
        if (value < 0)
                return value;
        return sprintf(buf, "%d\n", value);
@@ -652,6 +656,7 @@ static ssize_t dock_show(struct device *dev, struct device_attribute *attr,
                         char *buf)
 {
        int value = hp_wmi_get_dock_state();
+
        if (value < 0)
                return value;
        return sprintf(buf, "%d\n", value);
@@ -661,6 +666,7 @@ static ssize_t tablet_show(struct device *dev, struct device_attribute *attr,
                           char *buf)
 {
        int value = hp_wmi_get_tablet_mode();
+
        if (value < 0)
                return value;
        return sprintf(buf, "%d\n", value);
@@ -671,6 +677,7 @@ static ssize_t postcode_show(struct device *dev, struct device_attribute *attr,
 {
        /* Get the POST error code of previous boot failure. */
        int value = hp_wmi_read_int(HPWMI_POSTCODEERROR_QUERY);
+
        if (value < 0)
                return value;
        return sprintf(buf, "0x%x\n", value);
@@ -1013,6 +1020,7 @@ static int __init hp_wmi_rfkill2_setup(struct platform_device *device)
                struct rfkill *rfkill;
                enum rfkill_type type;
                char *name;
+
                switch (state.device[i].radio_type) {
                case HPWMI_WIFI:
                        type = RFKILL_TYPE_WLAN;
index 1f01a8a23c570cfb889b013778b25f72c96a5b99..794968bda1153dca27f9a3abc96cd5253c745507 100644 (file)
@@ -4,6 +4,7 @@
 #
 
 source "drivers/platform/x86/intel/atomisp2/Kconfig"
+source "drivers/platform/x86/intel/ifs/Kconfig"
 source "drivers/platform/x86/intel/int1092/Kconfig"
 source "drivers/platform/x86/intel/int3472/Kconfig"
 source "drivers/platform/x86/intel/pmc/Kconfig"
index c61bc3e97121f7fdc6146c598e6c3078e33993e6..717933dd0cfdd764e77cb345d887a2772c5bb202 100644 (file)
@@ -5,6 +5,7 @@
 #
 
 obj-$(CONFIG_INTEL_ATOMISP2_PDX86)     += atomisp2/
+obj-$(CONFIG_INTEL_IFS)                        += ifs/
 obj-$(CONFIG_INTEL_SAR_INT1092)                += int1092/
 obj-$(CONFIG_INTEL_SKL_INT3472)                += int3472/
 obj-$(CONFIG_INTEL_PMC_CORE)           += pmc/
index 0de509fbf0209321c561983dce03ea58af57a76d..c52ac23e233157108dbbbe04cb864a780f96ccf1 100644 (file)
@@ -389,6 +389,8 @@ static int cht_int33fe_typec_probe(struct platform_device *pdev)
                goto out_unregister_fusb302;
        }
 
+       platform_set_drvdata(pdev, data);
+
        return 0;
 
 out_unregister_fusb302:
index 2def562c6e1de63b1129aee61380676dd624a114..216d31e3403dde4fb8ed842e4be09df6c1a8c8cc 100644 (file)
@@ -238,7 +238,7 @@ static bool intel_hid_evaluate_method(acpi_handle handle,
 
        method_name = (char *)intel_hid_dsm_fn_to_method[fn_index];
 
-       if (!(intel_hid_dsm_fn_mask & fn_index))
+       if (!(intel_hid_dsm_fn_mask & BIT(fn_index)))
                goto skip_dsm_eval;
 
        obj = acpi_evaluate_dsm_typed(handle, &intel_dsm_guid,
diff --git a/drivers/platform/x86/intel/ifs/Kconfig b/drivers/platform/x86/intel/ifs/Kconfig
new file mode 100644 (file)
index 0000000..7ce8964
--- /dev/null
@@ -0,0 +1,13 @@
+config INTEL_IFS
+       tristate "Intel In Field Scan"
+       depends on X86 && CPU_SUP_INTEL && 64BIT && SMP
+       select INTEL_IFS_DEVICE
+       help
+         Enable support for the In Field Scan capability in select
+         CPUs. The capability allows for running low level tests via
+         a scan image distributed by Intel on GitHub to validate CPU
+         operation beyond baseline RAS capabilities. To compile this
+         support as a module, choose M here. The module will be called
+         intel_ifs.
+
+         If unsure, say N.
diff --git a/drivers/platform/x86/intel/ifs/Makefile b/drivers/platform/x86/intel/ifs/Makefile
new file mode 100644 (file)
index 0000000..30f035e
--- /dev/null
@@ -0,0 +1,3 @@
+obj-$(CONFIG_INTEL_IFS)                += intel_ifs.o
+
+intel_ifs-objs                 := core.o load.o runtest.o sysfs.o
diff --git a/drivers/platform/x86/intel/ifs/core.c b/drivers/platform/x86/intel/ifs/core.c
new file mode 100644 (file)
index 0000000..27204e3
--- /dev/null
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. */
+
+#include <linux/module.h>
+#include <linux/kdev_t.h>
+#include <linux/semaphore.h>
+
+#include <asm/cpu_device_id.h>
+
+#include "ifs.h"
+
+#define X86_MATCH(model)                               \
+       X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,    \
+               INTEL_FAM6_##model, X86_FEATURE_CORE_CAPABILITIES, NULL)
+
+static const struct x86_cpu_id ifs_cpu_ids[] __initconst = {
+       X86_MATCH(SAPPHIRERAPIDS_X),
+       {}
+};
+MODULE_DEVICE_TABLE(x86cpu, ifs_cpu_ids);
+
+static struct ifs_device ifs_device = {
+       .data = {
+               .integrity_cap_bit = MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT,
+       },
+       .misc = {
+               .name = "intel_ifs_0",
+               .nodename = "intel_ifs/0",
+               .minor = MISC_DYNAMIC_MINOR,
+       },
+};
+
+static int __init ifs_init(void)
+{
+       const struct x86_cpu_id *m;
+       u64 msrval;
+
+       m = x86_match_cpu(ifs_cpu_ids);
+       if (!m)
+               return -ENODEV;
+
+       if (rdmsrl_safe(MSR_IA32_CORE_CAPS, &msrval))
+               return -ENODEV;
+
+       if (!(msrval & MSR_IA32_CORE_CAPS_INTEGRITY_CAPS))
+               return -ENODEV;
+
+       if (rdmsrl_safe(MSR_INTEGRITY_CAPS, &msrval))
+               return -ENODEV;
+
+       ifs_device.misc.groups = ifs_get_groups();
+
+       if ((msrval & BIT(ifs_device.data.integrity_cap_bit)) &&
+           !misc_register(&ifs_device.misc)) {
+               down(&ifs_sem);
+               ifs_load_firmware(ifs_device.misc.this_device);
+               up(&ifs_sem);
+               return 0;
+       }
+
+       return -ENODEV;
+}
+
+static void __exit ifs_exit(void)
+{
+       misc_deregister(&ifs_device.misc);
+}
+
+module_init(ifs_init);
+module_exit(ifs_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Intel In Field Scan (IFS) device");
diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h
new file mode 100644 (file)
index 0000000..73c8e91
--- /dev/null
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2022 Intel Corporation. */
+
+#ifndef _IFS_H_
+#define _IFS_H_
+
+/**
+ * DOC: In-Field Scan
+ *
+ * =============
+ * In-Field Scan
+ * =============
+ *
+ * Introduction
+ * ------------
+ *
+ * In Field Scan (IFS) is a hardware feature to run circuit level tests on
+ * a CPU core to detect problems that are not caught by parity or ECC checks.
+ * Future CPUs will support more than one type of test, each of which will
+ * show up with a new platform-device instance-id; for now only .0 is exposed.
+ *
+ *
+ * IFS Image
+ * ---------
+ *
+ * Intel provides a firmware file containing the scan tests via
+ * github [#f1]_.  Similar to microcode there is a separate file for each
+ * family-model-stepping.
+ *
+ * IFS Image Loading
+ * -----------------
+ *
+ * The driver loads the tests into BIOS-reserved memory local to each CPU
+ * socket, in a two-step process using writes to MSRs: first the SHA hashes
+ * for the tests are loaded, then the tests themselves. Status MSRs provide
+ * feedback on the success/failure of these steps. When a new test file
+ * is installed, it can be loaded by writing to the driver reload file::
+ *
+ *   # echo 1 > /sys/devices/virtual/misc/intel_ifs_0/reload
+ *
+ * Similar to microcode, the current version of the scan tests is stored
+ * in a fixed location: /lib/firmware/intel/ifs/family-model-stepping.scan
+ *
+ * Running tests
+ * -------------
+ *
+ * Tests are run by the driver synchronizing execution of all threads on a
+ * core and then writing to the ACTIVATE_SCAN MSR on all threads. Instruction
+ * execution continues when:
+ *
+ * 1) All tests have completed.
+ * 2) Execution was interrupted.
+ * 3) A test detected a problem.
+ *
+ * Note that ALL THREADS ON THE CORE ARE EFFECTIVELY OFFLINE FOR THE
+ * DURATION OF THE TEST. This can be up to 200 milliseconds. If the system
+ * is running latency sensitive applications that cannot tolerate an
+ * interruption of this magnitude, the system administrator must arrange
+ * to migrate those applications to other cores before running a core test.
+ * It may also be necessary to redirect interrupts to other CPUs.
+ *
+ * In all cases reading the SCAN_STATUS MSR provides details on what
+ * happened. The driver makes the value of this MSR visible to applications
+ * via the "details" file (see below). Interrupted tests may be restarted.
+ *
+ * The IFS driver provides sysfs interfaces via /sys/devices/virtual/misc/intel_ifs_0/
+ * to control execution:
+ *
+ * Test a specific core::
+ *
+ *   # echo <cpu#> > /sys/devices/virtual/misc/intel_ifs_0/run_test
+ *
+ * When HT is enabled, any of the sibling cpu# values can be specified to
+ * test the corresponding physical core. Since the tests are per physical
+ * core, the result of testing any thread is the same; it is only necessary
+ * to test one thread. All siblings must be online to run a core test.
+ *
+ * For example, to test the core corresponding to cpu5::
+ *
+ *   # echo 5 > /sys/devices/virtual/misc/intel_ifs_0/run_test
+ *
+ * The result of the last test is provided in /sys::
+ *
+ *   $ cat /sys/devices/virtual/misc/intel_ifs_0/status
+ *   pass
+ *
+ * Status can be one of pass, fail, or untested.
+ *
+ * Additional details of the last test are provided by the details file::
+ *
+ *   $ cat /sys/devices/virtual/misc/intel_ifs_0/details
+ *   0x8081
+ *
+ * The details file reports the hex value of the SCAN_STATUS MSR.
+ * Hardware defined error codes are documented in volume 4 of the Intel
+ * Software Developer's Manual, but the error_code field may contain one of
+ * the following driver defined software codes:
+ *
+ * +------+--------------------+
+ * | 0xFD | Software timeout   |
+ * +------+--------------------+
+ * | 0xFE | Partial completion |
+ * +------+--------------------+
+ *
+ * Driver design choices
+ * ---------------------
+ *
+ * 1) The ACTIVATE_SCAN MSR allows for running any consecutive subrange of
+ * available tests. But the driver always tries to run all tests and only
+ * uses the subrange feature to restart an interrupted test.
+ *
+ * 2) Hardware allows for some number of cores to be tested in parallel.
+ * The driver does not make use of this; it only tests one core at a time.
+ *
+ * .. [#f1] https://github.com/intel/TBD
+ */
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+
+#define MSR_COPY_SCAN_HASHES                   0x000002c2
+#define MSR_SCAN_HASHES_STATUS                 0x000002c3
+#define MSR_AUTHENTICATE_AND_COPY_CHUNK                0x000002c4
+#define MSR_CHUNKS_AUTHENTICATION_STATUS       0x000002c5
+#define MSR_ACTIVATE_SCAN                      0x000002c6
+#define MSR_SCAN_STATUS                                0x000002c7
+#define SCAN_NOT_TESTED                                0
+#define SCAN_TEST_PASS                         1
+#define SCAN_TEST_FAIL                         2
+
+/* MSR_SCAN_HASHES_STATUS bit fields */
+union ifs_scan_hashes_status {
+       u64     data;
+       struct {
+               u32     chunk_size      :16;
+               u32     num_chunks      :8;
+               u32     rsvd1           :8;
+               u32     error_code      :8;
+               u32     rsvd2           :11;
+               u32     max_core_limit  :12;
+               u32     valid           :1;
+       };
+};
+
+/* MSR_CHUNKS_AUTH_STATUS bit fields */
+union ifs_chunks_auth_status {
+       u64     data;
+       struct {
+               u32     valid_chunks    :8;
+               u32     total_chunks    :8;
+               u32     rsvd1           :16;
+               u32     error_code      :8;
+               u32     rsvd2           :24;
+       };
+};
+
+/* MSR_ACTIVATE_SCAN bit fields */
+union ifs_scan {
+       u64     data;
+       struct {
+               u32     start   :8;
+               u32     stop    :8;
+               u32     rsvd    :16;
+               u32     delay   :31;
+               u32     sigmce  :1;
+       };
+};
+
+/* MSR_SCAN_STATUS bit fields */
+union ifs_status {
+       u64     data;
+       struct {
+               u32     chunk_num               :8;
+               u32     chunk_stop_index        :8;
+               u32     rsvd1                   :16;
+               u32     error_code              :8;
+               u32     rsvd2                   :22;
+               u32     control_error           :1;
+               u32     signature_error         :1;
+       };
+};
+
+/*
+ * Driver-populated error codes:
+ * 0xFD: Test timed out before completing all the chunks.
+ * 0xFE: Not all scan chunks were executed. Maximum forward progress retries exceeded.
+ */
+#define IFS_SW_TIMEOUT                         0xFD
+#define IFS_SW_PARTIAL_COMPLETION              0xFE
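As a sketch of how the unions above decode a raw status value (the sample value and helper are hypothetical; the field layout relies on the little-endian bitfield ordering x86 compilers use, which is what the driver assumes):

	/* Hypothetical decode of a raw SCAN_STATUS value via union ifs_status. */
	static void show_scan_status(u64 raw)
	{
		union ifs_status status;

		status.data = raw;	/* e.g. 0x8081: chunk_num 0x81, stop index 0x80 */
		pr_info("chunks %u..%u, error code %#x\n",
			status.chunk_num, status.chunk_stop_index,
			status.error_code);
	}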
+
+/**
+ * struct ifs_data - attributes related to intel IFS driver
+ * @integrity_cap_bit: MSR_INTEGRITY_CAPS bit enumerating this test
+ * @loaded_version: currently loaded ifs image version
+ * @loaded: whether a valid test binary has been loaded into memory
+ * @loading_error: error occurred on another CPU while loading the image
+ * @valid_chunks: number of chunks which could be validated
+ * @status: simple status of the last test: pass/fail/untested
+ * @scan_details: opaque scan status code from hardware
+ */
+struct ifs_data {
+       int     integrity_cap_bit;
+       int     loaded_version;
+       bool    loaded;
+       bool    loading_error;
+       int     valid_chunks;
+       int     status;
+       u64     scan_details;
+};
+
+struct ifs_work {
+       struct work_struct w;
+       struct device *dev;
+};
+
+struct ifs_device {
+       struct ifs_data data;
+       struct miscdevice misc;
+};
+
+static inline struct ifs_data *ifs_get_data(struct device *dev)
+{
+       struct miscdevice *m = dev_get_drvdata(dev);
+       struct ifs_device *d = container_of(m, struct ifs_device, misc);
+
+       return &d->data;
+}
+
+void ifs_load_firmware(struct device *dev);
+int do_core_test(int cpu, struct device *dev);
+const struct attribute_group **ifs_get_groups(void);
+
+extern struct semaphore ifs_sem;
+
+#endif
diff --git a/drivers/platform/x86/intel/ifs/load.c b/drivers/platform/x86/intel/ifs/load.c
new file mode 100644 (file)
index 0000000..d056617
--- /dev/null
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. */
+
+#include <linux/firmware.h>
+#include <asm/cpu.h>
+#include <linux/slab.h>
+#include <asm/microcode_intel.h>
+
+#include "ifs.h"
+
+struct ifs_header {
+       u32 header_ver;
+       u32 blob_revision;
+       u32 date;
+       u32 processor_sig;
+       u32 check_sum;
+       u32 loader_rev;
+       u32 processor_flags;
+       u32 metadata_size;
+       u32 total_size;
+       u32 fusa_info;
+       u64 reserved;
+};
+
+#define IFS_HEADER_SIZE        (sizeof(struct ifs_header))
+static struct ifs_header *ifs_header_ptr;      /* pointer to the ifs image header */
+static u64 ifs_hash_ptr;                       /* Address of ifs metadata (hash) */
+static u64 ifs_test_image_ptr;                 /* 256B aligned address of test pattern */
+static DECLARE_COMPLETION(ifs_done);
+
+static const char * const scan_hash_status[] = {
+       [0] = "No error reported",
+       [1] = "Attempt to copy scan hashes when copy already in progress",
+       [2] = "Secure Memory not set up correctly",
+       [3] = "FuSaInfo.ProgramID does not match or ff-mm-ss does not match",
+       [4] = "Reserved",
+       [5] = "Integrity check failed",
+       [6] = "Scan reload or test is in progress"
+};
+
+static const char * const scan_authentication_status[] = {
+       [0] = "No error reported",
+       [1] = "Attempt to authenticate a chunk which is already marked as authentic",
+       [2] = "Chunk authentication error. The hash of chunk did not match expected value"
+};
+
+/*
+ * To copy scan hashes and authenticate test chunks, the initiating CPU must
+ * point EDX:EAX at the linear address of the test image.
+ * wrmsr(MSR_COPY_SCAN_HASHES) performs the scan hash copy, and
+ * wrmsr(MSR_AUTHENTICATE_AND_COPY_CHUNK) performs the test chunk
+ * authentication and copy.
+ */
+static void copy_hashes_authenticate_chunks(struct work_struct *work)
+{
+       struct ifs_work *local_work = container_of(work, struct ifs_work, w);
+       union ifs_scan_hashes_status hashes_status;
+       union ifs_chunks_auth_status chunk_status;
+       struct device *dev = local_work->dev;
+       int i, num_chunks, chunk_size;
+       struct ifs_data *ifsd;
+       u64 linear_addr, base;
+       u32 err_code;
+
+       ifsd = ifs_get_data(dev);
+       /* run scan hash copy */
+       wrmsrl(MSR_COPY_SCAN_HASHES, ifs_hash_ptr);
+       rdmsrl(MSR_SCAN_HASHES_STATUS, hashes_status.data);
+
+       /* enumerate the scan image information */
+       num_chunks = hashes_status.num_chunks;
+       chunk_size = hashes_status.chunk_size * 1024;
+       err_code = hashes_status.error_code;
+
+       if (!hashes_status.valid) {
+               ifsd->loading_error = true;
+               if (err_code >= ARRAY_SIZE(scan_hash_status)) {
+                       dev_err(dev, "invalid error code 0x%x for hash copy\n", err_code);
+                       goto done;
+               }
+               dev_err(dev, "Hash copy error : %s", scan_hash_status[err_code]);
+               goto done;
+       }
+
+       /* base linear address to the scan data */
+       base = ifs_test_image_ptr;
+
+       /* scan data authentication and copy chunks to secured memory */
+       for (i = 0; i < num_chunks; i++) {
+               linear_addr = base + i * chunk_size;
+               linear_addr |= i;
+
+               wrmsrl(MSR_AUTHENTICATE_AND_COPY_CHUNK, linear_addr);
+               rdmsrl(MSR_CHUNKS_AUTHENTICATION_STATUS, chunk_status.data);
+
+               ifsd->valid_chunks = chunk_status.valid_chunks;
+               err_code = chunk_status.error_code;
+
+               if (err_code) {
+                       ifsd->loading_error = true;
+                       if (err_code >= ARRAY_SIZE(scan_authentication_status)) {
+                               dev_err(dev,
+                                       "invalid error code 0x%x for authentication\n", err_code);
+                               goto done;
+                       }
+                       dev_err(dev, "Chunk authentication error %s\n",
+                               scan_authentication_status[err_code]);
+                       goto done;
+               }
+       }
+done:
+       complete(&ifs_done);
+}
+
+/*
+ * IFS requires the scan chunks to be authenticated once per socket in the
+ * platform. Once a test chunk is authenticated, it is automatically copied
+ * to secured memory, and authentication proceeds with the next chunk.
+ */
+static int scan_chunks_sanity_check(struct device *dev)
+{
+       int metadata_size, curr_pkg, cpu, ret = -ENOMEM;
+       struct ifs_data *ifsd = ifs_get_data(dev);
+       bool *package_authenticated;
+       struct ifs_work local_work;
+       char *test_ptr;
+
+       package_authenticated = kcalloc(topology_max_packages(), sizeof(bool), GFP_KERNEL);
+       if (!package_authenticated)
+               return ret;
+
+       metadata_size = ifs_header_ptr->metadata_size;
+
+       /* Spec says that if the Meta Data Size = 0 then it should be treated as 2000 */
+       if (metadata_size == 0)
+               metadata_size = 2000;
+
+       /* Scan chunk start must be 256 byte aligned */
+       if ((metadata_size + IFS_HEADER_SIZE) % 256) {
+               dev_err(dev, "Scan pattern offset within the binary is not 256 byte aligned\n");
+               return -EINVAL;
+       }
+
+       test_ptr = (char *)ifs_header_ptr + IFS_HEADER_SIZE + metadata_size;
+       ifsd->loading_error = false;
+
+       ifs_test_image_ptr = (u64)test_ptr;
+       ifsd->loaded_version = ifs_header_ptr->blob_revision;
+
+       /* copy the scan hash and authenticate per package */
+       cpus_read_lock();
+       for_each_online_cpu(cpu) {
+               curr_pkg = topology_physical_package_id(cpu);
+               if (package_authenticated[curr_pkg])
+                       continue;
+               reinit_completion(&ifs_done);
+               local_work.dev = dev;
+               INIT_WORK(&local_work.w, copy_hashes_authenticate_chunks);
+               schedule_work_on(cpu, &local_work.w);
+               wait_for_completion(&ifs_done);
+               if (ifsd->loading_error)
+                       goto out;
+               package_authenticated[curr_pkg] = 1;
+       }
+       ret = 0;
+out:
+       cpus_read_unlock();
+       kfree(package_authenticated);
+
+       return ret;
+}
+
+static int ifs_sanity_check(struct device *dev,
+                           const struct microcode_header_intel *mc_header)
+{
+       unsigned long total_size, data_size;
+       u32 sum, *mc;
+
+       total_size = get_totalsize(mc_header);
+       data_size = get_datasize(mc_header);
+
+       if ((data_size + MC_HEADER_SIZE > total_size) || (total_size % sizeof(u32))) {
+               dev_err(dev, "bad ifs data file size.\n");
+               return -EINVAL;
+       }
+
+       if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
+               dev_err(dev, "invalid/unknown ifs update format.\n");
+               return -EINVAL;
+       }
+
+       mc = (u32 *)mc_header;
+       sum = 0;
+       for (int i = 0; i < total_size / sizeof(u32); i++)
+               sum += mc[i];
+
+       if (sum) {
+               dev_err(dev, "bad ifs data checksum, aborting.\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
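The checksum convention here, shared with the microcode file format, is that all 32-bit words of the image sum to zero modulo 2^32. A standalone sketch of the same check, with the buffer left hypothetical:

	#include <stddef.h>
	#include <stdint.h>

	/* Returns 0 when all 32-bit words of the image sum to zero (mod 2^32). */
	uint32_t ifs_image_checksum(const uint32_t *words, size_t nwords)
	{
		uint32_t sum = 0;

		while (nwords--)
			sum += *words++;
		return sum;	/* non-zero means the file is corrupt */
	}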
+
+static bool find_ifs_matching_signature(struct device *dev, struct ucode_cpu_info *uci,
+                                       const struct microcode_header_intel *shdr)
+{
+       unsigned int mc_size;
+
+       mc_size = get_totalsize(shdr);
+
+       if (!mc_size || ifs_sanity_check(dev, shdr) < 0) {
+               dev_err(dev, "ifs sanity check failure\n");
+               return false;
+       }
+
+       if (!intel_cpu_signatures_match(uci->cpu_sig.sig, uci->cpu_sig.pf, shdr->sig, shdr->pf)) {
+               dev_err(dev, "ifs signature, pf not matching\n");
+               return false;
+       }
+
+       return true;
+}
+
+static bool ifs_image_sanity_check(struct device *dev, const struct microcode_header_intel *data)
+{
+       struct ucode_cpu_info uci;
+
+       intel_cpu_collect_info(&uci);
+
+       return find_ifs_matching_signature(dev, &uci, data);
+}
+
+/*
+ * Load the ifs image. Before the ifs module is loaded, the ifs image must be
+ * placed in /lib/firmware/intel/ifs and named <family>-<model>-<stepping>.scan.
+ */
+void ifs_load_firmware(struct device *dev)
+{
+       struct ifs_data *ifsd = ifs_get_data(dev);
+       const struct firmware *fw;
+       char scan_path[32];
+       int ret;
+
+       snprintf(scan_path, sizeof(scan_path), "intel/ifs/%02x-%02x-%02x.scan",
+                boot_cpu_data.x86, boot_cpu_data.x86_model, boot_cpu_data.x86_stepping);
+
+       ret = request_firmware_direct(&fw, scan_path, dev);
+       if (ret) {
+               dev_err(dev, "ifs file %s load failed\n", scan_path);
+               goto done;
+       }
+
+       if (!ifs_image_sanity_check(dev, (struct microcode_header_intel *)fw->data)) {
+               dev_err(dev, "ifs header sanity check failed\n");
+               goto release;
+       }
+
+       ifs_header_ptr = (struct ifs_header *)fw->data;
+       ifs_hash_ptr = (u64)(ifs_header_ptr + 1);
+
+       ret = scan_chunks_sanity_check(dev);
+release:
+       release_firmware(fw);
+done:
+       ifsd->loaded = (ret == 0);
+}
diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c
new file mode 100644 (file)
index 0000000..b2ca2bb
--- /dev/null
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/nmi.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+
+#include "ifs.h"
+
+/*
+ * Note that all code and data in this file are protected by
+ * ifs_sem. On HT systems all threads on a core will
+ * execute together, but only the first thread on the
+ * core will update the result of the test.
+ */
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/intel_ifs.h>
+
+/* Max retries on the same chunk */
+#define MAX_IFS_RETRIES  5
+
+/*
+ * Number of TSC cycles that a logical CPU will wait for the other
+ * logical CPU on the core in the WRMSR(ACTIVATE_SCAN).
+ */
+#define IFS_THREAD_WAIT 100000
+
+enum ifs_status_err_code {
+       IFS_NO_ERROR                            = 0,
+       IFS_OTHER_THREAD_COULD_NOT_JOIN         = 1,
+       IFS_INTERRUPTED_BEFORE_RENDEZVOUS       = 2,
+       IFS_POWER_MGMT_INADEQUATE_FOR_SCAN      = 3,
+       IFS_INVALID_CHUNK_RANGE                 = 4,
+       IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS  = 5,
+       IFS_CORE_NOT_CAPABLE_CURRENTLY          = 6,
+       IFS_UNASSIGNED_ERROR_CODE               = 7,
+       IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT = 8,
+       IFS_INTERRUPTED_DURING_EXECUTION        = 9,
+};
+
+static const char * const scan_test_status[] = {
+       [IFS_NO_ERROR] = "SCAN no error",
+       [IFS_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
+       [IFS_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SCAN coordination.",
+       [IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] =
+       "Core Abort SCAN Response due to power management condition.",
+       [IFS_INVALID_CHUNK_RANGE] = "Non valid chunks in the range",
+       [IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
+       [IFS_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SCAN currently",
+       [IFS_UNASSIGNED_ERROR_CODE] = "Unassigned error code 0x7",
+       [IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] =
+       "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
+       [IFS_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SCAN start",
+};
+
+static void message_not_tested(struct device *dev, int cpu, union ifs_status status)
+{
+       if (status.error_code < ARRAY_SIZE(scan_test_status)) {
+               dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n",
+                        cpumask_pr_args(cpu_smt_mask(cpu)),
+                        scan_test_status[status.error_code]);
+       } else if (status.error_code == IFS_SW_TIMEOUT) {
+               dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
+                        cpumask_pr_args(cpu_smt_mask(cpu)));
+       } else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
+               dev_info(dev, "CPU(s) %*pbl: %s\n",
+                        cpumask_pr_args(cpu_smt_mask(cpu)),
+                        "Not all scan chunks were executed. Maximum forward progress retries exceeded");
+       } else {
+               dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n",
+                        cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
+       }
+}
+
+static void message_fail(struct device *dev, int cpu, union ifs_status status)
+{
+       /*
+        * control_error is set when the microcode runs into a problem
+        * loading the image from the reserved BIOS memory, or it has
+        * been corrupted. Reloading the image may fix this issue.
+        */
+       if (status.control_error) {
+               dev_err(dev, "CPU(s) %*pbl: could not execute from loaded scan image\n",
+                       cpumask_pr_args(cpu_smt_mask(cpu)));
+       }
+
+       /*
+        * signature_error is set when the output from the scan chains does not
+        * match the expected signature. This might be a transient problem (e.g.
+        * due to a bit flip from an alpha particle or neutron). If the problem
+        * repeats on a subsequent test, then it indicates an actual problem in
+        * the core being tested.
+        */
+       if (status.signature_error) {
+               dev_err(dev, "CPU(s) %*pbl: test signature incorrect.\n",
+                       cpumask_pr_args(cpu_smt_mask(cpu)));
+       }
+}
+
+static bool can_restart(union ifs_status status)
+{
+       enum ifs_status_err_code err_code = status.error_code;
+
+       /* Signature for chunk is bad, or scan test failed */
+       if (status.signature_error || status.control_error)
+               return false;
+
+       switch (err_code) {
+       case IFS_NO_ERROR:
+       case IFS_OTHER_THREAD_COULD_NOT_JOIN:
+       case IFS_INTERRUPTED_BEFORE_RENDEZVOUS:
+       case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN:
+       case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
+       case IFS_INTERRUPTED_DURING_EXECUTION:
+               return true;
+       case IFS_INVALID_CHUNK_RANGE:
+       case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS:
+       case IFS_CORE_NOT_CAPABLE_CURRENTLY:
+       case IFS_UNASSIGNED_ERROR_CODE:
+               break;
+       }
+       return false;
+}
+
+/*
+ * Execute the scan. Called "simultaneously" on all threads of a core
+ * at high priority using the stop_cpus mechanism.
+ */
+static int doscan(void *data)
+{
+       int cpu = smp_processor_id();
+       u64 *msrs = data;
+       int first;
+
+       /* Only the first logical CPU on a core reports result */
+       first = cpumask_first(cpu_smt_mask(cpu));
+
+       /*
+        * This WRMSR will wait for other HT threads to also write
+        * to this MSR (at most for activate.delay cycles). Then it
+        * starts scan of each requested chunk. The core scan happens
+        * during the "execution" of the WRMSR. This instruction can
+        * take up to 200 milliseconds (in the case where all chunks
+        * are processed in a single pass) before it retires.
+        */
+       wrmsrl(MSR_ACTIVATE_SCAN, msrs[0]);
+
+       if (cpu == first) {
+               /* Pass back the result of the scan */
+               rdmsrl(MSR_SCAN_STATUS, msrs[1]);
+       }
+
+       return 0;
+}
+
+/*
+ * Use stop_core_cpuslocked() to synchronize writing to MSR_ACTIVATE_SCAN
+ * on all threads of the core to be tested. Loop if necessary to complete
+ * run of all chunks. Include some defensive tests to make sure forward
+ * progress is made, and that the whole test completes in a reasonable time.
+ */
+static void ifs_test_core(int cpu, struct device *dev)
+{
+       union ifs_scan activate;
+       union ifs_status status;
+       unsigned long timeout;
+       struct ifs_data *ifsd;
+       u64 msrvals[2];
+       int retries;
+
+       ifsd = ifs_get_data(dev);
+
+       activate.rsvd = 0;
+       activate.delay = IFS_THREAD_WAIT;
+       activate.sigmce = 0;
+       activate.start = 0;
+       activate.stop = ifsd->valid_chunks - 1;
+
+       timeout = jiffies + HZ / 2;
+       retries = MAX_IFS_RETRIES;
+
+       while (activate.start <= activate.stop) {
+               if (time_after(jiffies, timeout)) {
+                       status.error_code = IFS_SW_TIMEOUT;
+                       break;
+               }
+
+               msrvals[0] = activate.data;
+               stop_core_cpuslocked(cpu, doscan, msrvals);
+
+               status.data = msrvals[1];
+
+               trace_ifs_status(cpu, activate, status);
+
+               /* Some cases can be retried, give up for others */
+               if (!can_restart(status))
+                       break;
+
+               if (status.chunk_num == activate.start) {
+                       /* Check for forward progress */
+                       if (--retries == 0) {
+                               if (status.error_code == IFS_NO_ERROR)
+                                       status.error_code = IFS_SW_PARTIAL_COMPLETION;
+                               break;
+                       }
+               } else {
+                       retries = MAX_IFS_RETRIES;
+                       activate.start = status.chunk_num;
+               }
+       }
+
+       /* Update status for this core */
+       ifsd->scan_details = status.data;
+
+       if (status.control_error || status.signature_error) {
+               ifsd->status = SCAN_TEST_FAIL;
+               message_fail(dev, cpu, status);
+       } else if (status.error_code) {
+               ifsd->status = SCAN_NOT_TESTED;
+               message_not_tested(dev, cpu, status);
+       } else {
+               ifsd->status = SCAN_TEST_PASS;
+       }
+}
+
+/*
+ * Initiate a per-core test. It wakes up the work queue threads on the target
+ * cpu and its sibling cpu. Once all sibling threads wake up, the scan test
+ * is executed and the caller waits for all siblings to finish.
+ */
+int do_core_test(int cpu, struct device *dev)
+{
+       int ret = 0;
+
+       /* Prevent CPUs from being taken offline during the scan test */
+       cpus_read_lock();
+
+       if (!cpu_online(cpu)) {
+               dev_info(dev, "cannot test offline cpu %d\n", cpu);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ifs_test_core(cpu, dev);
+out:
+       cpus_read_unlock();
+       return ret;
+}
diff --git a/drivers/platform/x86/intel/ifs/sysfs.c b/drivers/platform/x86/intel/ifs/sysfs.c
new file mode 100644 (file)
index 0000000..37d8380
--- /dev/null
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/semaphore.h>
+#include <linux/slab.h>
+
+#include "ifs.h"
+
+/*
+ * Protects against simultaneous tests on multiple cores, and against
+ * reloading the scan file while a test is in progress.
+ */
+DEFINE_SEMAPHORE(ifs_sem);
+
+/*
+ * The sysfs interface to check additional details of the last test:
+ * cat /sys/devices/virtual/misc/intel_ifs_0/details
+ */
+static ssize_t details_show(struct device *dev,
+                           struct device_attribute *attr,
+                           char *buf)
+{
+       struct ifs_data *ifsd = ifs_get_data(dev);
+
+       return sysfs_emit(buf, "%#llx\n", ifsd->scan_details);
+}
+
+static DEVICE_ATTR_RO(details);
+
+static const char * const status_msg[] = {
+       [SCAN_NOT_TESTED] = "untested",
+       [SCAN_TEST_PASS] = "pass",
+       [SCAN_TEST_FAIL] = "fail"
+};
+
+/*
+ * The sysfs interface to check the status of the last test:
+ * cat /sys/devices/virtual/misc/intel_ifs_0/status
+ */
+static ssize_t status_show(struct device *dev,
+                          struct device_attribute *attr,
+                          char *buf)
+{
+       struct ifs_data *ifsd = ifs_get_data(dev);
+
+       return sysfs_emit(buf, "%s\n", status_msg[ifsd->status]);
+}
+
+static DEVICE_ATTR_RO(status);
+
+/*
+ * The sysfs interface for single core testing.
+ * To start a test, for example on cpu5:
+ * echo 5 > /sys/devices/virtual/misc/intel_ifs_0/run_test
+ * To check the result:
+ * cat /sys/devices/virtual/misc/intel_ifs_0/status
+ * The sibling thread is tested at the same time.
+ */
+static ssize_t run_test_store(struct device *dev,
+                             struct device_attribute *attr,
+                             const char *buf, size_t count)
+{
+       struct ifs_data *ifsd = ifs_get_data(dev);
+       unsigned int cpu;
+       int rc;
+
+       rc = kstrtouint(buf, 0, &cpu);
+       if (rc < 0 || cpu >= nr_cpu_ids)
+               return -EINVAL;
+
+       if (down_interruptible(&ifs_sem))
+               return -EINTR;
+
+       if (!ifsd->loaded)
+               rc = -EPERM;
+       else
+               rc = do_core_test(cpu, dev);
+
+       up(&ifs_sem);
+
+       return rc ? rc : count;
+}
+
+static DEVICE_ATTR_WO(run_test);
+
+/*
+ * Reload the IFS image when the user wants to install a new scan file.
+ */
+static ssize_t reload_store(struct device *dev,
+                           struct device_attribute *attr,
+                           const char *buf, size_t count)
+{
+       struct ifs_data *ifsd = ifs_get_data(dev);
+       bool res;
+
+       if (kstrtobool(buf, &res))
+               return -EINVAL;
+       if (!res)
+               return count;
+
+       if (down_interruptible(&ifs_sem))
+               return -EINTR;
+
+       ifs_load_firmware(dev);
+
+       up(&ifs_sem);
+
+       return ifsd->loaded ? count : -ENODEV;
+}
+
+static DEVICE_ATTR_WO(reload);
+
+/*
+ * Display currently loaded IFS image version.
+ */
+static ssize_t image_version_show(struct device *dev,
+                                 struct device_attribute *attr, char *buf)
+{
+       struct ifs_data *ifsd = ifs_get_data(dev);
+
+       if (!ifsd->loaded)
+               return sysfs_emit(buf, "%s\n", "none");
+       else
+               return sysfs_emit(buf, "%#x\n", ifsd->loaded_version);
+}
+
+static DEVICE_ATTR_RO(image_version);
+
+/* global scan sysfs attributes */
+static struct attribute *plat_ifs_attrs[] = {
+       &dev_attr_details.attr,
+       &dev_attr_status.attr,
+       &dev_attr_run_test.attr,
+       &dev_attr_reload.attr,
+       &dev_attr_image_version.attr,
+       NULL
+};
+
+ATTRIBUTE_GROUPS(plat_ifs);
+
+const struct attribute_group **ifs_get_groups(void)
+{
+       return plat_ifs_groups;
+}
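For readers unfamiliar with the macro, ATTRIBUTE_GROUPS(plat_ifs) expands to roughly the following, which is what creates the plat_ifs_groups identifier returned above (a sketch; the real macro in <linux/sysfs.h> carries a few more details):

	static const struct attribute_group plat_ifs_group = {
		.attrs = plat_ifs_attrs,
	};

	static const struct attribute_group *plat_ifs_groups[] = {
		&plat_ifs_group,
		NULL,
	};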
index ac19fcc9abbf5721844122ed1b5fb3607ac0aeed..edaf22e5ae98cf80d64cef1f33858f824fbde7c3 100644 (file)
@@ -999,7 +999,7 @@ static umode_t etr3_is_visible(struct kobject *kobj,
                                struct attribute *attr,
                                int idx)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
+       struct device *dev = kobj_to_dev(kobj);
        struct pmc_dev *pmcdev = dev_get_drvdata(dev);
        const struct pmc_reg_map *map = pmcdev->map;
        u32 reg;
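This container_of() to kobj_to_dev() conversion (repeated below for samsung-laptop and toshiba_acpi) switches to the dedicated driver-core helper, which is defined essentially as follows (paraphrased from include/linux/device.h):

	static inline struct device *kobj_to_dev(struct kobject *kobj)
	{
		return container_of(kobj, struct device, kobj);
	}

Using the helper keeps the conversion in one place and reads more clearly than the open-coded idiom.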
index 6b6f3e2a617afc3d8a26604a25ed5f34ba0c75b9..f73ecfd4a30922f10c8cf2c611cc87f9b568c635 100644 (file)
@@ -103,7 +103,7 @@ static int pmt_telem_probe(struct auxiliary_device *auxdev, const struct auxilia
        auxiliary_set_drvdata(auxdev, priv);
 
        for (i = 0; i < intel_vsec_dev->num_resources; i++) {
-               struct intel_pmt_entry *entry = &priv->entry[i];
+               struct intel_pmt_entry *entry = &priv->entry[priv->num_entries];
 
                ret = intel_pmt_dev_create(entry, &pmt_telem_ns, intel_vsec_dev, i);
                if (ret < 0)
index a40fae6edc8410aea972dabbc1b853bfab923aa4..b8b1ed1406de2149eb0f1b1a5ad8da735c807c21 100644 (file)
@@ -221,19 +221,6 @@ int pmc_atom_read(int offset, u32 *value)
        *value = pmc_reg_read(pmc, offset);
        return 0;
 }
-EXPORT_SYMBOL_GPL(pmc_atom_read);
-
-int pmc_atom_write(int offset, u32 value)
-{
-       struct pmc_dev *pmc = &pmc_device;
-
-       if (!pmc->init)
-               return -ENODEV;
-
-       pmc_reg_write(pmc, offset, value);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(pmc_atom_write);
 
 static void pmc_power_off(void)
 {
index 19f6b456234f8bd5443b38ceba4189c5c1ba7d03..c187dcdf82f0ba9a0e102c27a922035bd7b5411d 100644 (file)
@@ -1208,7 +1208,7 @@ static int __init samsung_backlight_init(struct samsung_laptop *samsung)
 static umode_t samsung_sysfs_is_visible(struct kobject *kobj,
                                        struct attribute *attr, int idx)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
+       struct device *dev = kobj_to_dev(kobj);
        struct samsung_laptop *samsung = dev_get_drvdata(dev);
        bool ok = true;
 
index c568fae56db29ab979c8dfbdb30f9dd852627559..e6cb4a14cdd4705cb479e4e21b52dbf3d472dda1 100644 (file)
@@ -309,6 +309,20 @@ struct ibm_init_struct {
        struct ibm_struct *data;
 };
 
+/* DMI Quirks */
+struct quirk_entry {
+       bool btusb_bug;
+       u32 s2idle_bug_mmio;
+};
+
+static struct quirk_entry quirk_btusb_bug = {
+       .btusb_bug = true,
+};
+
+static struct quirk_entry quirk_s2idle_bug = {
+       .s2idle_bug_mmio = 0xfed80380,
+};
+
 static struct {
        u32 bluetooth:1;
        u32 hotkey:1;
@@ -338,6 +352,7 @@ static struct {
        u32 hotkey_poll_active:1;
        u32 has_adaptive_kbd:1;
        u32 kbd_lang:1;
+       struct quirk_entry *quirks;
 } tp_features;
 
 static struct {
@@ -4359,9 +4374,10 @@ static void bluetooth_exit(void)
        bluetooth_shutdown();
 }
 
-static const struct dmi_system_id bt_fwbug_list[] __initconst = {
+static const struct dmi_system_id fwbug_list[] __initconst = {
        {
                .ident = "ThinkPad E485",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20KU"),
@@ -4369,6 +4385,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad E585",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20KV"),
@@ -4376,6 +4393,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad A285 - 20MW",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20MW"),
@@ -4383,6 +4401,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad A285 - 20MX",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20MX"),
@@ -4390,6 +4409,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad A485 - 20MU",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20MU"),
@@ -4397,14 +4417,125 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad A485 - 20MV",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20MV"),
                },
        },
+       {
+               .ident = "L14 Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20X5"),
+               }
+       },
+       {
+               .ident = "T14s Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20XF"),
+               }
+       },
+       {
+               .ident = "X13 Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20XH"),
+               }
+       },
+       {
+               .ident = "T14 Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20XK"),
+               }
+       },
+       {
+               .ident = "T14 Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UD"),
+               }
+       },
+       {
+               .ident = "T14 Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UE"),
+               }
+       },
+       {
+               .ident = "T14s Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UH"),
+               }
+       },
+       {
+               .ident = "P14s Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20Y1"),
+               }
+       },
+       {
+               .ident = "P14s Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "21A0"),
+               }
+       },
        {}
 };
 
+#ifdef CONFIG_SUSPEND
+/*
+ * Lenovo laptops from a variety of generations run an SMI handler during the
+ * D3->D0 transition that occurs specifically when exiting suspend-to-idle,
+ * which can cause large delays during resume when the IOMMU translation layer
+ * is enabled (the default behavior) for NVMe devices.
+ *
+ * To avoid this firmware problem, skip the SMI handler on these machines
+ * before the D0 transition occurs.
+ */
+static void thinkpad_acpi_amd_s2idle_restore(void)
+{
+       struct resource *res;
+       void __iomem *addr;
+       u8 val;
+
+       res = request_mem_region_muxed(tp_features.quirks->s2idle_bug_mmio, 1,
+                                       "thinkpad_acpi_pm80");
+       if (!res)
+               return;
+
+       addr = ioremap(tp_features.quirks->s2idle_bug_mmio, 1);
+       if (!addr)
+               goto cleanup_resource;
+
+       val = ioread8(addr);
+       iowrite8(val & ~BIT(0), addr);
+
+       iounmap(addr);
+cleanup_resource:
+       release_resource(res);
+}
+
+static struct acpi_s2idle_dev_ops thinkpad_acpi_s2idle_dev_ops = {
+       .restore = thinkpad_acpi_amd_s2idle_restore,
+};
+#endif
+
 static const struct pci_device_id fwbug_cards_ids[] __initconst = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24F3) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24FD) },
@@ -4419,7 +4550,8 @@ static int __init have_bt_fwbug(void)
         * Some AMD-based ThinkPads have a firmware bug where calling
         * "GBDC" causes bluetooth on Intel wireless cards to be blocked
         */
-       if (dmi_check_system(bt_fwbug_list) && pci_dev_present(fwbug_cards_ids)) {
+       if (tp_features.quirks && tp_features.quirks->btusb_bug &&
+           pci_dev_present(fwbug_cards_ids)) {
                vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_RFKILL,
                        FW_BUG "disable bluetooth subdriver for Intel cards\n");
                return 1;
@@ -8748,24 +8880,27 @@ static int __init fan_init(struct ibm_init_struct *iibm)
                        fan_status_access_mode = TPACPI_FAN_RD_TPEC;
                        if (quirks & TPACPI_FAN_Q1)
                                fan_quirk1_setup();
-                       if (quirks & TPACPI_FAN_2FAN) {
-                               tp_features.second_fan = 1;
-                               pr_info("secondary fan support enabled\n");
-                       }
-                       if (quirks & TPACPI_FAN_2CTL) {
-                               tp_features.second_fan = 1;
-                               tp_features.second_fan_ctl = 1;
-                               pr_info("secondary fan control enabled\n");
-                       }
                        /* Try and probe the 2nd fan */
+                       tp_features.second_fan = 1; /* needed for get_speed to work */
                        res = fan2_get_speed(&speed);
                        if (res >= 0) {
                                /* It responded - so let's assume it's there */
                                tp_features.second_fan = 1;
                                tp_features.second_fan_ctl = 1;
                                pr_info("secondary fan control detected & enabled\n");
+                       } else {
+                               /* Fan not auto-detected */
+                               tp_features.second_fan = 0;
+                               if (quirks & TPACPI_FAN_2FAN) {
+                                       tp_features.second_fan = 1;
+                                       pr_info("secondary fan support enabled\n");
+                               }
+                               if (quirks & TPACPI_FAN_2CTL) {
+                                       tp_features.second_fan = 1;
+                                       tp_features.second_fan_ctl = 1;
+                                       pr_info("secondary fan control enabled\n");
+                               }
                        }
-
                } else {
                        pr_err("ThinkPad ACPI EC access misbehaving, fan status and control unavailable\n");
                        return -ENODEV;
@@ -11455,6 +11590,10 @@ static void thinkpad_acpi_module_exit(void)
 
        tpacpi_lifecycle = TPACPI_LIFE_EXITING;
 
+#ifdef CONFIG_SUSPEND
+       if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio)
+               acpi_unregister_lps0_dev(&thinkpad_acpi_s2idle_dev_ops);
+#endif
        if (tpacpi_hwmon)
                hwmon_device_unregister(tpacpi_hwmon);
        if (tp_features.sensors_pdrv_registered)
@@ -11496,6 +11635,7 @@ static void thinkpad_acpi_module_exit(void)
 
 static int __init thinkpad_acpi_module_init(void)
 {
+       const struct dmi_system_id *dmi_id;
        int ret, i;
 
        tpacpi_lifecycle = TPACPI_LIFE_INIT;
@@ -11535,6 +11675,10 @@ static int __init thinkpad_acpi_module_init(void)
                return -ENODEV;
        }
 
+       dmi_id = dmi_first_match(fwbug_list);
+       if (dmi_id)
+               tp_features.quirks = dmi_id->driver_data;
+
        /* Device initialization */
        tpacpi_pdev = platform_device_register_simple(TPACPI_DRVR_NAME, -1,
                                                        NULL, 0);
@@ -11623,6 +11767,13 @@ static int __init thinkpad_acpi_module_init(void)
                tp_features.input_device_registered = 1;
        }
 
+#ifdef CONFIG_SUSPEND
+       if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio) {
+               if (!acpi_register_lps0_dev(&thinkpad_acpi_s2idle_dev_ops))
+                       pr_info("Using s2idle quirk to avoid %s platform firmware bug\n",
+                               (dmi_id && dmi_id->ident) ? dmi_id->ident : "");
+       }
+#endif
        return 0;
 }
 
index f113dec98e21dbd37f6a977840ebe68fb491cd30..0fc9e8b8827bdf7307fe3257e04339688c9963bc 100644 (file)
@@ -2353,7 +2353,7 @@ static struct attribute *toshiba_attributes[] = {
 static umode_t toshiba_sysfs_is_visible(struct kobject *kobj,
                                        struct attribute *attr, int idx)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
+       struct device *dev = kobj_to_dev(kobj);
        struct toshiba_acpi_dev *drv = dev_get_drvdata(dev);
        bool exists = true;
 
diff --git a/drivers/platform/x86/winmate-fm07-keys.c b/drivers/platform/x86/winmate-fm07-keys.c
new file mode 100644 (file)
index 0000000..2c90c5c
--- /dev/null
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Driver for the Winmate FM07 front-panel keys
+//
+// Author: Daniel Beer <daniel.beer@tirotech.co.nz>
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/dmi.h>
+#include <linux/io.h>
+
+#define DRV_NAME       "winmate-fm07keys"
+
+#define PORT_CMD       0x6c
+#define PORT_DATA      0x68
+
+#define EC_ADDR_KEYS   0x3b
+#define EC_CMD_READ    0x80
+
+#define BASE_KEY       KEY_F13
+#define NUM_KEYS       5
+
+/* Typically we're done in fewer than 10 iterations */
+#define LOOP_TIMEOUT   1000
+
+static void fm07keys_poll(struct input_dev *input)
+{
+       uint8_t k;
+       int i;
+
+       /* Flush output buffer */
+       i = 0;
+       while (inb(PORT_CMD) & 0x01) {
+               if (++i >= LOOP_TIMEOUT)
+                       goto timeout;
+               inb(PORT_DATA);
+       }
+
+       /* Send request and wait for write completion */
+       outb(EC_CMD_READ, PORT_CMD);
+       i = 0;
+       while (inb(PORT_CMD) & 0x02)
+               if (++i >= LOOP_TIMEOUT)
+                       goto timeout;
+
+       outb(EC_ADDR_KEYS, PORT_DATA);
+       i = 0;
+       while (inb(PORT_CMD) & 0x02)
+               if (++i >= LOOP_TIMEOUT)
+                       goto timeout;
+
+       /* Wait for data ready */
+       i = 0;
+       while (!(inb(PORT_CMD) & 0x01))
+               if (++i >= LOOP_TIMEOUT)
+                       goto timeout;
+       k = inb(PORT_DATA);
+
+       /* Notify of new key states */
+       for (i = 0; i < NUM_KEYS; i++) {
+               input_report_key(input, BASE_KEY + i, (~k) & 1);
+               k >>= 1;
+       }
+
+       input_sync(input);
+       return;
+
+timeout:
+       dev_warn_ratelimited(&input->dev, "timeout polling IO memory\n");
+}
+
+static int fm07keys_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct input_dev *input;
+       int ret;
+       int i;
+
+       input = devm_input_allocate_device(dev);
+       if (!input) {
+               dev_err(dev, "no memory for input device\n");
+               return -ENOMEM;
+       }
+
+       if (!devm_request_region(dev, PORT_CMD, 1, "Winmate FM07 EC"))
+               return -EBUSY;
+       if (!devm_request_region(dev, PORT_DATA, 1, "Winmate FM07 EC"))
+               return -EBUSY;
+
+       input->name = "Winmate FM07 front-panel keys";
+       input->phys = DRV_NAME "/input0";
+
+       input->id.bustype = BUS_HOST;
+       input->id.vendor = 0x0001;
+       input->id.product = 0x0001;
+       input->id.version = 0x0100;
+
+       __set_bit(EV_KEY, input->evbit);
+
+       for (i = 0; i < NUM_KEYS; i++)
+               __set_bit(BASE_KEY + i, input->keybit);
+
+       ret = input_setup_polling(input, fm07keys_poll);
+       if (ret) {
+               dev_err(dev, "unable to set up polling, err=%d\n", ret);
+               return ret;
+       }
+
+       /*
+        * These are silicone buttons. They can't be pressed in rapid
+        * succession, and in testing a 50 Hz sampling rate proved
+        * adequate to avoid missing any events.
+        */
+       input_set_poll_interval(input, 20);
+
+       ret = input_register_device(input);
+       if (ret) {
+               dev_err(dev, "unable to register polled device, err=%d\n",
+                       ret);
+               return ret;
+       }
+
+       input_sync(input);
+       return 0;
+}
+
+static struct platform_driver fm07keys_driver = {
+       .probe          = fm07keys_probe,
+       .driver         = {
+               .name   = DRV_NAME
+       },
+};
+
+static struct platform_device *dev;
+
+static const struct dmi_system_id fm07keys_dmi_table[] __initconst = {
+       {
+               /* FM07 and FM07P */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Winmate Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "IP30"),
+               },
+       },
+       { }
+};
+
+MODULE_DEVICE_TABLE(dmi, fm07keys_dmi_table);
+
+static int __init fm07keys_init(void)
+{
+       int ret;
+
+       if (!dmi_check_system(fm07keys_dmi_table))
+               return -ENODEV;
+
+       ret = platform_driver_register(&fm07keys_driver);
+       if (ret) {
+               pr_err("fm07keys: failed to register driver, err=%d\n", ret);
+               return ret;
+       }
+
+       dev = platform_device_register_simple(DRV_NAME, -1, NULL, 0);
+       if (IS_ERR(dev)) {
+               ret = PTR_ERR(dev);
+               pr_err("fm07keys: failed to allocate device, err = %d\n", ret);
+               goto fail_register;
+       }
+
+       return 0;
+
+fail_register:
+       platform_driver_unregister(&fm07keys_driver);
+       return ret;
+}
+
+static void __exit fm07keys_exit(void)
+{
+       platform_driver_unregister(&fm07keys_driver);
+       platform_device_unregister(dev);
+}
+
+module_init(fm07keys_init);
+module_exit(fm07keys_exit);
+
+MODULE_AUTHOR("Daniel Beer <daniel.beer@tirotech.co.nz>");
+MODULE_DESCRIPTION("Winmate FM07 front-panel keys driver");
+MODULE_LICENSE("GPL");
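fm07keys_poll() above repeats the same bounded busy-wait on the EC status register three times; a hedged sketch of how such a wait could be factored out (the helper name and shape are illustrative, not part of the driver):

	/* Illustrative only: poll until the EC status bits selected by 'mask'
	 * read back as 'want'; give up after LOOP_TIMEOUT iterations. */
	static int fm07keys_wait_status(u8 mask, u8 want)
	{
		int i;

		for (i = 0; i < LOOP_TIMEOUT; i++)
			if ((inb(PORT_CMD) & mask) == want)
				return 0;
		return -ETIMEDOUT;
	}

Each write-completion wait in the poll routine would then reduce to a single call such as fm07keys_wait_status(0x02, 0).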
index 58a23a9adbef318ab619dcc1ff15aada6f30f868..aed293b5af81b12b46e8dffc2e04139b6afa6582 100644 (file)
@@ -1308,21 +1308,20 @@ acpi_wmi_ec_space_handler(u32 function, acpi_physical_address address,
 static void acpi_wmi_notify_handler(acpi_handle handle, u32 event,
                                    void *context)
 {
-       struct wmi_block *wblock;
-       bool found_it = false;
+       struct wmi_block *wblock = NULL, *iter;
 
-       list_for_each_entry(wblock, &wmi_block_list, list) {
-               struct guid_block *block = &wblock->gblock;
+       list_for_each_entry(iter, &wmi_block_list, list) {
+               struct guid_block *block = &iter->gblock;
 
-               if (wblock->acpi_device->handle == handle &&
+               if (iter->acpi_device->handle == handle &&
                    (block->flags & ACPI_WMI_EVENT) &&
                    (block->notify_id == event)) {
-                       found_it = true;
+                       wblock = iter;
                        break;
                }
        }
 
-       if (!found_it)
+       if (!wblock)
                return;
 
        /* If a driver is bound, then notify the driver. */
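The rewrite above follows a wider kernel cleanup: when list_for_each_entry() runs to completion without a break, the iterator ends up as a bogus pointer computed from the list head, so testing or dereferencing it after the loop is unsafe. A minimal sketch of the safe pattern the hunk adopts (the struct and function names are illustrative):

	#include <linux/list.h>

	struct foo {
		struct list_head list;
		int id;
	};

	/* Keep a separate result pointer that is only assigned on a match,
	 * instead of inspecting the loop iterator after the loop. */
	static struct foo *foo_find(struct list_head *head, int id)
	{
		struct foo *found = NULL, *iter;

		list_for_each_entry(iter, head, list) {
			if (iter->id == id) {
				found = iter;
				break;
			}
		}

		return found; /* NULL when nothing matched */
	}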
index bca2f912d3496f3435552d6a1377562c31159c79..f5eced0842b36d158cf18a2c648482513a4496cc 100644 (file)
@@ -211,7 +211,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
                return 0;
 
        pd = em_cpu_get(cpu);
-       if (!pd)
+       if (!pd || em_is_artificial(pd))
                return -EINVAL;
 
        dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
index 07611a00b78fd1d9d899b2f50bb91d7b3d8ff2eb..a9c99d9e8b4285e77fbe7d037dff89c79d209370 100644 (file)
@@ -1010,7 +1010,7 @@ static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
         * where time_unit defaults to 1 sec. Never 0.
         */
        if (!to_raw)
-               return (value) ? value *= rp->time_unit : rp->time_unit;
+               return (value) ? value * rp->time_unit : rp->time_unit;
 
        value = div64_u64(value, rp->time_unit);
 
@@ -1107,6 +1107,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rapl_defaults_core),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &rapl_defaults_core),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &rapl_defaults_core),
+       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         &rapl_defaults_core),
+       X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &rapl_defaults_core),
        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &rapl_defaults_spr_server),
        X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD,           &rapl_defaults_core),
 
index 1be45f36ab6cd08856903fa32c2472161e74083c..9d23984d89311b62524f8607bfc42312d4c6f9ec 100644 (file)
@@ -140,6 +140,7 @@ static const struct x86_cpu_id pl4_support_ids[] = {
        { X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
        { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
        { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY },
        {}
 };
 
index 0feaa4b453175b22abd933eeb38f6fba6e95bb62..860672d6a03c04ca590aa0c809653555a3255ef7 100644 (file)
@@ -300,7 +300,7 @@ struct ptp_ocp {
        struct platform_device  *spi_flash;
        struct clk_hw           *i2c_clk;
        struct timer_list       watchdog;
-       const struct ocp_attr_group *attr_tbl;
+       const struct attribute_group **attr_group;
        const struct ptp_ocp_eeprom_map *eeprom_map;
        struct dentry           *debug_root;
        time64_t                gnss_lost;
@@ -841,7 +841,7 @@ __ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u32 adj_val)
 }
 
 static void
-ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns)
+ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, s64 delta_ns)
 {
        struct timespec64 ts;
        unsigned long flags;
@@ -850,7 +850,8 @@ ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns)
        spin_lock_irqsave(&bp->lock, flags);
        err = __ptp_ocp_gettime_locked(bp, &ts, NULL);
        if (likely(!err)) {
-               timespec64_add_ns(&ts, delta_ns);
+               set_normalized_timespec64(&ts, ts.tv_sec,
+                                         ts.tv_nsec + delta_ns);
                __ptp_ocp_settime_locked(bp, &ts);
        }
        spin_unlock_irqrestore(&bp->lock, flags);
@@ -1557,7 +1558,7 @@ ptp_ocp_signal_set(struct ptp_ocp *bp, int gen, struct ptp_ocp_signal *s)
        start_ns = ktime_set(ts.tv_sec, ts.tv_nsec) + NSEC_PER_MSEC;
        if (!s->start) {
                /* roundup() does not work on 32-bit systems */
-               s->start = DIV_ROUND_UP_ULL(start_ns, s->period);
+               s->start = DIV64_U64_ROUND_UP(start_ns, s->period);
                s->start = ktime_add(s->start, s->phase);
        }
 
@@ -1835,6 +1836,42 @@ ptp_ocp_signal_init(struct ptp_ocp *bp)
                                             bp->signal_out[i]->mem);
 }
 
+static void
+ptp_ocp_attr_group_del(struct ptp_ocp *bp)
+{
+       sysfs_remove_groups(&bp->dev.kobj, bp->attr_group);
+       kfree(bp->attr_group);
+}
+
+static int
+ptp_ocp_attr_group_add(struct ptp_ocp *bp,
+                      const struct ocp_attr_group *attr_tbl)
+{
+       int count, i;
+       int err;
+
+       count = 0;
+       for (i = 0; attr_tbl[i].cap; i++)
+               if (attr_tbl[i].cap & bp->fw_cap)
+                       count++;
+
+       bp->attr_group = kcalloc(count + 1, sizeof(struct attribute_group *),
+                                GFP_KERNEL);
+       if (!bp->attr_group)
+               return -ENOMEM;
+
+       count = 0;
+       for (i = 0; attr_tbl[i].cap; i++)
+               if (attr_tbl[i].cap & bp->fw_cap)
+                       bp->attr_group[count++] = attr_tbl[i].group;
+
+       err = sysfs_create_groups(&bp->dev.kobj, bp->attr_group);
+       if (err)
+               bp->attr_group[0] = NULL;
+
+       return err;
+}
+
 static void
 ptp_ocp_sma_init(struct ptp_ocp *bp)
 {
@@ -1904,7 +1941,6 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
        bp->flash_start = 1024 * 4096;
        bp->eeprom_map = fb_eeprom_map;
        bp->fw_version = ioread32(&bp->image->version);
-       bp->attr_tbl = fb_timecard_groups;
        bp->fw_cap = OCP_CAP_BASIC;
 
        ver = bp->fw_version & 0xffff;
@@ -1918,6 +1954,10 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
        ptp_ocp_sma_init(bp);
        ptp_ocp_signal_init(bp);
 
+       err = ptp_ocp_attr_group_add(bp, fb_timecard_groups);
+       if (err)
+               return err;
+
        err = ptp_ocp_fb_set_pins(bp);
        if (err)
                return err;
@@ -3388,7 +3428,6 @@ ptp_ocp_complete(struct ptp_ocp *bp)
 {
        struct pps_device *pps;
        char buf[32];
-       int i, err;
 
        if (bp->gnss_port != -1) {
                sprintf(buf, "ttyS%d", bp->gnss_port);
@@ -3413,14 +3452,6 @@ ptp_ocp_complete(struct ptp_ocp *bp)
        if (pps)
                ptp_ocp_symlink(bp, pps->dev, "pps");
 
-       for (i = 0; bp->attr_tbl[i].cap; i++) {
-               if (!(bp->attr_tbl[i].cap & bp->fw_cap))
-                       continue;
-               err = sysfs_create_group(&bp->dev.kobj, bp->attr_tbl[i].group);
-               if (err)
-                       return err;
-       }
-
        ptp_ocp_debugfs_add_device(bp);
 
        return 0;
@@ -3492,15 +3523,11 @@ static void
 ptp_ocp_detach_sysfs(struct ptp_ocp *bp)
 {
        struct device *dev = &bp->dev;
-       int i;
 
        sysfs_remove_link(&dev->kobj, "ttyGNSS");
        sysfs_remove_link(&dev->kobj, "ttyMAC");
        sysfs_remove_link(&dev->kobj, "ptp");
        sysfs_remove_link(&dev->kobj, "pps");
-       if (bp->attr_tbl)
-               for (i = 0; bp->attr_tbl[i].cap; i++)
-                       sysfs_remove_group(&dev->kobj, bp->attr_tbl[i].group);
 }
 
 static void
@@ -3510,6 +3537,7 @@ ptp_ocp_detach(struct ptp_ocp *bp)
 
        ptp_ocp_debugfs_remove_device(bp);
        ptp_ocp_detach_sysfs(bp);
+       ptp_ocp_attr_group_del(bp);
        if (timer_pending(&bp->watchdog))
                del_timer_sync(&bp->watchdog);
        if (bp->ts0)
index 5ef2306fce04d8bc10682652073841c2878f9311..cbe0f96ca342bb1f064c9e134bb1bf0a6c8fcc32 100644 (file)
@@ -1057,6 +1057,16 @@ config REGULATOR_RT5190A
          buck converters, 1 LDO, mute AC OFF depop function, with the general
          I2C control interface.
 
+config REGULATOR_RT5759
+       tristate "Richtek RT5759 Regulator"
+       depends on I2C
+       select REGMAP_I2C
+       help
+         This adds support for the voltage regulator in the Richtek RT5759.
+         The RT5759 is a high-performance, synchronous step-down DC-DC
+         converter that can deliver up to 9A of output current from a 3V
+         to 6.5V input supply.
+
 config REGULATOR_RT6160
        tristate "Richtek RT6160 BuckBoost voltage regulator"
        depends on I2C
@@ -1157,6 +1167,13 @@ config REGULATOR_SLG51000
          The SLG51000 is seven compact and customizable low dropout
          regulators.
 
+config REGULATOR_SM5703
+       tristate "Silicon Mitus SM5703 regulators"
+       depends on MFD_SM5703
+       help
+         This driver provides support for the voltage regulators of the
+         SM5703 multi-function device.
+
 config REGULATOR_STM32_BOOSTER
        tristate "STMicroelectronics STM32 BOOSTER"
        depends on ARCH_STM32 || COMPILE_TEST
index 1b64ad5767be0592fb7f7fe603e2a90d084c0213..8d3ee8b6d41d8f5a37a716ab38bd9946e11de677 100644 (file)
@@ -127,6 +127,7 @@ obj-$(CONFIG_REGULATOR_RT4801)      += rt4801-regulator.o
 obj-$(CONFIG_REGULATOR_RT4831) += rt4831-regulator.o
 obj-$(CONFIG_REGULATOR_RT5033) += rt5033-regulator.o
 obj-$(CONFIG_REGULATOR_RT5190A) += rt5190a-regulator.o
+obj-$(CONFIG_REGULATOR_RT5759) += rt5759-regulator.o
 obj-$(CONFIG_REGULATOR_RT6160) += rt6160-regulator.o
 obj-$(CONFIG_REGULATOR_RT6245) += rt6245-regulator.o
 obj-$(CONFIG_REGULATOR_RTMV20) += rtmv20-regulator.o
@@ -138,6 +139,7 @@ obj-$(CONFIG_REGULATOR_S5M8767) += s5m8767.o
 obj-$(CONFIG_REGULATOR_SC2731) += sc2731-regulator.o
 obj-$(CONFIG_REGULATOR_SKY81452) += sky81452-regulator.o
 obj-$(CONFIG_REGULATOR_SLG51000) += slg51000-regulator.o
+obj-$(CONFIG_REGULATOR_SM5703) += sm5703-regulator.o
 obj-$(CONFIG_REGULATOR_STM32_BOOSTER) += stm32-booster.o
 obj-$(CONFIG_REGULATOR_STM32_VREFBUF) += stm32-vrefbuf.o
 obj-$(CONFIG_REGULATOR_STM32_PWR) += stm32-pwr.o
index d2553970a67ba73e1a7f2d75b3ca49d02f0317ea..1e54a833f2cf0e5ea2dcab5f8351e0da690a6b39 100644 (file)
@@ -83,6 +83,7 @@ struct regulator_supply_alias {
 
 static int _regulator_is_enabled(struct regulator_dev *rdev);
 static int _regulator_disable(struct regulator *regulator);
+static int _regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags);
 static int _regulator_get_current_limit(struct regulator_dev *rdev);
 static unsigned int _regulator_get_mode(struct regulator_dev *rdev);
 static int _notifier_call_chain(struct regulator_dev *rdev,
@@ -911,6 +912,30 @@ static ssize_t bypass_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(bypass);
 
+#define REGULATOR_ERROR_ATTR(name, bit)                                                        \
+       static ssize_t name##_show(struct device *dev, struct device_attribute *attr,   \
+                                  char *buf)                                           \
+       {                                                                               \
+               int ret;                                                                \
+               unsigned int flags;                                                     \
+               struct regulator_dev *rdev = dev_get_drvdata(dev);                      \
+               ret = _regulator_get_error_flags(rdev, &flags);                         \
+               if (ret)                                                                \
+                       return ret;                                                     \
+               return sysfs_emit(buf, "%d\n", !!(flags & (bit)));                      \
+       }                                                                               \
+       static DEVICE_ATTR_RO(name)
+
+REGULATOR_ERROR_ATTR(under_voltage, REGULATOR_ERROR_UNDER_VOLTAGE);
+REGULATOR_ERROR_ATTR(over_current, REGULATOR_ERROR_OVER_CURRENT);
+REGULATOR_ERROR_ATTR(regulation_out, REGULATOR_ERROR_REGULATION_OUT);
+REGULATOR_ERROR_ATTR(fail, REGULATOR_ERROR_FAIL);
+REGULATOR_ERROR_ATTR(over_temp, REGULATOR_ERROR_OVER_TEMP);
+REGULATOR_ERROR_ATTR(under_voltage_warn, REGULATOR_ERROR_UNDER_VOLTAGE_WARN);
+REGULATOR_ERROR_ATTR(over_current_warn, REGULATOR_ERROR_OVER_CURRENT_WARN);
+REGULATOR_ERROR_ATTR(over_voltage_warn, REGULATOR_ERROR_OVER_VOLTAGE_WARN);
+REGULATOR_ERROR_ATTR(over_temp_warn, REGULATOR_ERROR_OVER_TEMP_WARN);
+
 /* Calculate the new optimum regulator operating mode based on the new total
  * consumer load. All locks held by caller
  */
@@ -1522,6 +1547,24 @@ static int set_machine_constraints(struct regulator_dev *rdev)
                }
        }
 
+       /*
+        * If there is no mechanism for controlling the regulator then
+        * flag it as always_on so we don't end up duplicating checks
+        * for this so much.  Note that we could control the state of
+        * a supply to control the output on a regulator that has no
+        * direct control.
+        */
+       if (!rdev->ena_pin && !ops->enable) {
+               if (rdev->supply_name && !rdev->supply)
+                       return -EPROBE_DEFER;
+
+               if (rdev->supply)
+                       rdev->constraints->always_on =
+                               rdev->supply->rdev->constraints->always_on;
+               else
+                       rdev->constraints->always_on = true;
+       }
+
        /* If the constraints say the regulator should be on at this point
         * and we have control then make sure it is enabled.
         */
@@ -2133,10 +2176,13 @@ struct regulator *_regulator_get(struct device *dev, const char *id,
                rdev->exclusive = 1;
 
                ret = _regulator_is_enabled(rdev);
-               if (ret > 0)
+               if (ret > 0) {
                        rdev->use_count = 1;
-               else
+                       regulator->enable_count = 1;
+               } else {
                        rdev->use_count = 0;
+                       regulator->enable_count = 0;
+               }
        }
 
        link = device_link_add(dev, &rdev->dev, DL_FLAG_STATELESS);
@@ -2511,17 +2557,17 @@ static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable)
 }
 
 /**
- * _regulator_enable_delay - a delay helper function
+ * _regulator_delay_helper - a delay helper function
  * @delay: time to delay in microseconds
  *
  * Delay for the requested amount of time as per the guidelines in:
  *
  *     Documentation/timers/timers-howto.rst
  *
- * The assumption here is that regulators will never be enabled in
+ * The assumption here is that these regulator operations will never be used in
  * atomic context and therefore sleeping functions can be used.
  */
-static void _regulator_enable_delay(unsigned int delay)
+static void _regulator_delay_helper(unsigned int delay)
 {
        unsigned int ms = delay / 1000;
        unsigned int us = delay % 1000;
@@ -2603,7 +2649,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
                s64 remaining = ktime_us_delta(end, ktime_get());
 
                if (remaining > 0)
-                       _regulator_enable_delay(remaining);
+                       _regulator_delay_helper(remaining);
        }
 
        if (rdev->ena_pin) {
@@ -2630,14 +2676,14 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
        /* If poll_enabled_time is set, poll up to the delay calculated
         * above, delaying poll_enabled_time us to check if the regulator
         * actually got enabled.
-        * If the regulator isn't enabled after enable_delay has
-        * expired, return -ETIMEDOUT.
+        * If the regulator isn't enabled after our delay helper has expired,
+        * return -ETIMEDOUT.
         */
        if (rdev->desc->poll_enabled_time) {
                unsigned int time_remaining = delay;
 
                while (time_remaining > 0) {
-                       _regulator_enable_delay(rdev->desc->poll_enabled_time);
+                       _regulator_delay_helper(rdev->desc->poll_enabled_time);
 
                        if (rdev->desc->ops->get_status) {
                                ret = _regulator_check_status_enabled(rdev);
@@ -2656,7 +2702,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
                        return -ETIMEDOUT;
                }
        } else {
-               _regulator_enable_delay(delay);
+               _regulator_delay_helper(delay);
        }
 
        trace_regulator_enable_complete(rdev_get_name(rdev));
@@ -3548,12 +3594,7 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev,
        }
 
        /* Insert any necessary delays */
-       if (delay >= 1000) {
-               mdelay(delay / 1000);
-               udelay(delay % 1000);
-       } else if (delay) {
-               udelay(delay);
-       }
+       _regulator_delay_helper(delay);
 
        if (best_val >= 0) {
                unsigned long data = best_val;
@@ -4971,6 +5012,15 @@ static struct attribute *regulator_dev_attrs[] = {
        &dev_attr_max_microvolts.attr,
        &dev_attr_min_microamps.attr,
        &dev_attr_max_microamps.attr,
+       &dev_attr_under_voltage.attr,
+       &dev_attr_over_current.attr,
+       &dev_attr_regulation_out.attr,
+       &dev_attr_fail.attr,
+       &dev_attr_over_temp.attr,
+       &dev_attr_under_voltage_warn.attr,
+       &dev_attr_over_current_warn.attr,
+       &dev_attr_over_voltage_warn.attr,
+       &dev_attr_over_temp_warn.attr,
        &dev_attr_suspend_standby_state.attr,
        &dev_attr_suspend_mem_state.attr,
        &dev_attr_suspend_disk_state.attr,
@@ -5026,6 +5076,17 @@ static umode_t regulator_attr_is_visible(struct kobject *kobj,
        if (attr == &dev_attr_bypass.attr)
                return ops->get_bypass ? mode : 0;
 
+       if (attr == &dev_attr_under_voltage.attr ||
+           attr == &dev_attr_over_current.attr ||
+           attr == &dev_attr_regulation_out.attr ||
+           attr == &dev_attr_fail.attr ||
+           attr == &dev_attr_over_temp.attr ||
+           attr == &dev_attr_under_voltage_warn.attr ||
+           attr == &dev_attr_over_current_warn.attr ||
+           attr == &dev_attr_over_voltage_warn.attr ||
+           attr == &dev_attr_over_temp_warn.attr)
+               return ops->get_error_flags ? mode : 0;
+
        /* constraints need specific supporting methods */
        if (attr == &dev_attr_min_microvolts.attr ||
            attr == &dev_attr_max_microvolts.attr)
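For one instantiation, the REGULATOR_ERROR_ATTR() macro introduced above expands to roughly the following (whitespace rearranged for readability):

	static ssize_t under_voltage_show(struct device *dev,
					  struct device_attribute *attr, char *buf)
	{
		int ret;
		unsigned int flags;
		struct regulator_dev *rdev = dev_get_drvdata(dev);

		ret = _regulator_get_error_flags(rdev, &flags);
		if (ret)
			return ret;

		return sysfs_emit(buf, "%d\n",
				  !!(flags & REGULATOR_ERROR_UNDER_VOLTAGE));
	}
	static DEVICE_ATTR_RO(under_voltage);

regulator_attr_is_visible() then hides all nine of these files unless the driver implements get_error_flags().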
index eb9df485bd8aafcdcf15107aad88e2115c9cfa48..76e0e23bf598c7a134507fd0fa4d8e9bdc1d6163 100644 (file)
@@ -1030,6 +1030,8 @@ static int da9121_assign_chip_model(struct i2c_client *i2c,
                chip->variant_id = DA9121_TYPE_DA9142;
                regmap = &da9121_2ch_regmap_config;
                break;
+       default:
+               return -EINVAL;
        }
 
        /* Set these up for of_regulator_match call which may want .of_map_modes */
index 599ad201dca758a64c97ed88622097859e306862..2a9867abba20c256ea4304acd301ea988b7384ba 100644 (file)
@@ -236,11 +236,8 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
                drvdata->desc.supply_name = devm_kstrdup(&pdev->dev,
                                            config->input_supply,
                                            GFP_KERNEL);
-               if (!drvdata->desc.supply_name) {
-                       dev_err(&pdev->dev,
-                               "Failed to allocate input supply\n");
+               if (!drvdata->desc.supply_name)
                        return -ENOMEM;
-               }
        }
 
        if (config->microvolts)
index eb8027813b99f037750eace868075b14c637bcb8..8a5ce990f1bf9b5dc67623fe9cbc7e9d1c8ec18c 100644 (file)
@@ -130,6 +130,102 @@ struct mt6358_regulator_info {
        .qi = BIT(15),                                                  \
 }
 
+#define MT6366_BUCK(match, vreg, min, max, step,               \
+       volt_ranges, vosel_mask, _da_vsel_reg, _da_vsel_mask,   \
+       _modeset_reg, _modeset_shift)           \
+[MT6366_ID_##vreg] = { \
+       .desc = {       \
+               .name = #vreg,  \
+               .of_match = of_match_ptr(match),        \
+               .ops = &mt6358_volt_range_ops,  \
+               .type = REGULATOR_VOLTAGE,      \
+               .id = MT6366_ID_##vreg,         \
+               .owner = THIS_MODULE,           \
+               .n_voltages = ((max) - (min)) / (step) + 1,     \
+               .linear_ranges = volt_ranges,           \
+               .n_linear_ranges = ARRAY_SIZE(volt_ranges),     \
+               .vsel_reg = MT6358_BUCK_##vreg##_ELR0,  \
+               .vsel_mask = vosel_mask,        \
+               .enable_reg = MT6358_BUCK_##vreg##_CON0,        \
+               .enable_mask = BIT(0),  \
+               .of_map_mode = mt6358_map_mode, \
+       },      \
+       .status_reg = MT6358_BUCK_##vreg##_DBG1,        \
+       .qi = BIT(0),   \
+       .da_vsel_reg = _da_vsel_reg,    \
+       .da_vsel_mask = _da_vsel_mask,  \
+       .modeset_reg = _modeset_reg,    \
+       .modeset_mask = BIT(_modeset_shift),    \
+}
+
+#define MT6366_LDO(match, vreg, ldo_volt_table,        \
+       ldo_index_table, enreg, enbit, vosel,   \
+       vosel_mask)     \
+[MT6366_ID_##vreg] = { \
+       .desc = {       \
+               .name = #vreg,  \
+               .of_match = of_match_ptr(match),        \
+               .ops = &mt6358_volt_table_ops,  \
+               .type = REGULATOR_VOLTAGE,      \
+               .id = MT6366_ID_##vreg, \
+               .owner = THIS_MODULE,   \
+               .n_voltages = ARRAY_SIZE(ldo_volt_table),       \
+               .volt_table = ldo_volt_table,   \
+               .vsel_reg = vosel,      \
+               .vsel_mask = vosel_mask,        \
+               .enable_reg = enreg,    \
+               .enable_mask = BIT(enbit),      \
+       },      \
+       .status_reg = MT6358_LDO_##vreg##_CON1, \
+       .qi = BIT(15),  \
+       .index_table = ldo_index_table, \
+       .n_table = ARRAY_SIZE(ldo_index_table), \
+}
+
+#define MT6366_LDO1(match, vreg, min, max, step,       \
+       volt_ranges, _da_vsel_reg, _da_vsel_mask,       \
+       vosel, vosel_mask)      \
+[MT6366_ID_##vreg] = { \
+       .desc = {       \
+               .name = #vreg,  \
+               .of_match = of_match_ptr(match),        \
+               .ops = &mt6358_volt_range_ops,  \
+               .type = REGULATOR_VOLTAGE,      \
+               .id = MT6366_ID_##vreg, \
+               .owner = THIS_MODULE,   \
+               .n_voltages = ((max) - (min)) / (step) + 1,     \
+               .linear_ranges = volt_ranges,   \
+               .n_linear_ranges = ARRAY_SIZE(volt_ranges),     \
+               .vsel_reg = vosel,      \
+               .vsel_mask = vosel_mask,        \
+               .enable_reg = MT6358_LDO_##vreg##_CON0, \
+               .enable_mask = BIT(0),  \
+       },      \
+       .da_vsel_reg = _da_vsel_reg,    \
+       .da_vsel_mask = _da_vsel_mask,  \
+       .status_reg = MT6358_LDO_##vreg##_DBG1, \
+       .qi = BIT(0),   \
+}
+
+#define MT6366_REG_FIXED(match, vreg,  \
+       enreg, enbit, volt)     \
+[MT6366_ID_##vreg] = { \
+       .desc = {       \
+               .name = #vreg,  \
+               .of_match = of_match_ptr(match),        \
+               .ops = &mt6358_volt_fixed_ops,  \
+               .type = REGULATOR_VOLTAGE,      \
+               .id = MT6366_ID_##vreg, \
+               .owner = THIS_MODULE,   \
+               .n_voltages = 1,        \
+               .enable_reg = enreg,    \
+               .enable_mask = BIT(enbit),      \
+               .min_uV = volt, \
+       },      \
+       .status_reg = MT6358_LDO_##vreg##_CON1, \
+       .qi = BIT(15),                                                  \
+}
+
 static const struct linear_range buck_volt_range1[] = {
        REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250),
 };
@@ -409,6 +505,9 @@ static struct mt6358_regulator_info mt6358_regulators[] = {
        MT6358_BUCK("buck_vcore", VCORE, 500000, 1293750, 6250,
                    buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f,
                    MT6358_VCORE_VGPU_ANA_CON0, 1),
+       MT6358_BUCK("buck_vcore_sshub", VCORE_SSHUB, 500000, 1293750, 6250,
+                   buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_SSHUB_ELR0, 0x7f,
+                   MT6358_VCORE_VGPU_ANA_CON0, 1),
        MT6358_BUCK("buck_vpa", VPA, 500000, 3650000, 50000,
                    buck_volt_range3, 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f,
                    MT6358_VPA_ANA_CON0, 3),
@@ -488,6 +587,10 @@ static struct mt6358_regulator_info mt6358_regulators[] = {
        MT6358_LDO1("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750, 6250,
                    buck_volt_range1, MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00,
                    MT6358_LDO_VSRAM_CON2, 0x7f),
+       MT6358_LDO1("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB, 500000,
+                   1293750, 6250, buck_volt_range1,
+                   MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f,
+                   MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f),
        MT6358_LDO1("ldo_vsram_gpu", VSRAM_GPU, 500000, 1293750, 6250,
                    buck_volt_range1, MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00,
                    MT6358_LDO_VSRAM_CON3, 0x7f),
@@ -496,24 +599,124 @@ static struct mt6358_regulator_info mt6358_regulators[] = {
                    MT6358_LDO_VSRAM_CON1, 0x7f),
 };
 
+/* The array is indexed by id (MT6366_ID_XXX) */
+static struct mt6358_regulator_info mt6366_regulators[] = {
+       MT6366_BUCK("buck_vdram1", VDRAM1, 500000, 2087500, 12500,
+                   buck_volt_range2, 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f,
+                   MT6358_VDRAM1_ANA_CON0, 8),
+       MT6366_BUCK("buck_vcore", VCORE, 500000, 1293750, 6250,
+                   buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f,
+                   MT6358_VCORE_VGPU_ANA_CON0, 1),
+       MT6366_BUCK("buck_vcore_sshub", VCORE_SSHUB, 500000, 1293750, 6250,
+                   buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_SSHUB_ELR0, 0x7f,
+                   MT6358_VCORE_VGPU_ANA_CON0, 1),
+       MT6366_BUCK("buck_vpa", VPA, 500000, 3650000, 50000,
+                   buck_volt_range3, 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f,
+                   MT6358_VPA_ANA_CON0, 3),
+       MT6366_BUCK("buck_vproc11", VPROC11, 500000, 1293750, 6250,
+                   buck_volt_range1, 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f,
+                   MT6358_VPROC_ANA_CON0, 1),
+       MT6366_BUCK("buck_vproc12", VPROC12, 500000, 1293750, 6250,
+                   buck_volt_range1, 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f,
+                   MT6358_VPROC_ANA_CON0, 2),
+       MT6366_BUCK("buck_vgpu", VGPU, 500000, 1293750, 6250,
+                   buck_volt_range1, 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f,
+                   MT6358_VCORE_VGPU_ANA_CON0, 2),
+       MT6366_BUCK("buck_vs2", VS2, 500000, 2087500, 12500,
+                   buck_volt_range2, 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f,
+                   MT6358_VS2_ANA_CON0, 8),
+       MT6366_BUCK("buck_vmodem", VMODEM, 500000, 1293750, 6250,
+                   buck_volt_range1, 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f,
+                   MT6358_VMODEM_ANA_CON0, 8),
+       MT6366_BUCK("buck_vs1", VS1, 1000000, 2587500, 12500,
+                   buck_volt_range4, 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f,
+                   MT6358_VS1_ANA_CON0, 8),
+       MT6366_REG_FIXED("ldo_vrf12", VRF12,
+                        MT6358_LDO_VRF12_CON0, 0, 1200000),
+       MT6366_REG_FIXED("ldo_vio18", VIO18,
+                        MT6358_LDO_VIO18_CON0, 0, 1800000),
+       MT6366_REG_FIXED("ldo_vcn18", VCN18, MT6358_LDO_VCN18_CON0, 0, 1800000),
+       MT6366_REG_FIXED("ldo_vfe28", VFE28, MT6358_LDO_VFE28_CON0, 0, 2800000),
+       MT6366_REG_FIXED("ldo_vcn28", VCN28, MT6358_LDO_VCN28_CON0, 0, 2800000),
+       MT6366_REG_FIXED("ldo_vxo22", VXO22, MT6358_LDO_VXO22_CON0, 0, 2200000),
+       MT6366_REG_FIXED("ldo_vaux18", VAUX18,
+                        MT6358_LDO_VAUX18_CON0, 0, 1800000),
+       MT6366_REG_FIXED("ldo_vbif28", VBIF28,
+                        MT6358_LDO_VBIF28_CON0, 0, 2800000),
+       MT6366_REG_FIXED("ldo_vio28", VIO28, MT6358_LDO_VIO28_CON0, 0, 2800000),
+       MT6366_REG_FIXED("ldo_va12", VA12, MT6358_LDO_VA12_CON0, 0, 1200000),
+       MT6366_REG_FIXED("ldo_vrf18", VRF18, MT6358_LDO_VRF18_CON0, 0, 1800000),
+       MT6366_REG_FIXED("ldo_vaud28", VAUD28,
+                        MT6358_LDO_VAUD28_CON0, 0, 2800000),
+       MT6366_LDO("ldo_vdram2", VDRAM2, vdram2_voltages, vdram2_idx,
+                  MT6358_LDO_VDRAM2_CON0, 0, MT6358_LDO_VDRAM2_ELR0, 0x10),
+       MT6366_LDO("ldo_vsim1", VSIM1, vsim_voltages, vsim_idx,
+                  MT6358_LDO_VSIM1_CON0, 0, MT6358_VSIM1_ANA_CON0, 0xf00),
+       MT6366_LDO("ldo_vibr", VIBR, vibr_voltages, vibr_idx,
+                  MT6358_LDO_VIBR_CON0, 0, MT6358_VIBR_ANA_CON0, 0xf00),
+       MT6366_LDO("ldo_vusb", VUSB, vusb_voltages, vusb_idx,
+                  MT6358_LDO_VUSB_CON0_0, 0, MT6358_VUSB_ANA_CON0, 0x700),
+       MT6366_LDO("ldo_vefuse", VEFUSE, vefuse_voltages, vefuse_idx,
+                  MT6358_LDO_VEFUSE_CON0, 0, MT6358_VEFUSE_ANA_CON0, 0xf00),
+       MT6366_LDO("ldo_vmch", VMCH, vmch_vemc_voltages, vmch_vemc_idx,
+                  MT6358_LDO_VMCH_CON0, 0, MT6358_VMCH_ANA_CON0, 0x700),
+       MT6366_LDO("ldo_vemc", VEMC, vmch_vemc_voltages, vmch_vemc_idx,
+                  MT6358_LDO_VEMC_CON0, 0, MT6358_VEMC_ANA_CON0, 0x700),
+       MT6366_LDO("ldo_vcn33_bt", VCN33_BT, vcn33_bt_wifi_voltages,
+                  vcn33_bt_wifi_idx, MT6358_LDO_VCN33_CON0_0,
+                  0, MT6358_VCN33_ANA_CON0, 0x300),
+       MT6366_LDO("ldo_vcn33_wifi", VCN33_WIFI, vcn33_bt_wifi_voltages,
+                  vcn33_bt_wifi_idx, MT6358_LDO_VCN33_CON0_1,
+                  0, MT6358_VCN33_ANA_CON0, 0x300),
+       MT6366_LDO("ldo_vmc", VMC, vmc_voltages, vmc_idx,
+                  MT6358_LDO_VMC_CON0, 0, MT6358_VMC_ANA_CON0, 0xf00),
+       MT6366_LDO("ldo_vsim2", VSIM2, vsim_voltages, vsim_idx,
+                  MT6358_LDO_VSIM2_CON0, 0, MT6358_VSIM2_ANA_CON0, 0xf00),
+       MT6366_LDO1("ldo_vsram_proc11", VSRAM_PROC11, 500000, 1293750, 6250,
+                   buck_volt_range1, MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00,
+                   MT6358_LDO_VSRAM_CON0, 0x7f),
+       MT6366_LDO1("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750, 6250,
+                   buck_volt_range1, MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00,
+                   MT6358_LDO_VSRAM_CON2, 0x7f),
+       MT6366_LDO1("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB, 500000,
+                   1293750, 6250, buck_volt_range1,
+                   MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f,
+                   MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f),
+       MT6366_LDO1("ldo_vsram_gpu", VSRAM_GPU, 500000, 1293750, 6250,
+                   buck_volt_range1, MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00,
+                   MT6358_LDO_VSRAM_CON3, 0x7f),
+       MT6366_LDO1("ldo_vsram_proc12", VSRAM_PROC12, 500000, 1293750, 6250,
+                   buck_volt_range1, MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00,
+                   MT6358_LDO_VSRAM_CON1, 0x7f),
+};
+
 static int mt6358_regulator_probe(struct platform_device *pdev)
 {
        struct mt6397_chip *mt6397 = dev_get_drvdata(pdev->dev.parent);
        struct regulator_config config = {};
        struct regulator_dev *rdev;
-       int i;
+       struct mt6358_regulator_info *mt6358_info;
+       int i, max_regulator;
+
+       if (mt6397->chip_id == MT6366_CHIP_ID) {
+               max_regulator = MT6366_MAX_REGULATOR;
+               mt6358_info = mt6366_regulators;
+       } else {
+               max_regulator = MT6358_MAX_REGULATOR;
+               mt6358_info = mt6358_regulators;
+       }
 
-       for (i = 0; i < MT6358_MAX_REGULATOR; i++) {
+       for (i = 0; i < max_regulator; i++) {
                config.dev = &pdev->dev;
-               config.driver_data = &mt6358_regulators[i];
+               config.driver_data = &mt6358_info[i];
                config.regmap = mt6397->regmap;
 
                rdev = devm_regulator_register(&pdev->dev,
-                                              &mt6358_regulators[i].desc,
+                                              &mt6358_info[i].desc,
                                               &config);
                if (IS_ERR(rdev)) {
                        dev_err(&pdev->dev, "failed to register %s\n",
-                               mt6358_regulators[i].desc.name);
+                               mt6358_info[i].desc.name);
                        return PTR_ERR(rdev);
                }
        }
index 64e5f5f0cc841a8c484b3f30acf68333744327a7..14b7d3376516289262987884528a39638ebfd55f 100644 (file)
@@ -174,6 +174,14 @@ static int buck_set_dvs(const struct regulator_desc *desc,
                }
        }
 
+       if (ret == 0) {
+               struct pca9450_regulator_desc *regulator = container_of(desc,
+                                       struct pca9450_regulator_desc, desc);
+
+               /* Enable DVS control through PMIC_STBY_REQ for this BUCK */
+               ret = regmap_update_bits(regmap, regulator->desc.enable_reg,
+                                        BUCK1_DVS_CTRL, BUCK1_DVS_CTRL);
+       }
        return ret;
 }
 
@@ -702,6 +710,7 @@ static int pca9450_i2c_probe(struct i2c_client *i2c,
        struct regulator_config config = { };
        struct pca9450 *pca9450;
        unsigned int device_id, i;
+       unsigned int reset_ctrl;
        int ret;
 
        if (!i2c->irq) {
@@ -802,14 +811,30 @@ static int pca9450_i2c_probe(struct i2c_client *i2c,
                return ret;
        }
 
+       if (of_property_read_bool(i2c->dev.of_node, "nxp,wdog_b-warm-reset"))
+               reset_ctrl = WDOG_B_CFG_WARM;
+       else
+               reset_ctrl = WDOG_B_CFG_COLD_LDO12;
+
        /* Set reset behavior on assertion of WDOG_B signal */
        ret = regmap_update_bits(pca9450->regmap, PCA9450_REG_RESET_CTRL,
-                               WDOG_B_CFG_MASK, WDOG_B_CFG_COLD_LDO12);
+                                WDOG_B_CFG_MASK, reset_ctrl);
        if (ret) {
                dev_err(&i2c->dev, "Failed to set WDOG_B reset behavior\n");
                return ret;
        }
 
+       if (of_property_read_bool(i2c->dev.of_node, "nxp,i2c-lt-enable")) {
+               /* Enable I2C Level Translator */
+               ret = regmap_update_bits(pca9450->regmap, PCA9450_REG_CONFIG2,
+                                        I2C_LT_MASK, I2C_LT_ON_STANDBY_RUN);
+               if (ret) {
+                       dev_err(&i2c->dev,
+                               "Failed to enable I2C level translator\n");
+                       return ret;
+               }
+       }
+
        /*
         * The driver uses the LDO5CTRL_H register to control the LDO5 regulator.
         * This is only valid if the SD_VSEL input of the PMIC is high. Let's
index d60d7d1b7fa25e117b6e05851a3f63b618481de5..aa55cfca9e4005dfc4b87cdfa0a660275a7b2280 100644 (file)
@@ -521,6 +521,7 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip)
        parent = of_get_child_by_name(np, "regulators");
        if (!parent) {
                dev_err(dev, "regulators node not found\n");
+               of_node_put(np);
                return -EINVAL;
        }
 
@@ -550,6 +551,7 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip)
        }
 
        of_node_put(parent);
+       of_node_put(np);
        if (ret < 0) {
                dev_err(dev, "Error parsing regulator init data: %d\n",
                        ret);
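The added of_node_put() calls above restore the usual OF refcounting contract: every node obtained through a get-style accessor must be released on every exit path, including the early error returns. A minimal sketch of the pairing (the function and variable names are illustrative):

	#include <linux/of.h>

	static int parse_regulators(struct device_node *of_node)
	{
		struct device_node *np, *parent;
		int ret = 0;

		np = of_node_get(of_node);			/* takes a reference */
		if (!np)
			return -ENODEV;

		parent = of_get_child_by_name(np, "regulators"); /* takes a reference */
		if (!parent) {
			ret = -EINVAL;
			goto put_np;			/* np must still be dropped */
		}

		/* ... parse children ... */

		of_node_put(parent);
	put_np:
		of_node_put(np);
		return ret;
	}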
index 8490aa8eecb1a027be874faa5dc007ad91b0a1c4..7dff94a2eb7e9f1086ff6eb41b728dc1af6ec64a 100644 (file)
@@ -944,32 +944,31 @@ static const struct rpm_regulator_data rpm_pm8950_regulators[] = {
        { "s2", QCOM_SMD_RPM_SMPA, 2, &pm8950_hfsmps, "vdd_s2" },
        { "s3", QCOM_SMD_RPM_SMPA, 3, &pm8950_hfsmps, "vdd_s3" },
        { "s4", QCOM_SMD_RPM_SMPA, 4, &pm8950_hfsmps, "vdd_s4" },
-       { "s5", QCOM_SMD_RPM_SMPA, 5, &pm8950_ftsmps2p5, "vdd_s5" },
+       /* S5 is managed via SPMI. */
        { "s6", QCOM_SMD_RPM_SMPA, 6, &pm8950_hfsmps, "vdd_s6" },
 
        { "l1", QCOM_SMD_RPM_LDOA, 1, &pm8950_ult_nldo, "vdd_l1_l19" },
        { "l2", QCOM_SMD_RPM_LDOA, 2, &pm8950_ult_nldo, "vdd_l2_l23" },
        { "l3", QCOM_SMD_RPM_LDOA, 3, &pm8950_ult_nldo, "vdd_l3" },
-       { "l4", QCOM_SMD_RPM_LDOA, 4, &pm8950_ult_pldo, "vdd_l4_l5_l6_l7_l16" },
-       { "l5", QCOM_SMD_RPM_LDOA, 5, &pm8950_pldo_lv, "vdd_l4_l5_l6_l7_l16" },
-       { "l6", QCOM_SMD_RPM_LDOA, 6, &pm8950_pldo_lv, "vdd_l4_l5_l6_l7_l16" },
-       { "l7", QCOM_SMD_RPM_LDOA, 7, &pm8950_pldo_lv, "vdd_l4_l5_l6_l7_l16" },
+       /* L4 seems not to exist. */
+       { "l5", QCOM_SMD_RPM_LDOA, 5, &pm8950_pldo_lv, "vdd_l5_l6_l7_l16" },
+       { "l6", QCOM_SMD_RPM_LDOA, 6, &pm8950_pldo_lv, "vdd_l5_l6_l7_l16" },
+       { "l7", QCOM_SMD_RPM_LDOA, 7, &pm8950_pldo_lv, "vdd_l5_l6_l7_l16" },
        { "l8", QCOM_SMD_RPM_LDOA, 8, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" },
        { "l9", QCOM_SMD_RPM_LDOA, 9, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" },
        { "l10", QCOM_SMD_RPM_LDOA, 10, &pm8950_ult_nldo, "vdd_l9_l10_l13_l14_l15_l18"},
-       { "l11", QCOM_SMD_RPM_LDOA, 11, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22"},
-       { "l12", QCOM_SMD_RPM_LDOA, 12, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22"},
-       { "l13", QCOM_SMD_RPM_LDOA, 13, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"},
-       { "l14", QCOM_SMD_RPM_LDOA, 14, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"},
-       { "l15", QCOM_SMD_RPM_LDOA, 15, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"},
-       { "l16", QCOM_SMD_RPM_LDOA, 16, &pm8950_ult_pldo, "vdd_l4_l5_l6_l7_l16"},
-       { "l17", QCOM_SMD_RPM_LDOA, 17, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22"},
-       { "l18", QCOM_SMD_RPM_LDOA, 18, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"},
-       { "l19", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l1_l19"},
-       { "l20", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l20"},
-       { "l21", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l21"},
-       { "l22", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l8_l11_l12_l17_l22"},
-       { "l23", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l2_l23"},
+       { "l11", QCOM_SMD_RPM_LDOA, 11, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" },
+       { "l12", QCOM_SMD_RPM_LDOA, 12, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" },
+       { "l13", QCOM_SMD_RPM_LDOA, 13, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" },
+       { "l14", QCOM_SMD_RPM_LDOA, 14, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" },
+       { "l15", QCOM_SMD_RPM_LDOA, 15, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" },
+       { "l16", QCOM_SMD_RPM_LDOA, 16, &pm8950_ult_pldo, "vdd_l5_l6_l7_l16" },
+       { "l17", QCOM_SMD_RPM_LDOA, 17, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" },
+       /* L18 seems not to exist. */
+       { "l19", QCOM_SMD_RPM_LDOA, 19, &pm8950_pldo, "vdd_l1_l19" },
+       /* L20 & L21 seem not to exist. */
+       { "l22", QCOM_SMD_RPM_LDOA, 22, &pm8950_pldo, "vdd_l8_l11_l12_l17_l22" },
+       { "l23", QCOM_SMD_RPM_LDOA, 23, &pm8950_pldo, "vdd_l2_l23" },
        {}
 };
 
index f7df0f4b2f8744b07abb0020f2b1c466cfa7e290..fa8706a352ce9e2c19ecf4b97255ccedeba751f5 100644 (file)
@@ -364,7 +364,6 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
        state->gc.parent = &i2c->dev;
        state->gc.label = i2c->name;
        state->gc.owner = THIS_MODULE;
-       state->gc.of_node = i2c->dev.of_node;
        state->gc.base = -1;
        state->gc.ngpio = NUM_GPIO;
 
index 7a87788d3f0922399d54d88a4e35573aa5931001..563d79196fddcb4f468007142af42e58f34bb56e 100644 (file)
 
 struct rt4801_priv {
        struct device *dev;
-       struct gpio_descs *enable_gpios;
+       struct gpio_desc *enable_gpios[DSV_OUT_MAX];
        unsigned int enable_flag;
        unsigned int volt_sel[DSV_OUT_MAX];
 };
 
+static int rt4801_of_parse_cb(struct device_node *np,
+                             const struct regulator_desc *desc,
+                             struct regulator_config *config)
+{
+       struct rt4801_priv *priv = config->driver_data;
+       int id = desc->id;
+
+       if (priv->enable_gpios[id]) {
+               dev_warn(priv->dev, "duplicated enable-gpios property\n");
+               return 0;
+       }
+       priv->enable_gpios[id] = devm_fwnode_gpiod_get_index(priv->dev,
+                                                            of_fwnode_handle(np),
+                                                            "enable", 0,
+                                                            GPIOD_OUT_HIGH,
+                                                            "rt4801");
+       if (IS_ERR(priv->enable_gpios[id]))
+               priv->enable_gpios[id] = NULL;
+
+       return 0;
+}
+
 static int rt4801_set_voltage_sel(struct regulator_dev *rdev, unsigned int selector)
 {
        struct rt4801_priv *priv = rdev_get_drvdata(rdev);
@@ -63,15 +85,14 @@ static int rt4801_get_voltage_sel(struct regulator_dev *rdev)
 static int rt4801_enable(struct regulator_dev *rdev)
 {
        struct rt4801_priv *priv = rdev_get_drvdata(rdev);
-       struct gpio_descs *gpios = priv->enable_gpios;
        int id = rdev_get_id(rdev), ret;
 
-       if (!gpios || gpios->ndescs <= id) {
+       if (!priv->enable_gpios[id]) {
                dev_warn(&rdev->dev, "no dedicated gpio can control\n");
                goto bypass_gpio;
        }
 
-       gpiod_set_value(gpios->desc[id], 1);
+       gpiod_set_value(priv->enable_gpios[id], 1);
 
 bypass_gpio:
        ret = regmap_write(rdev->regmap, rdev->desc->vsel_reg, priv->volt_sel[id]);
@@ -85,15 +106,14 @@ bypass_gpio:
 static int rt4801_disable(struct regulator_dev *rdev)
 {
        struct rt4801_priv *priv = rdev_get_drvdata(rdev);
-       struct gpio_descs *gpios = priv->enable_gpios;
        int id = rdev_get_id(rdev);
 
-       if (!gpios || gpios->ndescs <= id) {
+       if (!priv->enable_gpios[id]) {
                dev_warn(&rdev->dev, "no dedicated gpio can control\n");
                goto bypass_gpio;
        }
 
-       gpiod_set_value(gpios->desc[id], 0);
+       gpiod_set_value(priv->enable_gpios[id], 0);
 
 bypass_gpio:
        priv->enable_flag &= ~BIT(id);
@@ -122,6 +142,7 @@ static const struct regulator_desc rt4801_regulator_descs[] = {
                .name = "DSVP",
                .ops = &rt4801_regulator_ops,
                .of_match = of_match_ptr("DSVP"),
+               .of_parse_cb = rt4801_of_parse_cb,
                .type = REGULATOR_VOLTAGE,
                .id = DSV_OUT_POS,
                .min_uV = MIN_UV,
@@ -135,6 +156,7 @@ static const struct regulator_desc rt4801_regulator_descs[] = {
                .name = "DSVN",
                .ops = &rt4801_regulator_ops,
                .of_match = of_match_ptr("DSVN"),
+               .of_parse_cb = rt4801_of_parse_cb,
                .type = REGULATOR_VOLTAGE,
                .id = DSV_OUT_NEG,
                .min_uV = MIN_UV,
@@ -172,10 +194,15 @@ static int rt4801_probe(struct i2c_client *i2c)
                return PTR_ERR(regmap);
        }
 
-       priv->enable_gpios = devm_gpiod_get_array_optional(&i2c->dev, "enable", GPIOD_OUT_HIGH);
-       if (IS_ERR(priv->enable_gpios)) {
-               dev_err(&i2c->dev, "Failed to get gpios\n");
-               return PTR_ERR(priv->enable_gpios);
+       for (i = 0; i < DSV_OUT_MAX; i++) {
+               priv->enable_gpios[i] = devm_gpiod_get_index_optional(&i2c->dev,
+                                                                     "enable",
+                                                                     i,
+                                                                     GPIOD_OUT_HIGH);
+               if (IS_ERR(priv->enable_gpios[i])) {
+                       dev_err(&i2c->dev, "Failed to get gpios\n");
+                       return PTR_ERR(priv->enable_gpios[i]);
+               }
        }
 
        for (i = 0; i < DSV_OUT_MAX; i++) {
diff --git a/drivers/regulator/rt5759-regulator.c b/drivers/regulator/rt5759-regulator.c
new file mode 100644 (file)
index 0000000..6b96899
--- /dev/null
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/bits.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/of_regulator.h>
+
+#define RT5759_REG_VENDORINFO  0x00
+#define RT5759_REG_FREQ                0x01
+#define RT5759_REG_VSEL                0x02
+#define RT5759_REG_DCDCCTRL    0x03
+#define RT5759_REG_STATUS      0x04
+#define RT5759_REG_DCDCSET     0x05
+#define RT5759A_REG_WDTEN      0x42
+
+#define RT5759_TSTEP_MASK      GENMASK(3, 2)
+#define RT5759_VSEL_MASK       GENMASK(6, 0)
+#define RT5759_DISCHARGE_MASK  BIT(3)
+#define RT5759_FPWM_MASK       BIT(2)
+#define RT5759_ENABLE_MASK     BIT(1)
+#define RT5759_OT_MASK         BIT(1)
+#define RT5759_UV_MASK         BIT(0)
+#define RT5759_OCLVL_MASK      GENMASK(7, 6)
+#define RT5759_OCLVL_SHIFT     6
+#define RT5759_OTLVL_MASK      GENMASK(5, 4)
+#define RT5759_OTLVL_SHIFT     4
+#define RT5759A_WDTEN_MASK     BIT(1)
+
+#define RT5759_MANUFACTURER_ID 0x82
+/* vsel range 0x00 ~ 0x5A */
+#define RT5759_NUM_VOLTS       91
+#define RT5759_MIN_UV          600000
+#define RT5759_STEP_UV         10000
+#define RT5759A_STEP_UV                12500
+#define RT5759_MINSS_TIMEUS    1500
+
+#define RT5759_PSKIP_MODE      0
+#define RT5759_FPWM_MODE       1
+
+enum {
+       CHIP_TYPE_RT5759 = 0,
+       CHIP_TYPE_RT5759A,
+       CHIP_TYPE_MAX
+};
+
+struct rt5759_priv {
+       struct device *dev;
+       struct regmap *regmap;
+       struct regulator_desc desc;
+       unsigned long chip_type;
+};
+
+static int rt5759_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+       struct regmap *regmap = rdev_get_regmap(rdev);
+       unsigned int mode_val;
+
+       switch (mode) {
+       case REGULATOR_MODE_NORMAL:
+               mode_val = 0;
+               break;
+       case REGULATOR_MODE_FAST:
+               mode_val = RT5759_FPWM_MASK;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return regmap_update_bits(regmap, RT5759_REG_STATUS, RT5759_FPWM_MASK,
+                                 mode_val);
+}
+
+static unsigned int rt5759_get_mode(struct regulator_dev *rdev)
+{
+       struct regmap *regmap = rdev_get_regmap(rdev);
+       unsigned int regval;
+       int ret;
+
+       ret = regmap_read(regmap, RT5759_REG_DCDCCTRL, &regval);
+       if (ret)
+               return REGULATOR_MODE_INVALID;
+
+       if (regval & RT5759_FPWM_MASK)
+               return REGULATOR_MODE_FAST;
+
+       return REGULATOR_MODE_NORMAL;
+}
+
+static int rt5759_get_error_flags(struct regulator_dev *rdev,
+                                 unsigned int *flags)
+{
+       struct regmap *regmap = rdev_get_regmap(rdev);
+       unsigned int status, events = 0;
+       int ret;
+
+       ret = regmap_read(regmap, RT5759_REG_STATUS, &status);
+       if (ret)
+               return ret;
+
+       if (status & RT5759_OT_MASK)
+               events |= REGULATOR_ERROR_OVER_TEMP;
+
+       if (status & RT5759_UV_MASK)
+               events |= REGULATOR_ERROR_UNDER_VOLTAGE;
+
+       *flags = events;
+       return 0;
+}
+
+static int rt5759_set_ocp(struct regulator_dev *rdev, int lim_uA, int severity,
+                         bool enable)
+{
+       struct regmap *regmap = rdev_get_regmap(rdev);
+       int ocp_lvl[] = { 9800000, 10800000, 11800000 };
+       unsigned int ocp_regval;
+       int i;
+
+       /* Only the over-current protection (severity PROT) is supported */
+       if (severity != REGULATOR_SEVERITY_PROT)
+               return 0;
+
+       if (enable) {
+               /* Default OCP level is 10.8 A */
+               if (lim_uA == 0)
+                       lim_uA = 10800000;
+
+               for (i = 0; i < ARRAY_SIZE(ocp_lvl); i++) {
+                       if (lim_uA <= ocp_lvl[i])
+                               break;
+               }
+
+               if (i == ARRAY_SIZE(ocp_lvl))
+                       i = ARRAY_SIZE(ocp_lvl) - 1;
+
+               ocp_regval = i + 1;
+       } else {
+               ocp_regval = 0;
+       }
+
+       return regmap_update_bits(regmap, RT5759_REG_DCDCSET, RT5759_OCLVL_MASK,
+                                 ocp_regval << RT5759_OCLVL_SHIFT);
+}
+
+static int rt5759_set_otp(struct regulator_dev *rdev, int lim, int severity,
+                         bool enable)
+{
+       struct regmap *regmap = rdev_get_regmap(rdev);
+       int otp_lvl[] = { 140, 150, 170 };
+       unsigned int otp_regval;
+       int i;
+
+       /* Only the over-temperature protection (severity PROT) is supported */
+       if (severity != REGULATOR_SEVERITY_PROT)
+               return 0;
+
+       if (enable) {
+               /* Default OTP level is 150 degrees C */
+               if (lim == 0)
+                       lim = 150;
+
+               for (i = 0; i < ARRAY_SIZE(otp_lvl); i++) {
+                       if (lim <= otp_lvl[i])
+                               break;
+               }
+
+               if (i == ARRAY_SIZE(otp_lvl))
+                       i = ARRAY_SIZE(otp_lvl) - 1;
+
+               otp_regval = i + 1;
+       } else {
+               otp_regval = 0;
+       }
+
+       return regmap_update_bits(regmap, RT5759_REG_DCDCSET, RT5759_OTLVL_MASK,
+                                 otp_regval << RT5759_OTLVL_SHIFT);
+}
+
+static const struct regulator_ops rt5759_regulator_ops = {
+       .list_voltage = regulator_list_voltage_linear,
+       .set_voltage_sel = regulator_set_voltage_sel_regmap,
+       .get_voltage_sel = regulator_get_voltage_sel_regmap,
+       .enable = regulator_enable_regmap,
+       .disable = regulator_disable_regmap,
+       .is_enabled = regulator_is_enabled_regmap,
+       .set_active_discharge = regulator_set_active_discharge_regmap,
+       .set_mode = rt5759_set_mode,
+       .get_mode = rt5759_get_mode,
+       .set_ramp_delay = regulator_set_ramp_delay_regmap,
+       .get_error_flags = rt5759_get_error_flags,
+       .set_over_current_protection = rt5759_set_ocp,
+       .set_thermal_protection = rt5759_set_otp,
+};
+
+static unsigned int rt5759_of_map_mode(unsigned int mode)
+{
+       switch (mode) {
+       case RT5759_FPWM_MODE:
+               return REGULATOR_MODE_FAST;
+       case RT5759_PSKIP_MODE:
+               return REGULATOR_MODE_NORMAL;
+       default:
+               return REGULATOR_MODE_INVALID;
+       }
+}
+
+static const unsigned int rt5759_ramp_table[] = { 20000, 15000, 10000, 5000 };
+
+static int rt5759_regulator_register(struct rt5759_priv *priv)
+{
+       struct device_node *np = priv->dev->of_node;
+       struct regulator_desc *reg_desc = &priv->desc;
+       struct regulator_config reg_cfg;
+       struct regulator_dev *rdev;
+       int ret;
+
+       reg_desc->name = "rt5759-buck";
+       reg_desc->type = REGULATOR_VOLTAGE;
+       reg_desc->owner = THIS_MODULE;
+       reg_desc->ops = &rt5759_regulator_ops;
+       reg_desc->n_voltages = RT5759_NUM_VOLTS;
+       reg_desc->min_uV = RT5759_MIN_UV;
+       reg_desc->uV_step = RT5759_STEP_UV;
+       reg_desc->vsel_reg = RT5759_REG_VSEL;
+       reg_desc->vsel_mask = RT5759_VSEL_MASK;
+       reg_desc->enable_reg = RT5759_REG_DCDCCTRL;
+       reg_desc->enable_mask = RT5759_ENABLE_MASK;
+       reg_desc->active_discharge_reg = RT5759_REG_DCDCCTRL;
+       reg_desc->active_discharge_mask = RT5759_DISCHARGE_MASK;
+       reg_desc->active_discharge_on = RT5759_DISCHARGE_MASK;
+       reg_desc->ramp_reg = RT5759_REG_FREQ;
+       reg_desc->ramp_mask = RT5759_TSTEP_MASK;
+       reg_desc->ramp_delay_table = rt5759_ramp_table;
+       reg_desc->n_ramp_values = ARRAY_SIZE(rt5759_ramp_table);
+       reg_desc->enable_time = RT5759_MINSS_TIMEUS;
+       reg_desc->of_map_mode = rt5759_of_map_mode;
+
+       /*
+        * RT5759 step uV = 10000
+        * RT5759A step uV = 12500
+        */
+       if (priv->chip_type == CHIP_TYPE_RT5759A)
+               reg_desc->uV_step = RT5759A_STEP_UV;
+
+       reg_cfg.dev = priv->dev;
+       reg_cfg.of_node = np;
+       reg_cfg.init_data = of_get_regulator_init_data(priv->dev, np, reg_desc);
+       reg_cfg.regmap = priv->regmap;
+
+       rdev = devm_regulator_register(priv->dev, reg_desc, &reg_cfg);
+       if (IS_ERR(rdev)) {
+               ret = PTR_ERR(rdev);
+               dev_err(priv->dev, "Failed to register regulator (%d)\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int rt5759_init_device_property(struct rt5759_priv *priv)
+{
+       unsigned int val = 0;
+
+       /*
+        * Only the RT5759A supports the external watchdog input.
+        */
+       if (priv->chip_type != CHIP_TYPE_RT5759A)
+               return 0;
+
+       if (device_property_read_bool(priv->dev, "richtek,watchdog-enable"))
+               val = RT5759A_WDTEN_MASK;
+
+       return regmap_update_bits(priv->regmap, RT5759A_REG_WDTEN,
+                                 RT5759A_WDTEN_MASK, val);
+}
+
+static int rt5759_manufacturer_check(struct rt5759_priv *priv)
+{
+       unsigned int vendor;
+       int ret;
+
+       ret = regmap_read(priv->regmap, RT5759_REG_VENDORINFO, &vendor);
+       if (ret)
+               return ret;
+
+       if (vendor != RT5759_MANUFACTURER_ID) {
+               dev_err(priv->dev, "vendor info not correct (%d)\n", vendor);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static bool rt5759_is_accessible_reg(struct device *dev, unsigned int reg)
+{
+       struct rt5759_priv *priv = dev_get_drvdata(dev);
+
+       if (reg <= RT5759_REG_DCDCSET)
+               return true;
+
+       if (priv->chip_type == CHIP_TYPE_RT5759A && reg == RT5759A_REG_WDTEN)
+               return true;
+
+       return false;
+}
+
+static const struct regmap_config rt5759_regmap_config = {
+       .reg_bits = 8,
+       .val_bits = 8,
+       .max_register = RT5759A_REG_WDTEN,
+       .readable_reg = rt5759_is_accessible_reg,
+       .writeable_reg = rt5759_is_accessible_reg,
+};
+
+static int rt5759_probe(struct i2c_client *i2c)
+{
+       struct rt5759_priv *priv;
+       int ret;
+
+       priv = devm_kzalloc(&i2c->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->dev = &i2c->dev;
+       priv->chip_type = (unsigned long)of_device_get_match_data(&i2c->dev);
+       i2c_set_clientdata(i2c, priv);
+
+       priv->regmap = devm_regmap_init_i2c(i2c, &rt5759_regmap_config);
+       if (IS_ERR(priv->regmap)) {
+               ret = PTR_ERR(priv->regmap);
+               dev_err(&i2c->dev, "Failed to allocate regmap (%d)\n", ret);
+               return ret;
+       }
+
+       ret = rt5759_manufacturer_check(priv);
+       if (ret) {
+               dev_err(&i2c->dev, "Failed to check device (%d)\n", ret);
+               return ret;
+       }
+
+       ret = rt5759_init_device_property(priv);
+       if (ret) {
+               dev_err(&i2c->dev, "Failed to init device (%d)\n", ret);
+               return ret;
+       }
+
+       return rt5759_regulator_register(priv);
+}
+
+static const struct of_device_id __maybe_unused rt5759_device_table[] = {
+       { .compatible = "richtek,rt5759", .data = (void *)CHIP_TYPE_RT5759 },
+       { .compatible = "richtek,rt5759a", .data = (void *)CHIP_TYPE_RT5759A },
+       {}
+};
+MODULE_DEVICE_TABLE(of, rt5759_device_table);
+
+static struct i2c_driver rt5759_driver = {
+       .driver = {
+               .name = "rt5759",
+               .of_match_table = of_match_ptr(rt5759_device_table),
+       },
+       .probe_new = rt5759_probe,
+};
+module_i2c_driver(rt5759_driver);
+
+MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
+MODULE_DESCRIPTION("Richtek RT5759 Regulator Driver");
+MODULE_LICENSE("GPL v2");
index 1f02f60ad1366fb983006c7460dde762a72d05af..41ae7ac27ff6a847b6cc3c80fbd8d1d3aaaea9ba 100644 (file)
@@ -352,7 +352,7 @@ static int scmi_regulator_probe(struct scmi_device *sdev)
                        return ret;
                }
        }
-
+       of_node_put(np);
        /*
         * Register a regulator for each valid regulator-DT-entry that we
         * can successfully reach via SCMI and has a valid associated voltage
diff --git a/drivers/regulator/sm5703-regulator.c b/drivers/regulator/sm5703-regulator.c
new file mode 100644 (file)
index 0000000..05ad28f
--- /dev/null
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/mfd/sm5703.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/of_regulator.h>
+
+enum sm5703_regulators {
+       SM5703_BUCK,
+       SM5703_LDO1,
+       SM5703_LDO2,
+       SM5703_LDO3,
+       SM5703_USBLDO1,
+       SM5703_USBLDO2,
+       SM5703_VBUS,
+       SM5703_MAX_REGULATORS,
+};
+
+static const int sm5703_ldo_voltagemap[] = {
+       1500000, 1800000, 2600000, 2800000, 3000000, 3300000,
+};
+
+static const int sm5703_buck_voltagemap[] = {
+       1000000, 1000000, 1000000, 1000000,
+       1000000, 1000000, 1000000, 1000000,
+       1000000, 1000000, 1000000, 1100000,
+       1200000, 1300000, 1400000, 1500000,
+       1600000, 1700000, 1800000, 1900000,
+       2000000, 2100000, 2200000, 2300000,
+       2400000, 2500000, 2600000, 2700000,
+       2800000, 2900000, 3000000, 3000000,
+};
+
+#define SM5703USBLDO(_name, _id)                                       \
+       [SM5703_USBLDO ## _id] = {                                      \
+               .name = _name,                                          \
+               .of_match = _name,                                      \
+               .regulators_node = "regulators",                        \
+               .type = REGULATOR_VOLTAGE,                              \
+               .id = SM5703_USBLDO ## _id,                             \
+               .ops = &sm5703_regulator_ops_fixed,                     \
+               .fixed_uV = SM5703_USBLDO_MICROVOLT,                    \
+               .enable_reg = SM5703_REG_USBLDO12,                      \
+               .enable_mask = SM5703_REG_EN_USBLDO ##_id,              \
+               .owner                  = THIS_MODULE,                  \
+       }
+
+#define SM5703VBUS(_name)                                              \
+       [SM5703_VBUS] = {                                               \
+               .name = _name,                                          \
+               .of_match = _name,                                      \
+               .regulators_node = "regulators",                        \
+               .type = REGULATOR_VOLTAGE,                              \
+               .id = SM5703_VBUS,                                      \
+               .ops = &sm5703_regulator_ops_fixed,                     \
+               .fixed_uV = SM5703_VBUS_MICROVOLT,                      \
+               .enable_reg = SM5703_REG_CNTL,                          \
+               .enable_mask = SM5703_OPERATION_MODE_MASK,              \
+               .enable_val = SM5703_OPERATION_MODE_USB_OTG_MODE,       \
+               .disable_val = SM5703_OPERATION_MODE_CHARGING_ON,       \
+               .owner                  = THIS_MODULE,                  \
+       }
+
+#define SM5703BUCK(_name)                                              \
+       [SM5703_BUCK] = {                                               \
+               .name = _name,                                          \
+               .of_match = _name,                                      \
+               .regulators_node = "regulators",                        \
+               .type = REGULATOR_VOLTAGE,                              \
+               .id = SM5703_BUCK,                                      \
+               .ops = &sm5703_regulator_ops,                           \
+               .n_voltages = ARRAY_SIZE(sm5703_buck_voltagemap),       \
+               .volt_table = sm5703_buck_voltagemap,                   \
+               .vsel_reg = SM5703_REG_BUCK,                            \
+               .vsel_mask = SM5703_BUCK_VOLT_MASK,                     \
+               .enable_reg = SM5703_REG_BUCK,                          \
+               .enable_mask = SM5703_REG_EN_BUCK,                      \
+               .owner                  = THIS_MODULE,                  \
+       }
+
+#define SM5703LDO(_name, _id)                                          \
+       [SM5703_LDO ## _id] = {                                         \
+               .name = _name,                                          \
+               .of_match = _name,                                      \
+               .regulators_node = "regulators",                        \
+               .type = REGULATOR_VOLTAGE,                              \
+               .id = SM5703_LDO ## _id,                                \
+               .ops = &sm5703_regulator_ops,                           \
+               .n_voltages = ARRAY_SIZE(sm5703_ldo_voltagemap),        \
+               .volt_table = sm5703_ldo_voltagemap,                    \
+               .vsel_reg = SM5703_REG_LDO ##_id,                       \
+               .vsel_mask = SM5703_LDO_VOLT_MASK,                      \
+               .enable_reg = SM5703_REG_LDO ##_id,                     \
+               .enable_mask = SM5703_LDO_EN,                           \
+               .owner                  = THIS_MODULE,                  \
+       }
+
+static const struct regulator_ops sm5703_regulator_ops = {
+       .enable                 = regulator_enable_regmap,
+       .disable                = regulator_disable_regmap,
+       .is_enabled             = regulator_is_enabled_regmap,
+       .list_voltage           = regulator_list_voltage_table,
+       .get_voltage_sel        = regulator_get_voltage_sel_regmap,
+       .set_voltage_sel        = regulator_set_voltage_sel_regmap,
+};
+
+static const struct regulator_ops sm5703_regulator_ops_fixed = {
+       .enable                 = regulator_enable_regmap,
+       .disable                = regulator_disable_regmap,
+       .is_enabled             = regulator_is_enabled_regmap,
+};
+
+static struct regulator_desc sm5703_regulators_desc[SM5703_MAX_REGULATORS] = {
+       SM5703BUCK("buck"),
+       SM5703LDO("ldo1", 1),
+       SM5703LDO("ldo2", 2),
+       SM5703LDO("ldo3", 3),
+       SM5703USBLDO("usbldo1", 1),
+       SM5703USBLDO("usbldo2", 2),
+       SM5703VBUS("vbus"),
+};
+
+static int sm5703_regulator_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct regulator_config config = { NULL, };
+       struct regulator_dev *rdev;
+       struct sm5703_dev *sm5703 = dev_get_drvdata(pdev->dev.parent);
+       int i;
+
+       config.dev = dev->parent;
+       config.regmap = sm5703->regmap;
+
+       for (i = 0; i < SM5703_MAX_REGULATORS; i++) {
+               rdev = devm_regulator_register(dev,
+                                              &sm5703_regulators_desc[i],
+                                              &config);
+               if (IS_ERR(rdev))
+                       return dev_err_probe(dev, PTR_ERR(rdev),
+                                            "Failed to register a regulator\n");
+       }
+
+       return 0;
+}
+
+static const struct platform_device_id sm5703_regulator_id[] = {
+       { "sm5703-regulator", 0 },
+       {}
+};
+MODULE_DEVICE_TABLE(platform, sm5703_regulator_id);
+
+static struct platform_driver sm5703_regulator_driver = {
+       .driver = {
+               .name = "sm5703-regulator",
+       },
+       .probe  = sm5703_regulator_probe,
+       .id_table       = sm5703_regulator_id,
+};
+
+module_platform_driver(sm5703_regulator_driver);
+
+MODULE_DESCRIPTION("Silicon Mitus SM5703 LDO/Buck/USB regulator driver");
+MODULE_AUTHOR("Markuss Broks <markuss.broks@gmail.com>");
+MODULE_LICENSE("GPL");
index 161622ea72592815bdb0d510e987c76d4102dd3f..30ea3bc8ca1921ced12c21c8c418475e6864db1b 100644 (file)
@@ -44,11 +44,9 @@ static int stm32_vrefbuf_enable(struct regulator_dev *rdev)
        u32 val;
        int ret;
 
-       ret = pm_runtime_get_sync(priv->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(priv->dev);
+       ret = pm_runtime_resume_and_get(priv->dev);
+       if (ret < 0)
                return ret;
-       }
 
        val = readl_relaxed(priv->base + STM32_VREFBUF_CSR);
        val = (val & ~STM32_HIZ) | STM32_ENVR;
@@ -81,11 +79,9 @@ static int stm32_vrefbuf_disable(struct regulator_dev *rdev)
        u32 val;
        int ret;
 
-       ret = pm_runtime_get_sync(priv->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(priv->dev);
+       ret = pm_runtime_resume_and_get(priv->dev);
+       if (ret < 0)
                return ret;
-       }
 
        val = readl_relaxed(priv->base + STM32_VREFBUF_CSR);
        val &= ~STM32_ENVR;
@@ -102,11 +98,9 @@ static int stm32_vrefbuf_is_enabled(struct regulator_dev *rdev)
        struct stm32_vrefbuf *priv = rdev_get_drvdata(rdev);
        int ret;
 
-       ret = pm_runtime_get_sync(priv->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(priv->dev);
+       ret = pm_runtime_resume_and_get(priv->dev);
+       if (ret < 0)
                return ret;
-       }
 
        ret = readl_relaxed(priv->base + STM32_VREFBUF_CSR) & STM32_ENVR;
 
@@ -123,11 +117,9 @@ static int stm32_vrefbuf_set_voltage_sel(struct regulator_dev *rdev,
        u32 val;
        int ret;
 
-       ret = pm_runtime_get_sync(priv->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(priv->dev);
+       ret = pm_runtime_resume_and_get(priv->dev);
+       if (ret < 0)
                return ret;
-       }
 
        val = readl_relaxed(priv->base + STM32_VREFBUF_CSR);
        val = (val & ~STM32_VRS) | FIELD_PREP(STM32_VRS, sel);
@@ -145,11 +137,9 @@ static int stm32_vrefbuf_get_voltage_sel(struct regulator_dev *rdev)
        u32 val;
        int ret;
 
-       ret = pm_runtime_get_sync(priv->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(priv->dev);
+       ret = pm_runtime_resume_and_get(priv->dev);
+       if (ret < 0)
                return ret;
-       }
 
        val = readl_relaxed(priv->base + STM32_VREFBUF_CSR);
        ret = FIELD_GET(STM32_VRS, val);
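
[Editor's note] Each hunk above swaps the open-coded pm_runtime_get_sync()/pm_runtime_put_noidle() error handling for pm_runtime_resume_and_get(). Roughly, the helper behaves like this simplified sketch (the real helper lives in linux/pm_runtime.h):

    #include <linux/pm_runtime.h>

    /* Simplified equivalent: on failure the helper drops the usage
     * count itself, which is the put_noidle() every call site used to
     * open-code, so the callers shrink to a plain error check. */
    static inline int resume_and_get_sketch(struct device *dev)
    {
            int ret = pm_runtime_get_sync(dev);

            if (ret < 0) {
                    pm_runtime_put_noidle(dev);
                    return ret;
            }
            return 0;
    }
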
index 7ab95d05264402b74dd411befc7d639f3d51d828..59c0f38cc08dfe9ac69317ca6514362b027f1a7f 100644 (file)
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
-
-#define TIMER_TIME_LOW         0x00    /* get low bits of current time  */
-                                       /*   and update TIMER_TIME_HIGH  */
-#define TIMER_TIME_HIGH        0x04    /* get high bits of time at last */
-                                       /*   TIMER_TIME_LOW read         */
-#define TIMER_ALARM_LOW        0x08    /* set low bits of alarm and     */
-                                       /*   activate it                 */
-#define TIMER_ALARM_HIGH       0x0c    /* set high bits of next alarm   */
-#define TIMER_IRQ_ENABLED      0x10
-#define TIMER_CLEAR_ALARM      0x14
-#define TIMER_ALARM_STATUS     0x18
-#define TIMER_CLEAR_INTERRUPT  0x1c
+#include <linux/goldfish.h>
+#include <clocksource/timer-goldfish.h>
 
 struct goldfish_rtc {
        void __iomem *base;
@@ -41,8 +31,8 @@ static int goldfish_rtc_read_alarm(struct device *dev,
        rtcdrv = dev_get_drvdata(dev);
        base = rtcdrv->base;
 
-       rtc_alarm_low = readl(base + TIMER_ALARM_LOW);
-       rtc_alarm_high = readl(base + TIMER_ALARM_HIGH);
+       rtc_alarm_low = gf_ioread32(base + TIMER_ALARM_LOW);
+       rtc_alarm_high = gf_ioread32(base + TIMER_ALARM_HIGH);
        rtc_alarm = (rtc_alarm_high << 32) | rtc_alarm_low;
 
        do_div(rtc_alarm, NSEC_PER_SEC);
@@ -50,7 +40,7 @@ static int goldfish_rtc_read_alarm(struct device *dev,
 
        rtc_time64_to_tm(rtc_alarm, &alrm->time);
 
-       if (readl(base + TIMER_ALARM_STATUS))
+       if (gf_ioread32(base + TIMER_ALARM_STATUS))
                alrm->enabled = 1;
        else
                alrm->enabled = 0;
@@ -71,18 +61,18 @@ static int goldfish_rtc_set_alarm(struct device *dev,
 
        if (alrm->enabled) {
                rtc_alarm64 = rtc_tm_to_time64(&alrm->time) * NSEC_PER_SEC;
-               writel((rtc_alarm64 >> 32), base + TIMER_ALARM_HIGH);
-               writel(rtc_alarm64, base + TIMER_ALARM_LOW);
-               writel(1, base + TIMER_IRQ_ENABLED);
+               gf_iowrite32((rtc_alarm64 >> 32), base + TIMER_ALARM_HIGH);
+               gf_iowrite32(rtc_alarm64, base + TIMER_ALARM_LOW);
+               gf_iowrite32(1, base + TIMER_IRQ_ENABLED);
        } else {
                /*
                 * if this function was called with enabled=0
                 * then it could mean that the application is
                 * trying to cancel an ongoing alarm
                 */
-               rtc_status_reg = readl(base + TIMER_ALARM_STATUS);
+               rtc_status_reg = gf_ioread32(base + TIMER_ALARM_STATUS);
                if (rtc_status_reg)
-                       writel(1, base + TIMER_CLEAR_ALARM);
+                       gf_iowrite32(1, base + TIMER_CLEAR_ALARM);
        }
 
        return 0;
@@ -98,9 +88,9 @@ static int goldfish_rtc_alarm_irq_enable(struct device *dev,
        base = rtcdrv->base;
 
        if (enabled)
-               writel(1, base + TIMER_IRQ_ENABLED);
+               gf_iowrite32(1, base + TIMER_IRQ_ENABLED);
        else
-               writel(0, base + TIMER_IRQ_ENABLED);
+               gf_iowrite32(0, base + TIMER_IRQ_ENABLED);
 
        return 0;
 }
@@ -110,7 +100,7 @@ static irqreturn_t goldfish_rtc_interrupt(int irq, void *dev_id)
        struct goldfish_rtc *rtcdrv = dev_id;
        void __iomem *base = rtcdrv->base;
 
-       writel(1, base + TIMER_CLEAR_INTERRUPT);
+       gf_iowrite32(1, base + TIMER_CLEAR_INTERRUPT);
 
        rtc_update_irq(rtcdrv->rtc, 1, RTC_IRQF | RTC_AF);
 
@@ -128,8 +118,8 @@ static int goldfish_rtc_read_time(struct device *dev, struct rtc_time *tm)
        rtcdrv = dev_get_drvdata(dev);
        base = rtcdrv->base;
 
-       time_low = readl(base + TIMER_TIME_LOW);
-       time_high = readl(base + TIMER_TIME_HIGH);
+       time_low = gf_ioread32(base + TIMER_TIME_LOW);
+       time_high = gf_ioread32(base + TIMER_TIME_HIGH);
        time = (time_high << 32) | time_low;
 
        do_div(time, NSEC_PER_SEC);
@@ -149,8 +139,8 @@ static int goldfish_rtc_set_time(struct device *dev, struct rtc_time *tm)
        base = rtcdrv->base;
 
        now64 = rtc_tm_to_time64(tm) * NSEC_PER_SEC;
-       writel((now64 >> 32), base + TIMER_TIME_HIGH);
-       writel(now64, base + TIMER_TIME_LOW);
+       gf_iowrite32((now64 >> 32), base + TIMER_TIME_HIGH);
+       gf_iowrite32(now64, base + TIMER_TIME_LOW);
 
        return 0;
 }
index 5b3e4da6340612f78eec1e691966b60cde6661ec..5252ce4cbda4ecd71a6134f63c4cc672c190ed4f 100644 (file)
@@ -370,6 +370,23 @@ CLK_OF_DECLARE_DRIVER(sun8i_h3_rtc_clk, "allwinner,sun8i-h3-rtc",
 CLK_OF_DECLARE_DRIVER(sun50i_h5_rtc_clk, "allwinner,sun50i-h5-rtc",
                      sun8i_h3_rtc_clk_init);
 
+static const struct sun6i_rtc_clk_data sun50i_h6_rtc_data = {
+       .rc_osc_rate = 16000000,
+       .fixed_prescaler = 32,
+       .has_prescaler = 1,
+       .has_out_clk = 1,
+       .export_iosc = 1,
+       .has_losc_en = 1,
+       .has_auto_swt = 1,
+};
+
+static void __init sun50i_h6_rtc_clk_init(struct device_node *node)
+{
+       sun6i_rtc_clk_init(node, &sun50i_h6_rtc_data);
+}
+CLK_OF_DECLARE_DRIVER(sun50i_h6_rtc_clk, "allwinner,sun50i-h6-rtc",
+                     sun50i_h6_rtc_clk_init);
+
 /*
  * The R40 user manual is self-conflicting on whether the prescaler is
  * fixed or configurable. The clock diagram shows it as fixed, but there
index e084f4deddddd26429c01a38bde53962f258fb8b..60be7f7bf2d167d02e1526cc7367adfd8e0e1642 100644 (file)
@@ -782,7 +782,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
        blk_queue_segment_boundary(q, PAGE_SIZE - 1);
 
        q->limits.discard_granularity = logical_block_size;
-       q->limits.discard_alignment = PAGE_SIZE;
 
        /* Calculate max_discard_sectors and make it PAGE aligned */
        max_bytes = USHRT_MAX * logical_block_size;
@@ -791,7 +790,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
 
        blk_queue_max_discard_sectors(q, max_discard_sectors);
        blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 }
 
 static int dasd_fba_pe_handler(struct dasd_device *device,
index f356607835d84b4895c94dcd1428f3bcedcdc978..4ae07c7e2175fb307c867af7f3a684837697ebd8 100644 (file)
@@ -771,35 +771,36 @@ static struct tty_driver *con3215_device(struct console *c, int *index)
 }
 
 /*
- * panic() calls con3215_flush through a panic_notifier
- * before the system enters a disabled, endless loop.
+ * The function below is called as a panic/reboot notifier before the
+ * system enters a disabled, endless loop.
+ *
+ * Notice we must use the spin_trylock() alternative to prevent lockups
+ * in atomic context (the panic routine runs with secondary CPUs, local
+ * IRQs and preemption disabled).
  */
-static void con3215_flush(void)
+static int con3215_notify(struct notifier_block *self,
+                         unsigned long event, void *data)
 {
        struct raw3215_info *raw;
        unsigned long flags;
 
        raw = raw3215[0];  /* console 3215 is the first one */
-       spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags);
+       if (!spin_trylock_irqsave(get_ccwdev_lock(raw->cdev), flags))
+               return NOTIFY_DONE;
        raw3215_make_room(raw, RAW3215_BUFFER_SIZE);
        spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags);
-}
 
-static int con3215_notify(struct notifier_block *self,
-                         unsigned long event, void *data)
-{
-       con3215_flush();
-       return NOTIFY_OK;
+       return NOTIFY_DONE;
 }
 
 static struct notifier_block on_panic_nb = {
        .notifier_call = con3215_notify,
-       .priority = 0,
+       .priority = INT_MIN + 1, /* run the callback late */
 };
 
 static struct notifier_block on_reboot_nb = {
        .notifier_call = con3215_notify,
-       .priority = 0,
+       .priority = INT_MIN + 1, /* run the callback late */
 };
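
[Editor's note] For context, notifier blocks like the two above are attached to the panic and reboot chains with the standard kernel API. A brief sketch; the registration calls are the real API, the wrapper function is illustrative:

    #include <linux/notifier.h>
    #include <linux/panic_notifier.h>
    #include <linux/reboot.h>

    /* Sketch: hook the blocks into both chains; the INT_MIN + 1
     * priority above makes the console flush run after most other
     * callbacks on the chain. */
    static void register_console_notifiers(void)
    {
            atomic_notifier_chain_register(&panic_notifier_list,
                                           &on_panic_nb);
            register_reboot_notifier(&on_reboot_nb);
    }
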
 
 /*
index e4592890f20aeff10bb927a739c51edfd093fc35..10f6a37fb153117f835e0b79f35e0998dde71e48 100644 (file)
@@ -535,20 +535,26 @@ con3270_wait_write(struct con3270 *cp)
 }
 
 /*
- * panic() calls con3270_flush through a panic_notifier
- * before the system enters a disabled, endless loop.
+ * The function below is called as a panic/reboot notifier before the
+ * system enters a disabled, endless loop.
+ *
+ * Notice we must use the spin_trylock() alternative to prevent lockups
+ * in atomic context (the panic routine runs with secondary CPUs, local
+ * IRQs and preemption disabled).
  */
-static void
-con3270_flush(void)
+static int con3270_notify(struct notifier_block *self,
+                         unsigned long event, void *data)
 {
        struct con3270 *cp;
        unsigned long flags;
 
        cp = condev;
        if (!cp->view.dev)
-               return;
-       raw3270_activate_view(&cp->view);
-       spin_lock_irqsave(&cp->view.lock, flags);
+               return NOTIFY_DONE;
+       if (!raw3270_view_lock_unavailable(&cp->view))
+               raw3270_activate_view(&cp->view);
+       if (!spin_trylock_irqsave(&cp->view.lock, flags))
+               return NOTIFY_DONE;
        con3270_wait_write(cp);
        cp->nr_up = 0;
        con3270_rebuild_update(cp);
@@ -560,23 +566,18 @@ con3270_flush(void)
                con3270_wait_write(cp);
        }
        spin_unlock_irqrestore(&cp->view.lock, flags);
-}
 
-static int con3270_notify(struct notifier_block *self,
-                         unsigned long event, void *data)
-{
-       con3270_flush();
-       return NOTIFY_OK;
+       return NOTIFY_DONE;
 }
 
 static struct notifier_block on_panic_nb = {
        .notifier_call = con3270_notify,
-       .priority = 0,
+       .priority = INT_MIN + 1, /* run the callback late */
 };
 
 static struct notifier_block on_reboot_nb = {
        .notifier_call = con3270_notify,
-       .priority = 0,
+       .priority = INT_MIN + 1, /* run the callback late */
 };
 
 /*
index dfde0d941c3c4fc9c71a1329e6c8a7d6b6afdc43..4e2b3a1a3b2ef3f39feff9cae75f4bd59f8776c1 100644 (file)
@@ -830,6 +830,21 @@ raw3270_create_device(struct ccw_device *cdev)
        return rp;
 }
 
+/*
+ * This helper just checks whether it is safe to activate a
+ * view in the panic() context, given the locking restrictions.
+ */
+int raw3270_view_lock_unavailable(struct raw3270_view *view)
+{
+       struct raw3270 *rp = view->dev;
+
+       if (!rp)
+               return -ENODEV;
+       if (spin_is_locked(get_ccwdev_lock(rp->cdev)))
+               return -EBUSY;
+       return 0;
+}
+
 /*
  * Activate a view.
  */
index c6645167cd2bf2ae2d84d15fe27e6f1391dbb774..4cb6b5ee44ca4283357bd8faa485282e25afeee9 100644 (file)
@@ -160,6 +160,7 @@ struct raw3270_view {
 };
 
 int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int);
+int raw3270_view_lock_unavailable(struct raw3270_view *view);
 int raw3270_activate_view(struct raw3270_view *);
 void raw3270_del_view(struct raw3270_view *);
 void raw3270_deactivate_view(struct raw3270_view *);
index fe5ee2646fcf14b14117e506f55ac8bb78d96d50..e5d947c763ea5d41e202059a9f407f1d84d2cd6f 100644 (file)
@@ -220,30 +220,34 @@ sclp_console_device(struct console *c, int *index)
 }
 
 /*
- * Make sure that all buffers will be flushed to the SCLP.
+ * This panic/reboot notifier makes sure that all buffers
+ * will be flushed to the SCLP.
  */
-static void
-sclp_console_flush(void)
+static int sclp_console_notify(struct notifier_block *self,
+                              unsigned long event, void *data)
 {
+       /*
+        * Check the lock state before sclp_conbuf_emit() /
+        * sclp_console_sync_queue() actually take the lock, to
+        * prevent potential lockups in atomic context.
+        */
+       if (spin_is_locked(&sclp_con_lock))
+               return NOTIFY_DONE;
+
        sclp_conbuf_emit();
        sclp_console_sync_queue();
-}
 
-static int sclp_console_notify(struct notifier_block *self,
-                              unsigned long event, void *data)
-{
-       sclp_console_flush();
-       return NOTIFY_OK;
+       return NOTIFY_DONE;
 }
 
 static struct notifier_block on_panic_nb = {
        .notifier_call = sclp_console_notify,
-       .priority = 1,
+       .priority = INT_MIN + 1, /* run the callback late */
 };
 
 static struct notifier_block on_reboot_nb = {
        .notifier_call = sclp_console_notify,
-       .priority = 1,
+       .priority = INT_MIN + 1, /* run the callback late */
 };
 
 /*
index e9943a86c361e7ac341211bdd26a494332734306..dd313ff57df3b4bca267ae852d01803769ea06ea 100644 (file)
@@ -49,8 +49,10 @@ static void __init sclp_early_facilities_detect(void)
                S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
        if (sccb->fac91 & 0x40)
                S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST;
-       if (sccb->cpuoff > 134)
+       if (sccb->cpuoff > 134) {
                sclp.has_diag318 = !!(sccb->byte_134 & 0x80);
+               sclp.has_iplcc = !!(sccb->byte_134 & 0x02);
+       }
        if (sccb->cpuoff > 137)
                sclp.has_sipl = !!(sccb->cbl & 0x4000);
        sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
index 3b4e7e5d9b71d1abd12c3a7dd5e54c88351da260..a32f34a1c6d27f19d07551c2191ae3d28632b9a6 100644 (file)
@@ -769,21 +769,6 @@ __initcall(sclp_vt220_tty_init);
 
 #ifdef CONFIG_SCLP_VT220_CONSOLE
 
-static void __sclp_vt220_flush_buffer(void)
-{
-       unsigned long flags;
-
-       sclp_vt220_emit_current();
-       spin_lock_irqsave(&sclp_vt220_lock, flags);
-       del_timer(&sclp_vt220_timer);
-       while (sclp_vt220_queue_running) {
-               spin_unlock_irqrestore(&sclp_vt220_lock, flags);
-               sclp_sync_wait();
-               spin_lock_irqsave(&sclp_vt220_lock, flags);
-       }
-       spin_unlock_irqrestore(&sclp_vt220_lock, flags);
-}
-
 static void
 sclp_vt220_con_write(struct console *con, const char *buf, unsigned int count)
 {
@@ -797,22 +782,41 @@ sclp_vt220_con_device(struct console *c, int *index)
        return sclp_vt220_driver;
 }
 
+/*
+ * This panic/reboot notifier runs in atomic context, so locking
+ * restrictions apply; waiting on a contended lock here could deadlock.
+ */
 static int
 sclp_vt220_notify(struct notifier_block *self,
                          unsigned long event, void *data)
 {
-       __sclp_vt220_flush_buffer();
-       return NOTIFY_OK;
+       unsigned long flags;
+
+       if (spin_is_locked(&sclp_vt220_lock))
+               return NOTIFY_DONE;
+
+       sclp_vt220_emit_current();
+
+       spin_lock_irqsave(&sclp_vt220_lock, flags);
+       del_timer(&sclp_vt220_timer);
+       while (sclp_vt220_queue_running) {
+               spin_unlock_irqrestore(&sclp_vt220_lock, flags);
+               sclp_sync_wait();
+               spin_lock_irqsave(&sclp_vt220_lock, flags);
+       }
+       spin_unlock_irqrestore(&sclp_vt220_lock, flags);
+
+       return NOTIFY_DONE;
 }
 
 static struct notifier_block on_panic_nb = {
        .notifier_call = sclp_vt220_notify,
-       .priority = 1,
+       .priority = INT_MIN + 1, /* run the callback late */
 };
 
 static struct notifier_block on_reboot_nb = {
        .notifier_call = sclp_vt220_notify,
-       .priority = 1,
+       .priority = INT_MIN + 1, /* run the callback late */
 };
 
 /* Structure needed to register with printk */
index 297fb399363ccff00aef5904734a4c11c482a26a..620a917cd3a1550eccd34f914e6c5c0e4abd6003 100644 (file)
@@ -1255,7 +1255,7 @@ exit:
 EXPORT_SYMBOL_GPL(css_general_characteristics);
 EXPORT_SYMBOL_GPL(css_chsc_characteristics);
 
-int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta)
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl, long *clock_delta)
 {
        struct {
                struct chsc_header request;
@@ -1266,7 +1266,7 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta)
                unsigned int rsvd2[5];
                struct chsc_header response;
                unsigned int rsvd3[3];
-               u64 clock_delta;
+               s64 clock_delta;
                unsigned int rsvd4[2];
        } *rr;
        int rc;
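
[Editor's note] The u64 -> long/s64 change above matters because the reported clock delta can be negative; read back through a u64, the same bit pattern shows up as an enormous positive value. A standalone illustration (values are made up):

    #include <linux/printk.h>
    #include <linux/types.h>

    /* Illustration only: an s64 delta survives intact, while the old
     * u64 plumbing turned negative deltas into huge positive ones. */
    static void clock_delta_sign_demo(void)
    {
            s64 reported = -42;             /* "clock is behind" */
            u64 misread = (u64)reported;    /* 0xffffffffffffffd6 */

            pr_info("signed=%lld unsigned=%llu\n", reported, misread);
    }
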
index fdf16cb7088194cb319e6613c22239ead3d11cad..5c13d2079d96f32e8d85f208d09d74c48f94516a 100644 (file)
@@ -179,7 +179,7 @@ static int ap_qci_available(void)
  * ap_apft_available(): Test if AP facilities test (APFT)
  * facility is available.
  *
- * Returns 1 if APFT is is available.
+ * Returns 1 if APFT is available.
  */
 static int ap_apft_available(void)
 {
@@ -693,6 +693,24 @@ void ap_send_online_uevent(struct ap_device *ap_dev, int online)
 }
 EXPORT_SYMBOL(ap_send_online_uevent);
 
+static void ap_send_mask_changed_uevent(unsigned long *newapm,
+                                       unsigned long *newaqm)
+{
+       char buf[100];
+       char *envp[] = { buf, NULL };
+
+       if (newapm)
+               snprintf(buf, sizeof(buf),
+                        "APMASK=0x%016lx%016lx%016lx%016lx\n",
+                        newapm[0], newapm[1], newapm[2], newapm[3]);
+       else
+               snprintf(buf, sizeof(buf),
+                        "AQMASK=0x%016lx%016lx%016lx%016lx\n",
+                        newaqm[0], newaqm[1], newaqm[2], newaqm[3]);
+
+       kobject_uevent_env(&ap_root_device->kobj, KOBJ_CHANGE, envp);
+}
+
 /*
  * calc # of bound APQNs
  */
@@ -704,7 +722,7 @@ struct __ap_calc_ctrs {
 
 static int __ap_calc_helper(struct device *dev, void *arg)
 {
-       struct __ap_calc_ctrs *pctrs = (struct __ap_calc_ctrs *) arg;
+       struct __ap_calc_ctrs *pctrs = (struct __ap_calc_ctrs *)arg;
 
        if (is_queue_dev(dev)) {
                pctrs->apqns++;
@@ -720,7 +738,7 @@ static void ap_calc_bound_apqns(unsigned int *apqns, unsigned int *bound)
        struct __ap_calc_ctrs ctrs;
 
        memset(&ctrs, 0, sizeof(ctrs));
-       bus_for_each_dev(&ap_bus_type, NULL, (void *) &ctrs, __ap_calc_helper);
+       bus_for_each_dev(&ap_bus_type, NULL, (void *)&ctrs, __ap_calc_helper);
 
        *apqns = ctrs.apqns;
        *bound = ctrs.bound;
@@ -781,7 +799,7 @@ EXPORT_SYMBOL(ap_wait_init_apqn_bindings_complete);
 static int __ap_queue_devices_with_id_unregister(struct device *dev, void *data)
 {
        if (is_queue_dev(dev) &&
-           AP_QID_CARD(to_ap_queue(dev)->qid) == (int)(long) data)
+           AP_QID_CARD(to_ap_queue(dev)->qid) == (int)(long)data)
                device_unregister(dev);
        return 0;
 }
@@ -794,8 +812,8 @@ static int __ap_revise_reserved(struct device *dev, void *dummy)
                card = AP_QID_CARD(to_ap_queue(dev)->qid);
                queue = AP_QID_QUEUE(to_ap_queue(dev)->qid);
                mutex_lock(&ap_perms_mutex);
-               devres = test_bit_inv(card, ap_perms.apm)
-                       && test_bit_inv(queue, ap_perms.aqm);
+               devres = test_bit_inv(card, ap_perms.apm) &&
+                       test_bit_inv(queue, ap_perms.aqm);
                mutex_unlock(&ap_perms_mutex);
                drvres = to_ap_drv(dev->driver)->flags
                        & AP_DRIVER_FLAG_DEFAULT;
@@ -826,8 +844,8 @@ int ap_owned_by_def_drv(int card, int queue)
 
        mutex_lock(&ap_perms_mutex);
 
-       if (test_bit_inv(card, ap_perms.apm)
-           && test_bit_inv(queue, ap_perms.aqm))
+       if (test_bit_inv(card, ap_perms.apm) &&
+           test_bit_inv(queue, ap_perms.aqm))
                rc = 1;
 
        mutex_unlock(&ap_perms_mutex);
@@ -876,8 +894,8 @@ static int ap_device_probe(struct device *dev)
                card = AP_QID_CARD(to_ap_queue(dev)->qid);
                queue = AP_QID_QUEUE(to_ap_queue(dev)->qid);
                mutex_lock(&ap_perms_mutex);
-               devres = test_bit_inv(card, ap_perms.apm)
-                       && test_bit_inv(queue, ap_perms.aqm);
+               devres = test_bit_inv(card, ap_perms.apm) &&
+                       test_bit_inv(queue, ap_perms.aqm);
                mutex_unlock(&ap_perms_mutex);
                drvres = ap_drv->flags & AP_DRIVER_FLAG_DEFAULT;
                if (!!devres != !!drvres)
@@ -898,8 +916,9 @@ static int ap_device_probe(struct device *dev)
                if (is_queue_dev(dev))
                        hash_del(&to_ap_queue(dev)->hnode);
                spin_unlock_bh(&ap_queues_lock);
-       } else
+       } else {
                ap_check_bindings_complete();
+       }
 
 out:
        if (rc)
@@ -980,8 +999,8 @@ void ap_bus_force_rescan(void)
 EXPORT_SYMBOL(ap_bus_force_rescan);
 
 /*
-* A config change has happened, force an ap bus rescan.
-*/
+ * A config change has happened, force an ap bus rescan.
+ */
 void ap_bus_cfg_chg(void)
 {
        AP_DBF_DBG("%s config change, forcing bus rescan\n", __func__);
@@ -1105,7 +1124,7 @@ int ap_parse_mask_str(const char *str,
        if (bits & 0x07)
                return -EINVAL;
 
-       size = BITS_TO_LONGS(bits)*sizeof(unsigned long);
+       size = BITS_TO_LONGS(bits) * sizeof(unsigned long);
        newmap = kmalloc(size, GFP_KERNEL);
        if (!newmap)
                return -ENOMEM;
@@ -1241,8 +1260,9 @@ static ssize_t poll_thread_store(struct bus_type *bus,
                rc = ap_poll_thread_start();
                if (rc)
                        count = rc;
-       } else
+       } else {
                ap_poll_thread_stop();
+       }
        return count;
 }
 
@@ -1355,7 +1375,7 @@ static int apmask_commit(unsigned long *newapm)
 static ssize_t apmask_store(struct bus_type *bus, const char *buf,
                            size_t count)
 {
-       int rc;
+       int rc, changes = 0;
        DECLARE_BITMAP(newapm, AP_DEVICES);
 
        if (mutex_lock_interruptible(&ap_perms_mutex))
@@ -1365,14 +1385,19 @@ static ssize_t apmask_store(struct bus_type *bus, const char *buf,
        if (rc)
                goto done;
 
-       rc = apmask_commit(newapm);
+       changes = memcmp(ap_perms.apm, newapm, APMASKSIZE);
+       if (changes)
+               rc = apmask_commit(newapm);
 
 done:
        mutex_unlock(&ap_perms_mutex);
        if (rc)
                return rc;
 
-       ap_bus_revise_bindings();
+       if (changes) {
+               ap_bus_revise_bindings();
+               ap_send_mask_changed_uevent(newapm, NULL);
+       }
 
        return count;
 }
@@ -1443,7 +1468,7 @@ static int aqmask_commit(unsigned long *newaqm)
 static ssize_t aqmask_store(struct bus_type *bus, const char *buf,
                            size_t count)
 {
-       int rc;
+       int rc, changes = 0;
        DECLARE_BITMAP(newaqm, AP_DOMAINS);
 
        if (mutex_lock_interruptible(&ap_perms_mutex))
@@ -1453,14 +1478,19 @@ static ssize_t aqmask_store(struct bus_type *bus, const char *buf,
        if (rc)
                goto done;
 
-       rc = aqmask_commit(newaqm);
+       changes = memcmp(ap_perms.aqm, newaqm, AQMASKSIZE);
+       if (changes)
+               rc = aqmask_commit(newaqm);
 
 done:
        mutex_unlock(&ap_perms_mutex);
        if (rc)
                return rc;
 
-       ap_bus_revise_bindings();
+       if (changes) {
+               ap_bus_revise_bindings();
+               ap_send_mask_changed_uevent(NULL, newaqm);
+       }
 
        return count;
 }
@@ -1605,9 +1635,9 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
                apinfo.mode = (func >> 26) & 0x07;
                apinfo.cat = AP_DEVICE_TYPE_CEX8;
                status = ap_qact(qid, 0, &apinfo);
-               if (status.response_code == AP_RESPONSE_NORMAL
-                   && apinfo.cat >= AP_DEVICE_TYPE_CEX2A
-                   && apinfo.cat <= AP_DEVICE_TYPE_CEX8)
+               if (status.response_code == AP_RESPONSE_NORMAL &&
+                   apinfo.cat >= AP_DEVICE_TYPE_CEX2A &&
+                   apinfo.cat <= AP_DEVICE_TYPE_CEX8)
                        comp_type = apinfo.cat;
        }
        if (!comp_type)
@@ -1627,7 +1657,7 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
  */
 static int __match_card_device_with_id(struct device *dev, const void *data)
 {
-       return is_card_dev(dev) && to_ap_card(dev)->id == (int)(long)(void *) data;
+       return is_card_dev(dev) && to_ap_card(dev)->id == (int)(long)(void *)data;
 }
 
 /*
@@ -1636,7 +1666,7 @@ static int __match_card_device_with_id(struct device *dev, const void *data)
  */
 static int __match_queue_device_with_qid(struct device *dev, const void *data)
 {
-       return is_queue_dev(dev) && to_ap_queue(dev)->qid == (int)(long) data;
+       return is_queue_dev(dev) && to_ap_queue(dev)->qid == (int)(long)data;
 }
 
 /*
@@ -1645,8 +1675,8 @@ static int __match_queue_device_with_qid(struct device *dev, const void *data)
  */
 static int __match_queue_device_with_queue_id(struct device *dev, const void *data)
 {
-       return is_queue_dev(dev)
-               && AP_QID_QUEUE(to_ap_queue(dev)->qid) == (int)(long) data;
+       return is_queue_dev(dev) &&
+               AP_QID_QUEUE(to_ap_queue(dev)->qid) == (int)(long)data;
 }
 
 /* Helper function for notify_config_changed */
@@ -1699,7 +1729,7 @@ static inline void notify_scan_complete(void)
 static inline void ap_scan_rm_card_dev_and_queue_devs(struct ap_card *ac)
 {
        bus_for_each_dev(&ap_bus_type, NULL,
-                        (void *)(long) ac->id,
+                        (void *)(long)ac->id,
                         __ap_queue_devices_with_id_unregister);
        device_unregister(&ac->ap_dev.device);
 }
@@ -1727,7 +1757,7 @@ static inline void ap_scan_domains(struct ap_card *ac)
        for (dom = 0; dom <= ap_max_domain_id; dom++) {
                qid = AP_MKQID(ac->id, dom);
                dev = bus_find_device(&ap_bus_type, NULL,
-                                     (void *)(long) qid,
+                                     (void *)(long)qid,
                                      __match_queue_device_with_qid);
                aq = dev ? to_ap_queue(dev) : NULL;
                if (!ap_test_config_usage_domain(dom)) {
@@ -1873,7 +1903,7 @@ static inline void ap_scan_adapter(int ap)
 
        /* Is there currently a card device for this adapter ? */
        dev = bus_find_device(&ap_bus_type, NULL,
-                             (void *)(long) ap,
+                             (void *)(long)ap,
                              __match_card_device_with_id);
        ac = dev ? to_ap_card(dev) : NULL;
 
@@ -2074,7 +2104,7 @@ static void ap_scan_bus(struct work_struct *unused)
        if (ap_domain_index >= 0) {
                struct device *dev =
                        bus_find_device(&ap_bus_type, NULL,
-                                       (void *)(long) ap_domain_index,
+                                       (void *)(long)ap_domain_index,
                                        __match_queue_device_with_queue_id);
                if (dev)
                        put_device(dev);
@@ -2109,7 +2139,7 @@ static int __init ap_debug_init(void)
 
 static void __init ap_perms_init(void)
 {
-       /* all resources useable if no kernel parameter string given */
+       /* all resources usable if no kernel parameter string given */
        memset(&ap_perms.ioctlm, 0xFF, sizeof(ap_perms.ioctlm));
        memset(&ap_perms.apm, 0xFF, sizeof(ap_perms.apm));
        memset(&ap_perms.aqm, 0xFF, sizeof(ap_perms.aqm));
index 6a65885f5f43fcc821bd340dd214791569b66a6a..0c40af157df2339034ddb796018a12683a71a4b0 100644 (file)
@@ -317,6 +317,7 @@ struct ap_perms {
        unsigned long aqm[BITS_TO_LONGS(AP_DOMAINS)];
        unsigned long adm[BITS_TO_LONGS(AP_DOMAINS)];
 };
+
 extern struct ap_perms ap_perms;
 extern struct mutex ap_perms_mutex;
 
index 205045cd998de0d7f34e7e7aed7d88b78117cef0..c48b0db824e3e64786974e62de530fbfc17c0e14 100644 (file)
@@ -99,7 +99,7 @@ int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
 {
        struct ap_queue_status status;
 
-       if (msg == NULL)
+       if (!msg)
                return -EINVAL;
        status = ap_dqap(qid, psmid, msg, length, NULL, NULL);
        switch (status.response_code) {
@@ -603,7 +603,7 @@ static ssize_t interrupt_show(struct device *dev,
 static DEVICE_ATTR_RO(interrupt);
 
 static ssize_t config_show(struct device *dev,
-                            struct device_attribute *attr, char *buf)
+                          struct device_attribute *attr, char *buf)
 {
        struct ap_queue *aq = to_ap_queue(dev);
        int rc;
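
The config_show() hunk is pure continuation-line realignment, but the surrounding pattern is worth spelling out: DEVICE_ATTR_RO(interrupt) works by naming convention, expanding to a read-only sysfs attribute wired to interrupt_show(). A sketch of that convention; the attribute name and body here are invented, not the driver's:

    #include <linux/device.h>
    #include <linux/sysfs.h>

    /* <name>_show() is found by the DEVICE_ATTR_RO(<name>) macro below */
    static ssize_t example_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
    {
            /* sysfs_emit() bounds the write to one PAGE_SIZE buffer */
            return sysfs_emit(buf, "%d\n", 1);
    }
    static DEVICE_ATTR_RO(example);         /* creates dev_attr_example */
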
@@ -827,8 +827,9 @@ int ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg)
                aq->requestq_count++;
                aq->total_request_count++;
                atomic64_inc(&aq->card->total_request_count);
-       } else
+       } else {
                rc = -ENODEV;
+       }
 
        /* Send/receive as many request from the queue as possible. */
        ap_wait(ap_sm_event_loop(aq, AP_SM_EVENT_POLL));
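
This hunk, and several like it below, applies the coding-style rule that when any branch of an if/else needs braces, every branch gets them; a bare "} else" followed by a single statement becomes "} else { ... }". The rule in miniature, as a standalone compilable example (names and values are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    static int queue_or_enodev(bool queue_ok)
    {
            int rc;

            if (queue_ok) {
                    rc = 1;         /* multi-statement branch needs braces */
                    rc += 1;
            } else {
                    rc = -19;       /* single statement (-ENODEV), braced
                                     * to match the other branch */
            }
            return rc;
    }

    int main(void)
    {
            printf("%d %d\n", queue_or_enodev(true), queue_or_enodev(false));
            return 0;
    }
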
index 7f69ca695fc2a83392e727654668d1220969533e..7329caa7d46791c2c301468dc55a48fc72164c60 100644 (file)
@@ -232,7 +232,7 @@ static int pkey_ep11key2pkey(const u8 *key, struct pkey_protkey *pkey)
        int i, rc;
        u16 card, dom;
        u32 nr_apqns, *apqns = NULL;
-       struct ep11keyblob *kb = (struct ep11keyblob *) key;
+       struct ep11keyblob *kb = (struct ep11keyblob *)key;
 
        zcrypt_wait_api_operational();
 
@@ -267,12 +267,12 @@ static int pkey_verifykey(const struct pkey_seckey *seckey,
                          u16 *pcardnr, u16 *pdomain,
                          u16 *pkeysize, u32 *pattributes)
 {
-       struct secaeskeytoken *t = (struct secaeskeytoken *) seckey;
+       struct secaeskeytoken *t = (struct secaeskeytoken *)seckey;
        u16 cardnr, domain;
        int rc;
 
        /* check the secure key for valid AES secure key */
-       rc = cca_check_secaeskeytoken(debug_info, 3, (u8 *) seckey, 0);
+       rc = cca_check_secaeskeytoken(debug_info, 3, (u8 *)seckey, 0);
        if (rc)
                goto out;
        if (pattributes)
@@ -425,9 +425,9 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen,
                t = (struct clearaeskeytoken *)key;
                if (keylen != sizeof(*t) + t->len)
                        goto out;
-               if ((t->keytype == PKEY_KEYTYPE_AES_128 && t->len == 16)
-                   || (t->keytype == PKEY_KEYTYPE_AES_192 && t->len == 24)
-                   || (t->keytype == PKEY_KEYTYPE_AES_256 && t->len == 32))
+               if ((t->keytype == PKEY_KEYTYPE_AES_128 && t->len == 16) ||
+                   (t->keytype == PKEY_KEYTYPE_AES_192 && t->len == 24) ||
+                   (t->keytype == PKEY_KEYTYPE_AES_256 && t->len == 32))
                        memcpy(ckey.clrkey, t->clearkey, t->len);
                else
                        goto out;
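
Here and throughout the rest of the patch, multi-line conditions are rewritten so && and || sit at the end of the continued line rather than the start of the next, per checkpatch's "logical continuations should be on the previous line" check. The AES clear-key length test above, restated as a self-contained predicate; the PKEY_KEYTYPE_* values are stand-ins for this sketch, the real ones live in the pkey headers:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PKEY_KEYTYPE_AES_128 1          /* illustrative values */
    #define PKEY_KEYTYPE_AES_192 2
    #define PKEY_KEYTYPE_AES_256 3

    static bool clrkey_len_ok(uint32_t keytype, uint32_t len)
    {
            /* operators trail each line, matching the cleanup */
            return (keytype == PKEY_KEYTYPE_AES_128 && len == 16) ||
                   (keytype == PKEY_KEYTYPE_AES_192 && len == 24) ||
                   (keytype == PKEY_KEYTYPE_AES_256 && len == 32);
    }

    int main(void)
    {
            printf("%d\n", clrkey_len_ok(PKEY_KEYTYPE_AES_256, 32)); /* 1 */
            return 0;
    }
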
@@ -541,7 +541,6 @@ int pkey_keyblob2pkey(const u8 *key, u32 keylen,
 
        DEBUG_DBG("%s rc=%d\n", __func__, rc);
        return rc;
-
 }
 EXPORT_SYMBOL(pkey_keyblob2pkey);
 
@@ -588,9 +587,11 @@ static int pkey_genseckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
                } else if (ktype == PKEY_TYPE_CCA_DATA) {
                        rc = cca_genseckey(card, dom, ksize, keybuf);
                        *keybufsize = (rc ? 0 : SECKEYBLOBSIZE);
-               } else /* TOKVER_CCA_VLSC */
+               } else {
+                       /* TOKVER_CCA_VLSC */
                        rc = cca_gencipherkey(card, dom, ksize, kflags,
                                              keybuf, keybufsize);
+               }
                if (rc == 0)
                        break;
        }
@@ -645,9 +646,11 @@ static int pkey_clr2seckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
                        rc = cca_clr2seckey(card, dom, ksize,
                                            clrkey, keybuf);
                        *keybufsize = (rc ? 0 : SECKEYBLOBSIZE);
-               } else /* TOKVER_CCA_VLSC */
+               } else {
+                       /* TOKVER_CCA_VLSC */
                        rc = cca_clr2cipherkey(card, dom, ksize, kflags,
                                               clrkey, keybuf, keybufsize);
+               }
                if (rc == 0)
                        break;
        }
@@ -667,8 +670,8 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
        if (keylen < sizeof(struct keytoken_header))
                return -EINVAL;
 
-       if (hdr->type == TOKTYPE_CCA_INTERNAL
-           && hdr->version == TOKVER_CCA_AES) {
+       if (hdr->type == TOKTYPE_CCA_INTERNAL &&
+           hdr->version == TOKVER_CCA_AES) {
                struct secaeskeytoken *t = (struct secaeskeytoken *)key;
 
                rc = cca_check_secaeskeytoken(debug_info, 3, key, 0);
@@ -677,7 +680,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
                if (ktype)
                        *ktype = PKEY_TYPE_CCA_DATA;
                if (ksize)
-                       *ksize = (enum pkey_key_size) t->bitsize;
+                       *ksize = (enum pkey_key_size)t->bitsize;
 
                rc = cca_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
                                   ZCRYPT_CEX3C, AES_MK_SET, t->mkvp, 0, 1);
@@ -697,8 +700,8 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
                *cardnr = ((struct pkey_apqn *)_apqns)->card;
                *domain = ((struct pkey_apqn *)_apqns)->domain;
 
-       } else if (hdr->type == TOKTYPE_CCA_INTERNAL
-                  && hdr->version == TOKVER_CCA_VLSC) {
+       } else if (hdr->type == TOKTYPE_CCA_INTERNAL &&
+                  hdr->version == TOKVER_CCA_VLSC) {
                struct cipherkeytoken *t = (struct cipherkeytoken *)key;
 
                rc = cca_check_secaescipherkey(debug_info, 3, key, 0, 1);
@@ -734,8 +737,8 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
                *cardnr = ((struct pkey_apqn *)_apqns)->card;
                *domain = ((struct pkey_apqn *)_apqns)->domain;
 
-       } else if (hdr->type == TOKTYPE_NON_CCA
-                  && hdr->version == TOKVER_EP11_AES) {
+       } else if (hdr->type == TOKTYPE_NON_CCA &&
+                  hdr->version == TOKVER_EP11_AES) {
                struct ep11keyblob *kb = (struct ep11keyblob *)key;
 
                rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1);
@@ -757,8 +760,9 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
                *cardnr = ((struct pkey_apqn *)_apqns)->card;
                *domain = ((struct pkey_apqn *)_apqns)->domain;
 
-       } else
+       } else {
                rc = -EINVAL;
+       }
 
 out:
        kfree(_apqns);
@@ -816,16 +820,17 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns,
        for (i = 0, rc = -ENODEV; i < nr_apqns; i++) {
                card = apqns[i].card;
                dom = apqns[i].domain;
-               if (hdr->type == TOKTYPE_CCA_INTERNAL
-                   && hdr->version == TOKVER_CCA_AES)
+               if (hdr->type == TOKTYPE_CCA_INTERNAL &&
+                   hdr->version == TOKVER_CCA_AES) {
                        rc = cca_sec2protkey(card, dom, key, pkey->protkey,
                                             &pkey->len, &pkey->type);
-               else if (hdr->type == TOKTYPE_CCA_INTERNAL
-                        && hdr->version == TOKVER_CCA_VLSC)
+               } else if (hdr->type == TOKTYPE_CCA_INTERNAL &&
+                          hdr->version == TOKVER_CCA_VLSC) {
                        rc = cca_cipher2protkey(card, dom, key, pkey->protkey,
                                                &pkey->len, &pkey->type);
-               else { /* EP11 AES secure key blob */
-                       struct ep11keyblob *kb = (struct ep11keyblob *) key;
+               } else {
+                       /* EP11 AES secure key blob */
+                       struct ep11keyblob *kb = (struct ep11keyblob *)key;
 
                        pkey->len = sizeof(pkey->protkey);
                        rc = ep11_kblob2protkey(card, dom, key, kb->head.len,
@@ -851,10 +856,10 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags,
 
        zcrypt_wait_api_operational();
 
-       if (hdr->type == TOKTYPE_NON_CCA
-           && (hdr->version == TOKVER_EP11_AES_WITH_HEADER
-               || hdr->version == TOKVER_EP11_ECC_WITH_HEADER)
-           && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
+       if (hdr->type == TOKTYPE_NON_CCA &&
+           (hdr->version == TOKVER_EP11_AES_WITH_HEADER ||
+            hdr->version == TOKVER_EP11_ECC_WITH_HEADER) &&
+           is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
                int minhwtype = 0, api = 0;
                struct ep11keyblob *kb = (struct ep11keyblob *)
                        (key + sizeof(struct ep11kblob_header));
@@ -869,11 +874,11 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags,
                                    minhwtype, api, kb->wkvp);
                if (rc)
                        goto out;
-       } else if (hdr->type == TOKTYPE_NON_CCA
-                  && hdr->version == TOKVER_EP11_AES
-                  && is_ep11_keyblob(key)) {
+       } else if (hdr->type == TOKTYPE_NON_CCA &&
+                  hdr->version == TOKVER_EP11_AES &&
+                  is_ep11_keyblob(key)) {
                int minhwtype = 0, api = 0;
-               struct ep11keyblob *kb = (struct ep11keyblob *) key;
+               struct ep11keyblob *kb = (struct ep11keyblob *)key;
 
                if (flags != PKEY_FLAGS_MATCH_CUR_MKVP)
                        return -EINVAL;
@@ -931,8 +936,9 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags,
                                   cur_mkvp, old_mkvp, 1);
                if (rc)
                        goto out;
-       } else
+       } else {
                return -EINVAL;
+       }
 
        if (apqns) {
                if (*nr_apqns < _nr_apqns)
@@ -961,9 +967,9 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype,
                int minhwtype = ZCRYPT_CEX3C;
 
                if (flags & PKEY_FLAGS_MATCH_CUR_MKVP)
-                       cur_mkvp = *((u64 *) cur_mkvp);
+                       cur_mkvp = *((u64 *)cur_mkvp);
                if (flags & PKEY_FLAGS_MATCH_ALT_MKVP)
-                       old_mkvp = *((u64 *) alt_mkvp);
+                       old_mkvp = *((u64 *)alt_mkvp);
                if (ktype == PKEY_TYPE_CCA_CIPHER)
                        minhwtype = ZCRYPT_CEX6;
                rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
@@ -975,9 +981,9 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype,
                u64 cur_mkvp = 0, old_mkvp = 0;
 
                if (flags & PKEY_FLAGS_MATCH_CUR_MKVP)
-                       cur_mkvp = *((u64 *) cur_mkvp);
+                       cur_mkvp = *((u64 *)cur_mkvp);
                if (flags & PKEY_FLAGS_MATCH_ALT_MKVP)
-                       old_mkvp = *((u64 *) alt_mkvp);
+                       old_mkvp = *((u64 *)alt_mkvp);
                rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
                                   ZCRYPT_CEX7, APKA_MK_SET,
                                   cur_mkvp, old_mkvp, 1);
@@ -996,8 +1002,9 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype,
                if (rc)
                        goto out;
 
-       } else
+       } else {
                return -EINVAL;
+       }
 
        if (apqns) {
                if (*nr_apqns < _nr_apqns)
@@ -1026,21 +1033,21 @@ static int pkey_keyblob2pkey3(const struct pkey_apqn *apqns, size_t nr_apqns,
        if (keylen < sizeof(struct keytoken_header))
                return -EINVAL;
 
-       if (hdr->type == TOKTYPE_NON_CCA
-           && hdr->version == TOKVER_EP11_AES_WITH_HEADER
-           && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
+       if (hdr->type == TOKTYPE_NON_CCA &&
+           hdr->version == TOKVER_EP11_AES_WITH_HEADER &&
+           is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
                /* EP11 AES key blob with header */
                if (ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1))
                        return -EINVAL;
-       } else if (hdr->type == TOKTYPE_NON_CCA
-                  && hdr->version == TOKVER_EP11_ECC_WITH_HEADER
-                  && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
+       } else if (hdr->type == TOKTYPE_NON_CCA &&
+                  hdr->version == TOKVER_EP11_ECC_WITH_HEADER &&
+                  is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) {
                /* EP11 ECC key blob with header */
                if (ep11_check_ecc_key_with_hdr(debug_info, 3, key, keylen, 1))
                        return -EINVAL;
-       } else if (hdr->type == TOKTYPE_NON_CCA
-                  && hdr->version == TOKVER_EP11_AES
-                  && is_ep11_keyblob(key)) {
+       } else if (hdr->type == TOKTYPE_NON_CCA &&
+                  hdr->version == TOKVER_EP11_AES &&
+                  is_ep11_keyblob(key)) {
                /* EP11 AES key blob with header in session field */
                if (ep11_check_aes_key(debug_info, 3, key, keylen, 1))
                        return -EINVAL;
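
All of the pkey dispatch chains above branch on the same two fields of the common key-token header before trusting anything else in the blob. A condensed sketch of that gatekeeping; the struct layout and the TOKTYPE_*/TOKVER_* values below are assumptions made for the sketch, the authoritative definitions live in the driver's headers:

    #include <linux/errno.h>
    #include <linux/types.h>

    /* minimal stand-in for the driver's keytoken_header (assumption) */
    struct keytoken_header_sketch {
            u8  type;
            u8  res0;
            u16 len;
            u8  version;
            u8  res1[3];
    } __packed;

    #define TOKTYPE_NON_CCA       0x00      /* illustrative values */
    #define TOKTYPE_CCA_INTERNAL  0x01
    #define TOKVER_EP11_AES       0x03
    #define TOKVER_CCA_AES        0x04
    #define TOKVER_CCA_VLSC       0x05

    static int classify_key_blob(const u8 *key, u32 keylen)
    {
            const struct keytoken_header_sketch *hdr =
                    (const struct keytoken_header_sketch *)key;

            if (keylen < sizeof(*hdr))
                    return -EINVAL;
            if (hdr->type == TOKTYPE_CCA_INTERNAL &&
                hdr->version == TOKVER_CCA_AES)
                    return 1;       /* fixed-length CCA AES data key */
            if (hdr->type == TOKTYPE_CCA_INTERNAL &&
                hdr->version == TOKVER_CCA_VLSC)
                    return 2;       /* variable-length CCA cipher key */
            if (hdr->type == TOKTYPE_NON_CCA &&
                hdr->version == TOKVER_EP11_AES)
                    return 3;       /* EP11 AES key blob */
            return -EINVAL;
    }
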
@@ -1088,15 +1095,15 @@ static int pkey_keyblob2pkey3(const struct pkey_apqn *apqns, size_t nr_apqns,
        for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) {
                card = apqns[i].card;
                dom = apqns[i].domain;
-               if (hdr->type == TOKTYPE_NON_CCA
-                   && (hdr->version == TOKVER_EP11_AES_WITH_HEADER
-                       || hdr->version == TOKVER_EP11_ECC_WITH_HEADER)
-                   && is_ep11_keyblob(key + sizeof(struct ep11kblob_header)))
+               if (hdr->type == TOKTYPE_NON_CCA &&
+                   (hdr->version == TOKVER_EP11_AES_WITH_HEADER ||
+                    hdr->version == TOKVER_EP11_ECC_WITH_HEADER) &&
+                   is_ep11_keyblob(key + sizeof(struct ep11kblob_header)))
                        rc = ep11_kblob2protkey(card, dom, key, hdr->len,
                                                protkey, protkeylen, protkeytype);
-               else if (hdr->type == TOKTYPE_NON_CCA
-                        && hdr->version == TOKVER_EP11_AES
-                        && is_ep11_keyblob(key))
+               else if (hdr->type == TOKTYPE_NON_CCA &&
+                        hdr->version == TOKVER_EP11_AES &&
+                        is_ep11_keyblob(key))
                        rc = ep11_kblob2protkey(card, dom, key, hdr->len,
                                                protkey, protkeylen, protkeytype);
                else if (hdr->type == TOKTYPE_CCA_INTERNAL &&
@@ -1144,7 +1151,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
 
        switch (cmd) {
        case PKEY_GENSECK: {
-               struct pkey_genseck __user *ugs = (void __user *) arg;
+               struct pkey_genseck __user *ugs = (void __user *)arg;
                struct pkey_genseck kgs;
 
                if (copy_from_user(&kgs, ugs, sizeof(kgs)))
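
Every case in this long ioctl switch follows the same marshalling shape, and the cleanup only tightens the cast at the top of each one: the unsigned long arg is cast to a __user pointer, copied in, processed, and copied back. A hedged sketch of that shape; the body is a placeholder for the real key generation, not the driver's exact logic:

    #include <linux/uaccess.h>

    static long pkey_genseck_sketch(unsigned long arg)
    {
            struct pkey_genseck __user *ugs = (void __user *)arg;
            struct pkey_genseck kgs;
            long rc;

            if (copy_from_user(&kgs, ugs, sizeof(kgs)))
                    return -EFAULT;
            rc = 0;                 /* stand-in for the real operation */
            if (!rc && copy_to_user(ugs, &kgs, sizeof(kgs)))
                    return -EFAULT;
            return rc;
    }
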
@@ -1159,7 +1166,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_CLR2SECK: {
-               struct pkey_clr2seck __user *ucs = (void __user *) arg;
+               struct pkey_clr2seck __user *ucs = (void __user *)arg;
                struct pkey_clr2seck kcs;
 
                if (copy_from_user(&kcs, ucs, sizeof(kcs)))
@@ -1175,7 +1182,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_SEC2PROTK: {
-               struct pkey_sec2protk __user *usp = (void __user *) arg;
+               struct pkey_sec2protk __user *usp = (void __user *)arg;
                struct pkey_sec2protk ksp;
 
                if (copy_from_user(&ksp, usp, sizeof(ksp)))
@@ -1191,7 +1198,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_CLR2PROTK: {
-               struct pkey_clr2protk __user *ucp = (void __user *) arg;
+               struct pkey_clr2protk __user *ucp = (void __user *)arg;
                struct pkey_clr2protk kcp;
 
                if (copy_from_user(&kcp, ucp, sizeof(kcp)))
@@ -1207,7 +1214,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_FINDCARD: {
-               struct pkey_findcard __user *ufc = (void __user *) arg;
+               struct pkey_findcard __user *ufc = (void __user *)arg;
                struct pkey_findcard kfc;
 
                if (copy_from_user(&kfc, ufc, sizeof(kfc)))
@@ -1222,7 +1229,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_SKEY2PKEY: {
-               struct pkey_skey2pkey __user *usp = (void __user *) arg;
+               struct pkey_skey2pkey __user *usp = (void __user *)arg;
                struct pkey_skey2pkey ksp;
 
                if (copy_from_user(&ksp, usp, sizeof(ksp)))
@@ -1236,7 +1243,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_VERIFYKEY: {
-               struct pkey_verifykey __user *uvk = (void __user *) arg;
+               struct pkey_verifykey __user *uvk = (void __user *)arg;
                struct pkey_verifykey kvk;
 
                if (copy_from_user(&kvk, uvk, sizeof(kvk)))
@@ -1251,7 +1258,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_GENPROTK: {
-               struct pkey_genprotk __user *ugp = (void __user *) arg;
+               struct pkey_genprotk __user *ugp = (void __user *)arg;
                struct pkey_genprotk kgp;
 
                if (copy_from_user(&kgp, ugp, sizeof(kgp)))
@@ -1265,7 +1272,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_VERIFYPROTK: {
-               struct pkey_verifyprotk __user *uvp = (void __user *) arg;
+               struct pkey_verifyprotk __user *uvp = (void __user *)arg;
                struct pkey_verifyprotk kvp;
 
                if (copy_from_user(&kvp, uvp, sizeof(kvp)))
@@ -1275,7 +1282,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_KBLOB2PROTK: {
-               struct pkey_kblob2pkey __user *utp = (void __user *) arg;
+               struct pkey_kblob2pkey __user *utp = (void __user *)arg;
                struct pkey_kblob2pkey ktp;
                u8 *kkey;
 
@@ -1294,7 +1301,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_GENSECK2: {
-               struct pkey_genseck2 __user *ugs = (void __user *) arg;
+               struct pkey_genseck2 __user *ugs = (void __user *)arg;
                struct pkey_genseck2 kgs;
                struct pkey_apqn *apqns;
                size_t klen = KEYBLOBBUFSIZE;
@@ -1336,7 +1343,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_CLR2SECK2: {
-               struct pkey_clr2seck2 __user *ucs = (void __user *) arg;
+               struct pkey_clr2seck2 __user *ucs = (void __user *)arg;
                struct pkey_clr2seck2 kcs;
                struct pkey_apqn *apqns;
                size_t klen = KEYBLOBBUFSIZE;
@@ -1379,7 +1386,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_VERIFYKEY2: {
-               struct pkey_verifykey2 __user *uvk = (void __user *) arg;
+               struct pkey_verifykey2 __user *uvk = (void __user *)arg;
                struct pkey_verifykey2 kvk;
                u8 *kkey;
 
@@ -1400,7 +1407,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_KBLOB2PROTK2: {
-               struct pkey_kblob2pkey2 __user *utp = (void __user *) arg;
+               struct pkey_kblob2pkey2 __user *utp = (void __user *)arg;
                struct pkey_kblob2pkey2 ktp;
                struct pkey_apqn *apqns = NULL;
                u8 *kkey;
@@ -1427,7 +1434,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_APQNS4K: {
-               struct pkey_apqns4key __user *uak = (void __user *) arg;
+               struct pkey_apqns4key __user *uak = (void __user *)arg;
                struct pkey_apqns4key kak;
                struct pkey_apqn *apqns = NULL;
                size_t nr_apqns, len;
@@ -1476,7 +1483,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_APQNS4KT: {
-               struct pkey_apqns4keytype __user *uat = (void __user *) arg;
+               struct pkey_apqns4keytype __user *uat = (void __user *)arg;
                struct pkey_apqns4keytype kat;
                struct pkey_apqn *apqns = NULL;
                size_t nr_apqns, len;
@@ -1518,7 +1525,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                break;
        }
        case PKEY_KBLOB2PROTK3: {
-               struct pkey_kblob2pkey3 __user *utp = (void __user *) arg;
+               struct pkey_kblob2pkey3 __user *utp = (void __user *)arg;
                struct pkey_kblob2pkey3 ktp;
                struct pkey_apqn *apqns = NULL;
                u32 protkeylen = PROTKEYBLOBBUFSIZE;
@@ -1708,7 +1715,7 @@ static ssize_t pkey_ccadata_aes_attr_read(u32 keytype, bool is_xts, char *buf,
                                          loff_t off, size_t count)
 {
        int rc;
-       struct pkey_seckey *seckey = (struct pkey_seckey *) buf;
+       struct pkey_seckey *seckey = (struct pkey_seckey *)buf;
 
        if (off != 0 || count < sizeof(struct secaeskeytoken))
                return -EINVAL;
index 29ebd54f89191568f7b64ce1a41e611ffca2f5bd..4ac9c6521ec162ad523be7fd6a7974c16ca21683 100644 (file)
@@ -46,8 +46,6 @@ static struct ap_device_id ap_queue_ids[] = {
        { /* end of sibling */ },
 };
 
-MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
-
 static struct ap_matrix_mdev *vfio_ap_mdev_for_queue(struct vfio_ap_queue *q)
 {
        struct ap_matrix_mdev *matrix_mdev;
index aa6dc3c0c353d7eaf0594e0495189a3119d469f8..f94b43ce9a65836690ffe3a57688e5d340d63a58 100644 (file)
@@ -104,7 +104,7 @@ struct zcrypt_ops *zcrypt_msgtype(unsigned char *name, int variant)
        struct zcrypt_ops *zops;
 
        list_for_each_entry(zops, &zcrypt_ops_list, list)
-               if ((zops->variant == variant) &&
+               if (zops->variant == variant &&
                    (!strncmp(zops->name, name, sizeof(zops->name))))
                        return zops;
        return NULL;
@@ -438,8 +438,8 @@ static int zcdn_create(const char *name)
                strncpy(nodename, name, sizeof(nodename));
        else
                snprintf(nodename, sizeof(nodename),
-                        ZCRYPT_NAME "_%d", (int) MINOR(devt));
-       nodename[sizeof(nodename)-1] = '\0';
+                        ZCRYPT_NAME "_%d", (int)MINOR(devt));
+       nodename[sizeof(nodename) - 1] = '\0';
        if (dev_set_name(&zcdndev->device, nodename)) {
                rc = -EINVAL;
                goto unlockout;
@@ -519,7 +519,7 @@ static ssize_t zcrypt_read(struct file *filp, char __user *buf,
 /*
  * zcrypt_write(): Not allowed.
  *
- * Write is is not allowed
+ * Write is not allowed
  */
 static ssize_t zcrypt_write(struct file *filp, const char __user *buf,
                            size_t count, loff_t *f_pos)
@@ -549,7 +549,7 @@ static int zcrypt_open(struct inode *inode, struct file *filp)
                        perms = &zcdndev->perms;
        }
 #endif
-       filp->private_data = (void *) perms;
+       filp->private_data = (void *)perms;
 
        atomic_inc(&zcrypt_open_count);
        return stream_open(inode, filp);
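
The open handler stashes the governing permission set in filp->private_data, and the ioctl paths below cast it back out with the now-tightened "(struct ap_perms *)filp->private_data". Round-tripping through void * is lossless here because private_data exists precisely to carry per-open context. A sketch of both ends, with placeholder handler names and a placeholder use of perms:

    #include <linux/fs.h>

    extern struct ap_perms ap_perms;        /* default permission set */

    static int sketch_open(struct inode *inode, struct file *filp)
    {
            /* remember which permission set governs this open file */
            filp->private_data = (void *)&ap_perms;
            return stream_open(inode, filp);
    }

    static long sketch_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg)
    {
            struct ap_perms *perms = (struct ap_perms *)filp->private_data;

            /* placeholder: a real handler checks perms before acting */
            return perms ? 0 : -EINVAL;
    }
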
@@ -713,7 +713,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
        pref_zq = NULL;
        spin_lock(&zcrypt_list_lock);
        for_each_zcrypt_card(zc) {
-               /* Check for useable accelarator or CCA card */
+               /* Check for usable accelarator or CCA card */
                if (!zc->online || !zc->card->config || zc->card->chkstop ||
                    !(zc->card->functions & 0x18000000))
                        continue;
@@ -733,7 +733,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
                if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt))
                        continue;
                for_each_zcrypt_queue(zq, zc) {
-                       /* check if device is useable and eligible */
+                       /* check if device is usable and eligible */
                        if (!zq->online || !zq->ops->rsa_modexpo ||
                            !zq->queue->config || zq->queue->chkstop)
                                continue;
@@ -823,7 +823,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
        pref_zq = NULL;
        spin_lock(&zcrypt_list_lock);
        for_each_zcrypt_card(zc) {
-               /* Check for useable accelarator or CCA card */
+               /* Check for usable accelarator or CCA card */
                if (!zc->online || !zc->card->config || zc->card->chkstop ||
                    !(zc->card->functions & 0x18000000))
                        continue;
@@ -843,7 +843,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
                if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt))
                        continue;
                for_each_zcrypt_queue(zq, zc) {
-                       /* check if device is useable and eligible */
+                       /* check if device is usable and eligible */
                        if (!zq->online || !zq->ops->rsa_modexpo_crt ||
                            !zq->queue->config || zq->queue->chkstop)
                                continue;
@@ -893,7 +893,7 @@ out:
 
 static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
                              struct zcrypt_track *tr,
-                             struct ica_xcRB *xcRB)
+                             struct ica_xcRB *xcrb)
 {
        struct zcrypt_card *zc, *pref_zc;
        struct zcrypt_queue *zq, *pref_zq;
@@ -904,9 +904,9 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
        int cpen, qpen, qid = 0, rc = -ENODEV;
        struct module *mod;
 
-       trace_s390_zcrypt_req(xcRB, TB_ZSECSENDCPRB);
+       trace_s390_zcrypt_req(xcrb, TB_ZSECSENDCPRB);
 
-       xcRB->status = 0;
+       xcrb->status = 0;
        ap_init_message(&ap_msg);
 
 #ifdef CONFIG_ZCRYPT_DEBUG
@@ -915,11 +915,11 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
        if (tr && tr->fi.action == AP_FI_ACTION_CCA_AGENT_FF) {
                ZCRYPT_DBF_WARN("%s fi cmd 0x%04x: forcing invalid agent_ID 'FF'\n",
                                __func__, tr->fi.cmd);
-               xcRB->agent_ID = 0x4646;
+               xcrb->agent_ID = 0x4646;
        }
 #endif
 
-       rc = prep_cca_ap_msg(userspace, xcRB, &ap_msg, &func_code, &domain);
+       rc = prep_cca_ap_msg(userspace, xcrb, &ap_msg, &func_code, &domain);
        if (rc)
                goto out;
 
@@ -948,13 +948,13 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
        pref_zq = NULL;
        spin_lock(&zcrypt_list_lock);
        for_each_zcrypt_card(zc) {
-               /* Check for useable CCA card */
+               /* Check for usable CCA card */
                if (!zc->online || !zc->card->config || zc->card->chkstop ||
                    !(zc->card->functions & 0x10000000))
                        continue;
                /* Check for user selected CCA card */
-               if (xcRB->user_defined != AUTOSELECT &&
-                   xcRB->user_defined != zc->card->id)
+               if (xcrb->user_defined != AUTOSELECT &&
+                   xcrb->user_defined != zc->card->id)
                        continue;
                /* check if request size exceeds card max msg size */
                if (ap_msg.len > zc->card->maxmsgsize)
@@ -971,7 +971,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
                if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt))
                        continue;
                for_each_zcrypt_queue(zq, zc) {
-                       /* check for device useable and eligible */
+                       /* check for device usable and eligible */
                        if (!zq->online || !zq->ops->send_cprb ||
                            !zq->queue->config || zq->queue->chkstop ||
                            (tdom != AUTOSEL_DOM &&
@@ -998,7 +998,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
 
        if (!pref_zq) {
                ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n",
-                              __func__, xcRB->user_defined, *domain);
+                              __func__, xcrb->user_defined, *domain);
                rc = -ENODEV;
                goto out;
        }
@@ -1016,7 +1016,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms,
        }
 #endif
 
-       rc = pref_zq->ops->send_cprb(userspace, pref_zq, xcRB, &ap_msg);
+       rc = pref_zq->ops->send_cprb(userspace, pref_zq, xcrb, &ap_msg);
 
        spin_lock(&zcrypt_list_lock);
        zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt);
@@ -1028,14 +1028,14 @@ out:
                tr->last_rc = rc;
                tr->last_qid = qid;
        }
-       trace_s390_zcrypt_rep(xcRB, func_code, rc,
+       trace_s390_zcrypt_rep(xcrb, func_code, rc,
                              AP_QID_CARD(qid), AP_QID_QUEUE(qid));
        return rc;
 }
 
-long zcrypt_send_cprb(struct ica_xcRB *xcRB)
+long zcrypt_send_cprb(struct ica_xcRB *xcrb)
 {
-       return _zcrypt_send_cprb(false, &ap_perms, NULL, xcRB);
+       return _zcrypt_send_cprb(false, &ap_perms, NULL, xcrb);
 }
 EXPORT_SYMBOL(zcrypt_send_cprb);
 
@@ -1089,7 +1089,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
                ap_msg.fi.cmd = tr->fi.cmd;
 #endif
 
-       target_num = (unsigned short) xcrb->targets_num;
+       target_num = (unsigned short)xcrb->targets_num;
 
        /* empty list indicates autoselect (all available targets) */
        targets = NULL;
@@ -1103,9 +1103,9 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
                        goto out;
                }
 
-               uptr = (struct ep11_target_dev __force __user *) xcrb->targets;
+               uptr = (struct ep11_target_dev __force __user *)xcrb->targets;
                if (z_copy_from_user(userspace, targets, uptr,
-                                  target_num * sizeof(*targets))) {
+                                    target_num * sizeof(*targets))) {
                        func_code = 0;
                        rc = -EFAULT;
                        goto out_free;
@@ -1132,7 +1132,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
        pref_zq = NULL;
        spin_lock(&zcrypt_list_lock);
        for_each_zcrypt_card(zc) {
-               /* Check for useable EP11 card */
+               /* Check for usable EP11 card */
                if (!zc->online || !zc->card->config || zc->card->chkstop ||
                    !(zc->card->functions & 0x04000000))
                        continue;
@@ -1155,7 +1155,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
                if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt))
                        continue;
                for_each_zcrypt_queue(zq, zc) {
-                       /* check if device is useable and eligible */
+                       /* check if device is usable and eligible */
                        if (!zq->online || !zq->ops->send_ep11_cprb ||
                            !zq->queue->config || zq->queue->chkstop ||
                            (targets &&
@@ -1184,11 +1184,11 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms,
        if (!pref_zq) {
                if (targets && target_num == 1) {
                        ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n",
-                                      __func__, (int) targets->ap_id,
-                                      (int) targets->dom_id);
+                                      __func__, (int)targets->ap_id,
+                                      (int)targets->dom_id);
                } else if (targets) {
                        ZCRYPT_DBF_DBG("%s no match for %d target addrs => ENODEV\n",
-                                      __func__, (int) target_num);
+                                      __func__, (int)target_num);
                } else {
                        ZCRYPT_DBF_DBG("%s no match for address ff.ffff => ENODEV\n",
                                       __func__);
@@ -1245,7 +1245,7 @@ static long zcrypt_rng(char *buffer)
        pref_zq = NULL;
        spin_lock(&zcrypt_list_lock);
        for_each_zcrypt_card(zc) {
-               /* Check for useable CCA card */
+               /* Check for usable CCA card */
                if (!zc->online || !zc->card->config || zc->card->chkstop ||
                    !(zc->card->functions & 0x10000000))
                        continue;
@@ -1254,7 +1254,7 @@ static long zcrypt_rng(char *buffer)
                if (!zcrypt_card_compare(zc, pref_zc, wgt, pref_wgt))
                        continue;
                for_each_zcrypt_queue(zq, zc) {
-                       /* check if device is useable and eligible */
+                       /* check if device is usable and eligible */
                        if (!zq->online || !zq->ops->rng ||
                            !zq->queue->config || zq->queue->chkstop)
                                continue;
@@ -1270,7 +1270,7 @@ static long zcrypt_rng(char *buffer)
 
        if (!pref_zq) {
                ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n",
-                       __func__);
+                              __func__);
                rc = -ENODEV;
                goto out;
        }
@@ -1381,8 +1381,8 @@ static void zcrypt_status_mask(char status[], size_t max_adapters)
        for_each_zcrypt_card(zc) {
                for_each_zcrypt_queue(zq, zc) {
                        card = AP_QID_CARD(zq->queue->qid);
-                       if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index
-                           || card >= max_adapters)
+                       if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index ||
+                           card >= max_adapters)
                                continue;
                        status[card] = zc->online ? zc->user_space_type : 0x0d;
                }
@@ -1402,8 +1402,8 @@ static void zcrypt_qdepth_mask(char qdepth[], size_t max_adapters)
        for_each_zcrypt_card(zc) {
                for_each_zcrypt_queue(zq, zc) {
                        card = AP_QID_CARD(zq->queue->qid);
-                       if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index
-                           || card >= max_adapters)
+                       if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index ||
+                           card >= max_adapters)
                                continue;
                        spin_lock(&zq->queue->lock);
                        qdepth[card] =
@@ -1429,13 +1429,13 @@ static void zcrypt_perdev_reqcnt(u32 reqcnt[], size_t max_adapters)
        for_each_zcrypt_card(zc) {
                for_each_zcrypt_queue(zq, zc) {
                        card = AP_QID_CARD(zq->queue->qid);
-                       if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index
-                           || card >= max_adapters)
+                       if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index ||
+                           card >= max_adapters)
                                continue;
                        spin_lock(&zq->queue->lock);
                        cnt = zq->queue->total_request_count;
                        spin_unlock(&zq->queue->lock);
-                       reqcnt[card] = (cnt < UINT_MAX) ? (u32) cnt : UINT_MAX;
+                       reqcnt[card] = (cnt < UINT_MAX) ? (u32)cnt : UINT_MAX;
                }
        }
        local_bh_enable();
@@ -1493,7 +1493,7 @@ static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg)
        int rc;
        struct zcrypt_track tr;
        struct ica_rsa_modexpo mex;
-       struct ica_rsa_modexpo __user *umex = (void __user *) arg;
+       struct ica_rsa_modexpo __user *umex = (void __user *)arg;
 
        memset(&tr, 0, sizeof(tr));
        if (copy_from_user(&mex, umex, sizeof(mex)))
@@ -1538,7 +1538,7 @@ static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg)
        int rc;
        struct zcrypt_track tr;
        struct ica_rsa_modexpo_crt crt;
-       struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg;
+       struct ica_rsa_modexpo_crt __user *ucrt = (void __user *)arg;
 
        memset(&tr, 0, sizeof(tr));
        if (copy_from_user(&crt, ucrt, sizeof(crt)))
@@ -1581,25 +1581,25 @@ static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg)
 static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg)
 {
        int rc;
-       struct ica_xcRB xcRB;
+       struct ica_xcRB xcrb;
        struct zcrypt_track tr;
-       struct ica_xcRB __user *uxcRB = (void __user *) arg;
+       struct ica_xcRB __user *uxcrb = (void __user *)arg;
 
        memset(&tr, 0, sizeof(tr));
-       if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB)))
+       if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb)))
                return -EFAULT;
 
 #ifdef CONFIG_ZCRYPT_DEBUG
-       if ((xcRB.status & 0x8000FFFF) == 0x80004649 /* 'FI' */) {
+       if ((xcrb.status & 0x8000FFFF) == 0x80004649 /* 'FI' */) {
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
-               tr.fi.cmd = (u16)(xcRB.status >> 16);
+               tr.fi.cmd = (u16)(xcrb.status >> 16);
        }
-       xcRB.status = 0;
+       xcrb.status = 0;
 #endif
 
        do {
-               rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB);
+               rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb);
                if (rc == -EAGAIN)
                        tr.again_counter++;
 #ifdef CONFIG_ZCRYPT_DEBUG
@@ -1610,7 +1610,7 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg)
        /* on failure: retry once again after a requested rescan */
        if ((rc == -ENODEV) && (zcrypt_process_rescan()))
                do {
-                       rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB);
+                       rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb);
                        if (rc == -EAGAIN)
                                tr.again_counter++;
                } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
@@ -1618,8 +1618,8 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg)
                rc = -EIO;
        if (rc)
                ZCRYPT_DBF_DBG("ioctl ZSENDCPRB rc=%d status=0x%x\n",
-                              rc, xcRB.status);
-       if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB)))
+                              rc, xcrb.status);
+       if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb)))
                return -EFAULT;
        return rc;
 }
@@ -1674,7 +1674,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
 {
        int rc;
        struct ap_perms *perms =
-               (struct ap_perms *) filp->private_data;
+               (struct ap_perms *)filp->private_data;
 
        rc = zcrypt_check_ioctl(perms, cmd);
        if (rc)
@@ -1698,7 +1698,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
                if (!device_status)
                        return -ENOMEM;
                zcrypt_device_status_mask_ext(device_status);
-               if (copy_to_user((char __user *) arg, device_status,
+               if (copy_to_user((char __user *)arg, device_status,
                                 total_size))
                        rc = -EFAULT;
                kfree(device_status);
@@ -1708,7 +1708,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
                char status[AP_DEVICES];
 
                zcrypt_status_mask(status, AP_DEVICES);
-               if (copy_to_user((char __user *) arg, status, sizeof(status)))
+               if (copy_to_user((char __user *)arg, status, sizeof(status)))
                        return -EFAULT;
                return 0;
        }
@@ -1716,7 +1716,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
                char qdepth[AP_DEVICES];
 
                zcrypt_qdepth_mask(qdepth, AP_DEVICES);
-               if (copy_to_user((char __user *) arg, qdepth, sizeof(qdepth)))
+               if (copy_to_user((char __user *)arg, qdepth, sizeof(qdepth)))
                        return -EFAULT;
                return 0;
        }
@@ -1727,21 +1727,21 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
                if (!reqcnt)
                        return -ENOMEM;
                zcrypt_perdev_reqcnt(reqcnt, AP_DEVICES);
-               if (copy_to_user((int __user *) arg, reqcnt,
+               if (copy_to_user((int __user *)arg, reqcnt,
                                 sizeof(u32) * AP_DEVICES))
                        rc = -EFAULT;
                kfree(reqcnt);
                return rc;
        }
        case Z90STAT_REQUESTQ_COUNT:
-               return put_user(zcrypt_requestq_count(), (int __user *) arg);
+               return put_user(zcrypt_requestq_count(), (int __user *)arg);
        case Z90STAT_PENDINGQ_COUNT:
-               return put_user(zcrypt_pendingq_count(), (int __user *) arg);
+               return put_user(zcrypt_pendingq_count(), (int __user *)arg);
        case Z90STAT_TOTALOPEN_COUNT:
                return put_user(atomic_read(&zcrypt_open_count),
-                               (int __user *) arg);
+                               (int __user *)arg);
        case Z90STAT_DOMAIN_INDEX:
-               return put_user(ap_domain_index, (int __user *) arg);
+               return put_user(ap_domain_index, (int __user *)arg);
        /*
         * Deprecated ioctls
         */
@@ -1755,7 +1755,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
                if (!device_status)
                        return -ENOMEM;
                zcrypt_device_status_mask(device_status);
-               if (copy_to_user((char __user *) arg, device_status,
+               if (copy_to_user((char __user *)arg, device_status,
                                 total_size))
                        rc = -EFAULT;
                kfree(device_status);
@@ -1766,7 +1766,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
                char status[MAX_ZDEV_CARDIDS];
 
                zcrypt_status_mask(status, MAX_ZDEV_CARDIDS);
-               if (copy_to_user((char __user *) arg, status, sizeof(status)))
+               if (copy_to_user((char __user *)arg, status, sizeof(status)))
                        return -EFAULT;
                return 0;
        }
@@ -1775,7 +1775,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
                char qdepth[MAX_ZDEV_CARDIDS];
 
                zcrypt_qdepth_mask(qdepth, MAX_ZDEV_CARDIDS);
-               if (copy_to_user((char __user *) arg, qdepth, sizeof(qdepth)))
+               if (copy_to_user((char __user *)arg, qdepth, sizeof(qdepth)))
                        return -EFAULT;
                return 0;
        }
@@ -1784,7 +1784,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
                u32 reqcnt[MAX_ZDEV_CARDIDS];
 
                zcrypt_perdev_reqcnt(reqcnt, MAX_ZDEV_CARDIDS);
-               if (copy_to_user((int __user *) arg, reqcnt, sizeof(reqcnt)))
+               if (copy_to_user((int __user *)arg, reqcnt, sizeof(reqcnt)))
                        return -EFAULT;
                return 0;
        }
@@ -1899,7 +1899,7 @@ static long trans_modexpo_crt32(struct ap_perms *perms, struct file *filp,
                        &ucrt32->outputdatalength);
 }
 
-struct compat_ica_xcRB {
+struct compat_ica_xcrb {
        unsigned short  agent_ID;
        unsigned int    user_defined;
        unsigned short  request_ID;
@@ -1919,66 +1919,66 @@ struct compat_ica_xcRB {
        unsigned int    status;
 } __packed;
 
-static long trans_xcRB32(struct ap_perms *perms, struct file *filp,
+static long trans_xcrb32(struct ap_perms *perms, struct file *filp,
                         unsigned int cmd, unsigned long arg)
 {
-       struct compat_ica_xcRB __user *uxcRB32 = compat_ptr(arg);
-       struct compat_ica_xcRB xcRB32;
+       struct compat_ica_xcrb __user *uxcrb32 = compat_ptr(arg);
+       struct compat_ica_xcrb xcrb32;
        struct zcrypt_track tr;
-       struct ica_xcRB xcRB64;
+       struct ica_xcRB xcrb64;
        long rc;
 
        memset(&tr, 0, sizeof(tr));
-       if (copy_from_user(&xcRB32, uxcRB32, sizeof(xcRB32)))
+       if (copy_from_user(&xcrb32, uxcrb32, sizeof(xcrb32)))
                return -EFAULT;
-       xcRB64.agent_ID = xcRB32.agent_ID;
-       xcRB64.user_defined = xcRB32.user_defined;
-       xcRB64.request_ID = xcRB32.request_ID;
-       xcRB64.request_control_blk_length =
-               xcRB32.request_control_blk_length;
-       xcRB64.request_control_blk_addr =
-               compat_ptr(xcRB32.request_control_blk_addr);
-       xcRB64.request_data_length =
-               xcRB32.request_data_length;
-       xcRB64.request_data_address =
-               compat_ptr(xcRB32.request_data_address);
-       xcRB64.reply_control_blk_length =
-               xcRB32.reply_control_blk_length;
-       xcRB64.reply_control_blk_addr =
-               compat_ptr(xcRB32.reply_control_blk_addr);
-       xcRB64.reply_data_length = xcRB32.reply_data_length;
-       xcRB64.reply_data_addr =
-               compat_ptr(xcRB32.reply_data_addr);
-       xcRB64.priority_window = xcRB32.priority_window;
-       xcRB64.status = xcRB32.status;
+       xcrb64.agent_ID = xcrb32.agent_ID;
+       xcrb64.user_defined = xcrb32.user_defined;
+       xcrb64.request_ID = xcrb32.request_ID;
+       xcrb64.request_control_blk_length =
+               xcrb32.request_control_blk_length;
+       xcrb64.request_control_blk_addr =
+               compat_ptr(xcrb32.request_control_blk_addr);
+       xcrb64.request_data_length =
+               xcrb32.request_data_length;
+       xcrb64.request_data_address =
+               compat_ptr(xcrb32.request_data_address);
+       xcrb64.reply_control_blk_length =
+               xcrb32.reply_control_blk_length;
+       xcrb64.reply_control_blk_addr =
+               compat_ptr(xcrb32.reply_control_blk_addr);
+       xcrb64.reply_data_length = xcrb32.reply_data_length;
+       xcrb64.reply_data_addr =
+               compat_ptr(xcrb32.reply_data_addr);
+       xcrb64.priority_window = xcrb32.priority_window;
+       xcrb64.status = xcrb32.status;
        do {
-               rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB64);
+               rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb64);
                if (rc == -EAGAIN)
                        tr.again_counter++;
        } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
        /* on failure: retry once again after a requested rescan */
        if ((rc == -ENODEV) && (zcrypt_process_rescan()))
                do {
-                       rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB64);
+                       rc = _zcrypt_send_cprb(true, perms, &tr, &xcrb64);
                        if (rc == -EAGAIN)
                                tr.again_counter++;
                } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX);
        if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX)
                rc = -EIO;
-       xcRB32.reply_control_blk_length = xcRB64.reply_control_blk_length;
-       xcRB32.reply_data_length = xcRB64.reply_data_length;
-       xcRB32.status = xcRB64.status;
-       if (copy_to_user(uxcRB32, &xcRB32, sizeof(xcRB32)))
+       xcrb32.reply_control_blk_length = xcrb64.reply_control_blk_length;
+       xcrb32.reply_data_length = xcrb64.reply_data_length;
+       xcrb32.status = xcrb64.status;
+       if (copy_to_user(uxcrb32, &xcrb32, sizeof(xcrb32)))
                return -EFAULT;
        return rc;
 }
 
 static long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd,
-                        unsigned long arg)
+                               unsigned long arg)
 {
        int rc;
        struct ap_perms *perms =
-               (struct ap_perms *) filp->private_data;
+               (struct ap_perms *)filp->private_data;
 
        rc = zcrypt_check_ioctl(perms, cmd);
        if (rc)
@@ -1989,7 +1989,7 @@ static long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd,
        if (cmd == ICARSACRT)
                return trans_modexpo_crt32(perms, filp, cmd, arg);
        if (cmd == ZSECSENDCPRB)
-               return trans_xcRB32(perms, filp, cmd, arg);
+               return trans_xcrb32(perms, filp, cmd, arg);
        return zcrypt_unlocked_ioctl(filp, cmd, arg);
 }
 #endif
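
trans_xcrb32() above is the canonical compat-ioctl widening dance: the 32-bit userspace layout is copied in, each compat_uptr_t is promoted with compat_ptr(), the native 64-bit handler runs, and only the fields userspace may read are narrowed and copied back. The skeleton reduced to one pointer field; the sketch structs merely echo the compat_ica_xcrb/ica_xcRB pair and the field choice is illustrative:

    #include <linux/compat.h>
    #include <linux/uaccess.h>

    struct sketch32 {               /* 32-bit view, like compat_ica_xcrb */
            compat_uptr_t data_addr;
            u32 data_len;
    } __packed;

    struct sketch64 {               /* native view, like ica_xcRB */
            void __user *data_addr;
            u32 data_len;
    };

    static long sketch_compat(unsigned long arg)
    {
            struct sketch32 __user *uptr = compat_ptr(arg);
            struct sketch32 s32;
            struct sketch64 s64;

            if (copy_from_user(&s32, uptr, sizeof(s32)))
                    return -EFAULT;
            s64.data_addr = compat_ptr(s32.data_addr);      /* widen */
            s64.data_len = s32.data_len;
            /* ... call the native handler with &s64 here ... */
            s32.data_len = s64.data_len;                    /* narrow back */
            if (copy_to_user(uptr, &s32, sizeof(s32)))
                    return -EFAULT;
            return 0;
    }
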
@@ -2033,10 +2033,10 @@ static int zcrypt_rng_data_read(struct hwrng *rng, u32 *data)
         * read method calls.
         */
        if (zcrypt_rng_buffer_index == 0) {
-               rc = zcrypt_rng((char *) zcrypt_rng_buffer);
+               rc = zcrypt_rng((char *)zcrypt_rng_buffer);
                /* on failure: retry once again after a requested rescan */
                if ((rc == -ENODEV) && (zcrypt_process_rescan()))
-                       rc = zcrypt_rng((char *) zcrypt_rng_buffer);
+                       rc = zcrypt_rng((char *)zcrypt_rng_buffer);
                if (rc < 0)
                        return -EIO;
                zcrypt_rng_buffer_index = rc / sizeof(*data);
@@ -2057,7 +2057,7 @@ int zcrypt_rng_device_add(void)
 
        mutex_lock(&zcrypt_rng_mutex);
        if (zcrypt_rng_device_count == 0) {
-               zcrypt_rng_buffer = (u32 *) get_zeroed_page(GFP_KERNEL);
+               zcrypt_rng_buffer = (u32 *)get_zeroed_page(GFP_KERNEL);
                if (!zcrypt_rng_buffer) {
                        rc = -ENOMEM;
                        goto out;
@@ -2069,13 +2069,14 @@ int zcrypt_rng_device_add(void)
                if (rc)
                        goto out_free;
                zcrypt_rng_device_count = 1;
-       } else
+       } else {
                zcrypt_rng_device_count++;
+       }
        mutex_unlock(&zcrypt_rng_mutex);
        return 0;
 
 out_free:
-       free_page((unsigned long) zcrypt_rng_buffer);
+       free_page((unsigned long)zcrypt_rng_buffer);
 out:
        mutex_unlock(&zcrypt_rng_mutex);
        return rc;
@@ -2087,7 +2088,7 @@ void zcrypt_rng_device_remove(void)
        zcrypt_rng_device_count--;
        if (zcrypt_rng_device_count == 0) {
                hwrng_unregister(&zcrypt_rng_dev);
-               free_page((unsigned long) zcrypt_rng_buffer);
+               free_page((unsigned long)zcrypt_rng_buffer);
        }
        mutex_unlock(&zcrypt_rng_mutex);
 }
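
The RNG hunks keep a page-sized bounce buffer whose allocator speaks in unsigned long while its users speak in pointers, hence the paired casts the cleanup tightens. The buffer lifecycle in isolation, as a sketch assuming GFP_KERNEL context; the function names are invented:

    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/types.h>

    static u32 *rng_buf;    /* one zeroed page reinterpreted as u32s */

    static int sketch_alloc(void)
    {
            /* get_zeroed_page() hands back an address as unsigned long */
            rng_buf = (u32 *)get_zeroed_page(GFP_KERNEL);
            return rng_buf ? 0 : -ENOMEM;
    }

    static void sketch_free(void)
    {
            /* free_page() wants the unsigned long form back */
            free_page((unsigned long)rng_buf);
            rng_buf = NULL;
    }
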
index 93e77e83ad147513d42411a3feee6bd660d504d7..f299deb8b8c7650a4e7564c0d829820ebc8ae3cd 100644 (file)
@@ -170,7 +170,7 @@ static inline unsigned long z_copy_from_user(bool userspace,
 {
        if (likely(userspace))
                return copy_from_user(to, from, n);
-       memcpy(to, (void __force *) from, n);
+       memcpy(to, (void __force *)from, n);
        return 0;
 }
 
@@ -181,7 +181,7 @@ static inline unsigned long z_copy_to_user(bool userspace,
 {
        if (likely(userspace))
                return copy_to_user(to, from, n);
-       memcpy((void __force *) to, from, n);
+       memcpy((void __force *)to, from, n);
        return 0;
 }
 
index fcbd537530e8469bdbf006145f03fe10d54fff64..6ca675042416c90ba5214e2543a339f70c98a0b2 100644 (file)
@@ -138,7 +138,7 @@ struct zcrypt_card *zcrypt_card_alloc(void)
 {
        struct zcrypt_card *zc;
 
-       zc = kzalloc(sizeof(struct zcrypt_card), GFP_KERNEL);
+       zc = kzalloc(sizeof(*zc), GFP_KERNEL);
        if (!zc)
                return NULL;
        INIT_LIST_HEAD(&zc->list);
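
One of the few non-whitespace changes: kzalloc(sizeof(struct zcrypt_card), ...) becomes kzalloc(sizeof(*zc), ...), the form checkpatch prefers because the size expression keeps tracking the variable even if its type is later renamed. The same idiom in a free-standing sketch with an invented struct:

    #include <linux/slab.h>

    struct sketch_card {
            int id;
    };

    static struct sketch_card *sketch_card_alloc(void)
    {
            struct sketch_card *c;

            /* sizeof(*c) follows the variable, not a repeated type name */
            c = kzalloc(sizeof(*c), GFP_KERNEL);
            return c;               /* NULL on allocation failure */
    }
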
index f09bb850763b59035a1dec8a084d782ee57501e4..6229ba9c56d9c8209e00540c88be38c11b9e0ab5 100644 (file)
@@ -11,7 +11,7 @@
 #ifndef _ZCRYPT_CCA_KEY_H_
 #define _ZCRYPT_CCA_KEY_H_
 
-struct T6_keyBlock_hdr {
+struct t6_keyblock_hdr {
        unsigned short blen;
        unsigned short ulen;
        unsigned short flags;
@@ -63,7 +63,7 @@ struct cca_public_sec {
  * complement of the residue modulo 8 of the sum of
  * (p_len + q_len + dp_len + dq_len + u_len).
  */
-struct cca_pvt_ext_CRT_sec {
+struct cca_pvt_ext_crt_sec {
        unsigned char  section_identifier;
        unsigned char  version;
        unsigned short section_length;
@@ -108,9 +108,9 @@ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, void *p)
                .section_identifier     =  0x04,
        };
        struct {
-               struct T6_keyBlock_hdr t6_hdr;
-               struct cca_token_hdr pubHdr;
-               struct cca_public_sec pubSec;
+               struct t6_keyblock_hdr t6_hdr;
+               struct cca_token_hdr pubhdr;
+               struct cca_public_sec pubsec;
                char exponent[0];
        } __packed *key = p;
        unsigned char *temp;
@@ -127,8 +127,8 @@ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, void *p)
 
        memset(key, 0, sizeof(*key));
 
-       key->pubHdr = static_pub_hdr;
-       key->pubSec = static_pub_sec;
+       key->pubhdr = static_pub_hdr;
+       key->pubsec = static_pub_sec;
 
        /* key parameter block */
        temp = key->exponent;
@@ -146,16 +146,16 @@ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, void *p)
        if (copy_from_user(temp, mex->n_modulus, mex->inputdatalength))
                return -EFAULT;
 
-       key->pubSec.modulus_bit_len = 8 * mex->inputdatalength;
-       key->pubSec.modulus_byte_len = mex->inputdatalength;
-       key->pubSec.exponent_len = mex->inputdatalength - i;
-       key->pubSec.section_length = sizeof(key->pubSec) +
-                                       2*mex->inputdatalength - i;
-       key->pubHdr.token_length =
-               key->pubSec.section_length + sizeof(key->pubHdr);
-       key->t6_hdr.ulen = key->pubHdr.token_length + 4;
-       key->t6_hdr.blen = key->pubHdr.token_length + 6;
-       return sizeof(*key) + 2*mex->inputdatalength - i;
+       key->pubsec.modulus_bit_len = 8 * mex->inputdatalength;
+       key->pubsec.modulus_byte_len = mex->inputdatalength;
+       key->pubsec.exponent_len = mex->inputdatalength - i;
+       key->pubsec.section_length = sizeof(key->pubsec) +
+                                       2 * mex->inputdatalength - i;
+       key->pubhdr.token_length =
+               key->pubsec.section_length + sizeof(key->pubhdr);
+       key->t6_hdr.ulen = key->pubhdr.token_length + 4;
+       key->t6_hdr.blen = key->pubhdr.token_length + 6;
+       return sizeof(*key) + 2 * mex->inputdatalength - i;
 }
 
 /**
@@ -177,9 +177,9 @@ static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, void *p)
        };
        static char pk_exponent[3] = { 0x01, 0x00, 0x01 };
        struct {
-               struct T6_keyBlock_hdr t6_hdr;
+               struct t6_keyblock_hdr t6_hdr;
                struct cca_token_hdr token;
-               struct cca_pvt_ext_CRT_sec pvt;
+               struct cca_pvt_ext_crt_sec pvt;
                char key_parts[0];
        } __packed *key = p;
        struct cca_public_sec *pub;
@@ -198,8 +198,8 @@ static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, void *p)
 
        short_len = (crt->inputdatalength + 1) / 2;
        long_len = short_len + 8;
-       pad_len = -(3*long_len + 2*short_len) & 7;
-       key_len = 3*long_len + 2*short_len + pad_len + crt->inputdatalength;
+       pad_len = -(3 * long_len + 2 * short_len) & 7;
+       key_len = 3 * long_len + 2 * short_len + pad_len + crt->inputdatalength;
        size = sizeof(*key) + key_len + sizeof(*pub) + 3;
 
        /* parameter block.key block */
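/*
 * A minimal illustration of the padding idiom used in the hunk above:
 * for any non-negative x, (-x) & 7 is the number of bytes needed to
 * round x up to the next multiple of 8, i.e. (8 - x % 8) % 8, matching
 * the earlier comment on cca_pvt_ext_crt_sec about 8-byte alignment of
 * the key parts.  Standalone sketch (assumes two's-complement
 * arithmetic, as the kernel does):
 */
static inline unsigned int pad_to_multiple_of_8(unsigned int x)
{
	return -x & 7;	/* e.g. x = 13 -> 3, x = 16 -> 0 */
}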
@@ -223,15 +223,15 @@ static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, void *p)
        /* key parts */
        if (copy_from_user(key->key_parts, crt->np_prime, long_len) ||
            copy_from_user(key->key_parts + long_len,
-                                       crt->nq_prime, short_len) ||
+                          crt->nq_prime, short_len) ||
            copy_from_user(key->key_parts + long_len + short_len,
-                                       crt->bp_key, long_len) ||
-           copy_from_user(key->key_parts + 2*long_len + short_len,
-                                       crt->bq_key, short_len) ||
-           copy_from_user(key->key_parts + 2*long_len + 2*short_len,
-                                       crt->u_mult_inv, long_len))
+                          crt->bp_key, long_len) ||
+           copy_from_user(key->key_parts + 2 * long_len + short_len,
+                          crt->bq_key, short_len) ||
+           copy_from_user(key->key_parts + 2 * long_len + 2 * short_len,
+                          crt->u_mult_inv, long_len))
                return -EFAULT;
-       memset(key->key_parts + 3*long_len + 2*short_len + pad_len,
+       memset(key->key_parts + 3 * long_len + 2 * short_len + pad_len,
               0xff, crt->inputdatalength);
        pub = (struct cca_public_sec *)(key->key_parts + key_len);
        *pub = static_cca_pub_sec;
@@ -241,7 +241,7 @@ static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, void *p)
         * section. So, an arbitrary public exponent of 0x010001 will be
         * used.
         */
-       memcpy((char *) (pub + 1), pk_exponent, 3);
+       memcpy((char *)(pub + 1), pk_exponent, 3);
        return size;
 }
 
index 6a3c2b460965296f26d0de7a3c8cc2034567923c..60ba20a133bed0b7ed263210fae90b9efffa3c10 100644 (file)
@@ -53,26 +53,26 @@ static DEFINE_SPINLOCK(cca_info_list_lock);
 int cca_check_secaeskeytoken(debug_info_t *dbg, int dbflvl,
                             const u8 *token, int keybitsize)
 {
-       struct secaeskeytoken *t = (struct secaeskeytoken *) token;
+       struct secaeskeytoken *t = (struct secaeskeytoken *)token;
 
 #define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
 
        if (t->type != TOKTYPE_CCA_INTERNAL) {
                if (dbg)
                        DBF("%s token check failed, type 0x%02x != 0x%02x\n",
-                           __func__, (int) t->type, TOKTYPE_CCA_INTERNAL);
+                           __func__, (int)t->type, TOKTYPE_CCA_INTERNAL);
                return -EINVAL;
        }
        if (t->version != TOKVER_CCA_AES) {
                if (dbg)
                        DBF("%s token check failed, version 0x%02x != 0x%02x\n",
-                           __func__, (int) t->version, TOKVER_CCA_AES);
+                           __func__, (int)t->version, TOKVER_CCA_AES);
                return -EINVAL;
        }
        if (keybitsize > 0 && t->bitsize != keybitsize) {
                if (dbg)
                        DBF("%s token check failed, bitsize %d != %d\n",
-                           __func__, (int) t->bitsize, keybitsize);
+                           __func__, (int)t->bitsize, keybitsize);
                return -EINVAL;
        }
 
@@ -93,7 +93,7 @@ int cca_check_secaescipherkey(debug_info_t *dbg, int dbflvl,
                              const u8 *token, int keybitsize,
                              int checkcpacfexport)
 {
-       struct cipherkeytoken *t = (struct cipherkeytoken *) token;
+       struct cipherkeytoken *t = (struct cipherkeytoken *)token;
        bool keybitsizeok = true;
 
 #define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
@@ -101,37 +101,37 @@ int cca_check_secaescipherkey(debug_info_t *dbg, int dbflvl,
        if (t->type != TOKTYPE_CCA_INTERNAL) {
                if (dbg)
                        DBF("%s token check failed, type 0x%02x != 0x%02x\n",
-                           __func__, (int) t->type, TOKTYPE_CCA_INTERNAL);
+                           __func__, (int)t->type, TOKTYPE_CCA_INTERNAL);
                return -EINVAL;
        }
        if (t->version != TOKVER_CCA_VLSC) {
                if (dbg)
                        DBF("%s token check failed, version 0x%02x != 0x%02x\n",
-                           __func__, (int) t->version, TOKVER_CCA_VLSC);
+                           __func__, (int)t->version, TOKVER_CCA_VLSC);
                return -EINVAL;
        }
        if (t->algtype != 0x02) {
                if (dbg)
                        DBF("%s token check failed, algtype 0x%02x != 0x02\n",
-                           __func__, (int) t->algtype);
+                           __func__, (int)t->algtype);
                return -EINVAL;
        }
        if (t->keytype != 0x0001) {
                if (dbg)
                        DBF("%s token check failed, keytype 0x%04x != 0x0001\n",
-                           __func__, (int) t->keytype);
+                           __func__, (int)t->keytype);
                return -EINVAL;
        }
        if (t->plfver != 0x00 && t->plfver != 0x01) {
                if (dbg)
                        DBF("%s token check failed, unknown plfver 0x%02x\n",
-                           __func__, (int) t->plfver);
+                           __func__, (int)t->plfver);
                return -EINVAL;
        }
        if (t->wpllen != 512 && t->wpllen != 576 && t->wpllen != 640) {
                if (dbg)
                        DBF("%s token check failed, unknown wpllen %d\n",
-                           __func__, (int) t->wpllen);
+                           __func__, (int)t->wpllen);
                return -EINVAL;
        }
        if (keybitsize > 0) {
@@ -180,26 +180,26 @@ int cca_check_sececckeytoken(debug_info_t *dbg, int dbflvl,
                             const u8 *token, size_t keysize,
                             int checkcpacfexport)
 {
-       struct eccprivkeytoken *t = (struct eccprivkeytoken *) token;
+       struct eccprivkeytoken *t = (struct eccprivkeytoken *)token;
 
 #define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
 
        if (t->type != TOKTYPE_CCA_INTERNAL_PKA) {
                if (dbg)
                        DBF("%s token check failed, type 0x%02x != 0x%02x\n",
-                           __func__, (int) t->type, TOKTYPE_CCA_INTERNAL_PKA);
+                           __func__, (int)t->type, TOKTYPE_CCA_INTERNAL_PKA);
                return -EINVAL;
        }
        if (t->len > keysize) {
                if (dbg)
                        DBF("%s token check failed, len %d > keysize %zu\n",
-                           __func__, (int) t->len, keysize);
+                           __func__, (int)t->len, keysize);
                return -EINVAL;
        }
        if (t->secid != 0x20) {
                if (dbg)
                        DBF("%s token check failed, secid 0x%02x != 0x20\n",
-                           __func__, (int) t->secid);
+                           __func__, (int)t->secid);
                return -EINVAL;
        }
        if (checkcpacfexport && !(t->kutc & 0x01)) {
@@ -222,9 +222,9 @@ EXPORT_SYMBOL(cca_check_sececckeytoken);
  * on failure.
  */
 static int alloc_and_prep_cprbmem(size_t paramblen,
-                                 u8 **pcprbmem,
-                                 struct CPRBX **preqCPRB,
-                                 struct CPRBX **prepCPRB)
+                                 u8 **p_cprb_mem,
+                                 struct CPRBX **p_req_cprb,
+                                 struct CPRBX **p_rep_cprb)
 {
        u8 *cprbmem;
        size_t cprbplusparamblen = sizeof(struct CPRBX) + paramblen;
@@ -238,8 +238,8 @@ static int alloc_and_prep_cprbmem(size_t paramblen,
        if (!cprbmem)
                return -ENOMEM;
 
-       preqcblk = (struct CPRBX *) cprbmem;
-       prepcblk = (struct CPRBX *) (cprbmem + cprbplusparamblen);
+       preqcblk = (struct CPRBX *)cprbmem;
+       prepcblk = (struct CPRBX *)(cprbmem + cprbplusparamblen);
 
        /* fill request cprb struct */
        preqcblk->cprb_len = sizeof(struct CPRBX);
@@ -248,14 +248,14 @@ static int alloc_and_prep_cprbmem(size_t paramblen,
        preqcblk->rpl_msgbl = cprbplusparamblen;
        if (paramblen) {
                preqcblk->req_parmb =
-                       ((u8 __user *) preqcblk) + sizeof(struct CPRBX);
+                       ((u8 __user *)preqcblk) + sizeof(struct CPRBX);
                preqcblk->rpl_parmb =
-                       ((u8 __user *) prepcblk) + sizeof(struct CPRBX);
+                       ((u8 __user *)prepcblk) + sizeof(struct CPRBX);
        }
 
-       *pcprbmem = cprbmem;
-       *preqCPRB = preqcblk;
-       *prepCPRB = prepcblk;
+       *p_cprb_mem = cprbmem;
+       *p_req_cprb = preqcblk;
+       *p_rep_cprb = prepcblk;
 
        return 0;
 }
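/*
 * Sketch of the single allocation prepared by alloc_and_prep_cprbmem(),
 * as implied by the pointer arithmetic above (illustrative, not taken
 * from documentation):
 *
 *   cprbmem
 *   |<------- cprbplusparamblen -------->|<------- cprbplusparamblen ------->|
 *   +---------------+---------------------+---------------+------------------+
 *   | request CPRBX | request param block | reply CPRBX   | reply param block|
 *   +---------------+---------------------+---------------+------------------+
 *   ^ preqcblk      ^ req_parmb           ^ prepcblk      ^ rpl_parmb
 */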
@@ -286,9 +286,9 @@ static inline void prep_xcrb(struct ica_xcRB *pxcrb,
        pxcrb->user_defined = (cardnr == 0xFFFF ? AUTOSELECT : cardnr);
        pxcrb->request_control_blk_length =
                preqcblk->cprb_len + preqcblk->req_parml;
-       pxcrb->request_control_blk_addr = (void __user *) preqcblk;
+       pxcrb->request_control_blk_addr = (void __user *)preqcblk;
        pxcrb->reply_control_blk_length = preqcblk->rpl_msgbl;
-       pxcrb->reply_control_blk_addr = (void __user *) prepcblk;
+       pxcrb->reply_control_blk_addr = (void __user *)prepcblk;
 }
 
 /*
@@ -345,7 +345,7 @@ int cca_genseckey(u16 cardnr, u16 domain,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with KG request */
-       preqparm = (struct kgreqparm __force *) preqcblk->req_parmb;
+       preqparm = (struct kgreqparm __force *)preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "KG", 2);
        preqparm->rule_array_len = sizeof(preqparm->rule_array_len);
        preqparm->lv1.len = sizeof(struct lv1);
@@ -387,7 +387,7 @@ int cca_genseckey(u16 cardnr, u16 domain,
        rc = zcrypt_send_cprb(&xcrb);
        if (rc) {
                DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, errno %d\n",
-                         __func__, (int) cardnr, (int) domain, rc);
+                         __func__, (int)cardnr, (int)domain, rc);
                goto out;
        }
 
@@ -395,16 +395,16 @@ int cca_genseckey(u16 cardnr, u16 domain,
        if (prepcblk->ccp_rtcode != 0) {
                DEBUG_ERR("%s secure key generate failure, card response %d/%d\n",
                          __func__,
-                         (int) prepcblk->ccp_rtcode,
-                         (int) prepcblk->ccp_rscode);
+                         (int)prepcblk->ccp_rtcode,
+                         (int)prepcblk->ccp_rscode);
                rc = -EIO;
                goto out;
        }
 
        /* process response cprb param block */
-       ptr =  ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepcblk->rpl_parmb = (u8 __user *) ptr;
-       prepparm = (struct kgrepparm *) ptr;
+       ptr = ((u8 *)prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *)ptr;
+       prepparm = (struct kgrepparm *)ptr;
 
        /* check length of the returned secure key token */
        seckeysize = prepparm->lv3.keyblock.toklen
@@ -419,7 +419,7 @@ int cca_genseckey(u16 cardnr, u16 domain,
 
        /* check secure key token */
        rc = cca_check_secaeskeytoken(zcrypt_dbf_info, DBF_ERR,
-                                     prepparm->lv3.keyblock.tok, 8*keysize);
+                                     prepparm->lv3.keyblock.tok, 8 * keysize);
        if (rc) {
                rc = -EIO;
                goto out;
@@ -486,7 +486,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with CM request */
-       preqparm = (struct cmreqparm __force *) preqcblk->req_parmb;
+       preqparm = (struct cmreqparm __force *)preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "CM", 2);
        memcpy(preqparm->rule_array, "AES     ", 8);
        preqparm->rule_array_len =
@@ -512,7 +512,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
        }
        preqparm->lv1.len = sizeof(struct lv1) + keysize;
        memcpy(preqparm->lv1.clrkey, clrkey, keysize);
-       plv2 = (struct lv2 *) (((u8 *) &preqparm->lv2) + keysize);
+       plv2 = (struct lv2 *)(((u8 *)&preqparm->lv2) + keysize);
        plv2->len = sizeof(struct lv2);
        plv2->keyid.len = sizeof(struct keyid);
        plv2->keyid.attr = 0x30;
@@ -525,7 +525,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
        rc = zcrypt_send_cprb(&xcrb);
        if (rc) {
                DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
-                         __func__, (int) cardnr, (int) domain, rc);
+                         __func__, (int)cardnr, (int)domain, rc);
                goto out;
        }
 
@@ -533,16 +533,16 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
        if (prepcblk->ccp_rtcode != 0) {
                DEBUG_ERR("%s clear key import failure, card response %d/%d\n",
                          __func__,
-                         (int) prepcblk->ccp_rtcode,
-                         (int) prepcblk->ccp_rscode);
+                         (int)prepcblk->ccp_rtcode,
+                         (int)prepcblk->ccp_rscode);
                rc = -EIO;
                goto out;
        }
 
        /* process response cprb param block */
-       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepcblk->rpl_parmb = (u8 __user *) ptr;
-       prepparm = (struct cmrepparm *) ptr;
+       ptr = ((u8 *)prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *)ptr;
+       prepparm = (struct cmrepparm *)ptr;
 
        /* check length of the returned secure key token */
        seckeysize = prepparm->lv3.keyblock.toklen
@@ -557,7 +557,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
 
        /* check secure key token */
        rc = cca_check_secaeskeytoken(zcrypt_dbf_info, DBF_ERR,
-                                     prepparm->lv3.keyblock.tok, 8*keysize);
+                                     prepparm->lv3.keyblock.tok, 8 * keysize);
        if (rc) {
                rc = -EIO;
                goto out;
@@ -632,7 +632,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with USK request */
-       preqparm = (struct uskreqparm __force *) preqcblk->req_parmb;
+       preqparm = (struct uskreqparm __force *)preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "US", 2);
        preqparm->rule_array_len = sizeof(preqparm->rule_array_len);
        preqparm->lv1.len = sizeof(struct lv1);
@@ -652,7 +652,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
        rc = zcrypt_send_cprb(&xcrb);
        if (rc) {
                DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
-                         __func__, (int) cardnr, (int) domain, rc);
+                         __func__, (int)cardnr, (int)domain, rc);
                goto out;
        }
 
@@ -660,8 +660,8 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
        if (prepcblk->ccp_rtcode != 0) {
                DEBUG_ERR("%s unwrap secure key failure, card response %d/%d\n",
                          __func__,
-                         (int) prepcblk->ccp_rtcode,
-                         (int) prepcblk->ccp_rscode);
+                         (int)prepcblk->ccp_rtcode,
+                         (int)prepcblk->ccp_rscode);
                if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290)
                        rc = -EAGAIN;
                else
@@ -671,37 +671,37 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
        if (prepcblk->ccp_rscode != 0) {
                DEBUG_WARN("%s unwrap secure key warning, card response %d/%d\n",
                           __func__,
-                          (int) prepcblk->ccp_rtcode,
-                          (int) prepcblk->ccp_rscode);
+                          (int)prepcblk->ccp_rtcode,
+                          (int)prepcblk->ccp_rscode);
        }
 
        /* process response cprb param block */
-       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepcblk->rpl_parmb = (u8 __user *) ptr;
-       prepparm = (struct uskrepparm *) ptr;
+       ptr = ((u8 *)prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *)ptr;
+       prepparm = (struct uskrepparm *)ptr;
 
        /* check the returned keyblock */
        if (prepparm->lv3.ckb.version != 0x01 &&
            prepparm->lv3.ckb.version != 0x02) {
                DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x\n",
-                         __func__, (int) prepparm->lv3.ckb.version);
+                         __func__, (int)prepparm->lv3.ckb.version);
                rc = -EIO;
                goto out;
        }
 
        /* copy the translated protected key */
        switch (prepparm->lv3.ckb.len) {
-       case 16+32:
+       case 16 + 32:
                /* AES 128 protected key */
                if (protkeytype)
                        *protkeytype = PKEY_KEYTYPE_AES_128;
                break;
-       case 24+32:
+       case 24 + 32:
                /* AES 192 protected key */
                if (protkeytype)
                        *protkeytype = PKEY_KEYTYPE_AES_192;
                break;
-       case 32+32:
+       case 32 + 32:
                /* AES 256 protected key */
                if (protkeytype)
                        *protkeytype = PKEY_KEYTYPE_AES_256;
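/*
 * The lengths matched above follow the CPACF protected-key layout: the
 * effective AES key (16, 24 or 32 bytes) is followed by what appears to
 * be a 32-byte wrapping-key verification pattern, hence 16 + 32, 24 + 32
 * and 32 + 32.  A hypothetical helper mirroring the switch above:
 */
static int protkeytype_from_keyblock_len(size_t len)
{
	switch (len) {
	case 16 + 32:
		return PKEY_KEYTYPE_AES_128;
	case 24 + 32:
		return PKEY_KEYTYPE_AES_192;
	case 32 + 32:
		return PKEY_KEYTYPE_AES_256;
	default:
		return -EINVAL;	/* unknown protected key length */
	}
}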
@@ -751,7 +751,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
        struct gkreqparm {
                u8  subfunc_code[2];
                u16 rule_array_len;
-               char rule_array[2*8];
+               char rule_array[2 * 8];
                struct {
                        u16 len;
                        u8  key_type_1[8];
@@ -827,10 +827,10 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
        preqcblk->req_parml = sizeof(struct gkreqparm);
 
        /* prepare request param block with GK request */
-       preqparm = (struct gkreqparm __force *) preqcblk->req_parmb;
+       preqparm = (struct gkreqparm __force *)preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "GK", 2);
        preqparm->rule_array_len =  sizeof(uint16_t) + 2 * 8;
-       memcpy(preqparm->rule_array, "AES     OP      ", 2*8);
+       memcpy(preqparm->rule_array, "AES     OP      ", 2 * 8);
 
        /* prepare vud block */
        preqparm->vud.len = sizeof(preqparm->vud);
@@ -869,9 +869,9 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
 
        /* patch the skeleton key token export flags inside the kb block */
        if (keygenflags) {
-               t = (struct cipherkeytoken *) preqparm->kb.tlv3.gen_key_id_1;
-               t->kmf1 |= (u16) (keygenflags & 0x0000FF00);
-               t->kmf1 &= (u16) ~(keygenflags & 0x000000FF);
+               t = (struct cipherkeytoken *)preqparm->kb.tlv3.gen_key_id_1;
+               t->kmf1 |= (u16)(keygenflags & 0x0000FF00);
+               t->kmf1 &= (u16)~(keygenflags & 0x000000FF);
        }
 
        /* prepare xcrb struct */
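/*
 * The keygenflags patching above folds one u32 into two masks for the
 * token's kmf1 field: bits 8..15 of keygenflags are OR-ed in (set), and
 * bits 0..7 name bits to clear.  Standalone sketch of the same masking:
 */
static inline u16 patch_kmf1(u16 kmf1, u32 keygenflags)
{
	kmf1 |= (u16)(keygenflags & 0x0000FF00);	/* bits to set   */
	kmf1 &= (u16)~(keygenflags & 0x000000FF);	/* bits to clear */
	return kmf1;
}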
@@ -882,7 +882,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
        if (rc) {
                DEBUG_ERR(
                        "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
-                       __func__, (int) cardnr, (int) domain, rc);
+                       __func__, (int)cardnr, (int)domain, rc);
                goto out;
        }
 
@@ -891,16 +891,16 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
                DEBUG_ERR(
                        "%s cipher key generate failure, card response %d/%d\n",
                        __func__,
-                       (int) prepcblk->ccp_rtcode,
-                       (int) prepcblk->ccp_rscode);
+                       (int)prepcblk->ccp_rtcode,
+                       (int)prepcblk->ccp_rscode);
                rc = -EIO;
                goto out;
        }
 
        /* process response cprb param block */
-       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepcblk->rpl_parmb = (u8 __user *) ptr;
-       prepparm = (struct gkrepparm *) ptr;
+       ptr = ((u8 *)prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *)ptr;
+       prepparm = (struct gkrepparm *)ptr;
 
        /* do some plausibility checks on the key block */
        if (prepparm->kb.len < 120 + 5 * sizeof(uint16_t) ||
@@ -921,7 +921,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
        }
 
        /* copy the generated vlsc key token */
-       t = (struct cipherkeytoken *) prepparm->kb.tlv1.gen_key;
+       t = (struct cipherkeytoken *)prepparm->kb.tlv1.gen_key;
        if (keybuf) {
                if (*keybufsize >= t->len)
                        memcpy(keybuf, t, t->len);
@@ -1006,7 +1006,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
        preqcblk->req_parml = 0;
 
        /* prepare request param block with IP request */
-       preq_ra_block = (struct rule_array_block __force *) preqcblk->req_parmb;
+       preq_ra_block = (struct rule_array_block __force *)preqcblk->req_parmb;
        memcpy(preq_ra_block->subfunc_code, "IP", 2);
        preq_ra_block->rule_array_len =  sizeof(uint16_t) + 2 * 8;
        memcpy(preq_ra_block->rule_array, rule_array_1, 8);
@@ -1050,7 +1050,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
        if (rc) {
                DEBUG_ERR(
                        "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
-                       __func__, (int) cardnr, (int) domain, rc);
+                       __func__, (int)cardnr, (int)domain, rc);
                goto out;
        }
 
@@ -1059,16 +1059,16 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
                DEBUG_ERR(
                        "%s CSNBKPI2 failure, card response %d/%d\n",
                        __func__,
-                       (int) prepcblk->ccp_rtcode,
-                       (int) prepcblk->ccp_rscode);
+                       (int)prepcblk->ccp_rtcode,
+                       (int)prepcblk->ccp_rscode);
                rc = -EIO;
                goto out;
        }
 
        /* process response cprb param block */
-       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepcblk->rpl_parmb = (u8 __user *) ptr;
-       prepparm = (struct iprepparm *) ptr;
+       ptr = ((u8 *)prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *)ptr;
+       prepparm = (struct iprepparm *)ptr;
 
        /* do some plausibility checks on the key block */
        if (prepparm->kb.len < 120 + 3 * sizeof(uint16_t) ||
@@ -1082,7 +1082,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
        /* do not check the key here, it may be incomplete */
 
        /* copy the vlsc key token back */
-       t = (struct cipherkeytoken *) prepparm->kb.tlv1.key_token;
+       t = (struct cipherkeytoken *)prepparm->kb.tlv1.key_token;
        memcpy(key_token, t, t->len);
        *key_token_size = t->len;
 
@@ -1117,9 +1117,9 @@ int cca_clr2cipherkey(u16 card, u16 dom, u32 keybitsize, u32 keygenflags,
 
        /* patch the skeleton key token export flags */
        if (keygenflags) {
-               t = (struct cipherkeytoken *) token;
-               t->kmf1 |= (u16) (keygenflags & 0x0000FF00);
-               t->kmf1 &= (u16) ~(keygenflags & 0x000000FF);
+               t = (struct cipherkeytoken *)token;
+               t->kmf1 |= (u16)(keygenflags & 0x0000FF00);
+               t->kmf1 &= (u16)~(keygenflags & 0x000000FF);
        }
 
        /*
@@ -1241,7 +1241,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with AU request */
-       preqparm = (struct aureqparm __force *) preqcblk->req_parmb;
+       preqparm = (struct aureqparm __force *)preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "AU", 2);
        preqparm->rule_array_len =
                sizeof(preqparm->rule_array_len)
@@ -1267,7 +1267,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
        if (rc) {
                DEBUG_ERR(
                        "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
-                       __func__, (int) cardnr, (int) domain, rc);
+                       __func__, (int)cardnr, (int)domain, rc);
                goto out;
        }
 
@@ -1276,8 +1276,8 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
                DEBUG_ERR(
                        "%s unwrap secure key failure, card response %d/%d\n",
                        __func__,
-                       (int) prepcblk->ccp_rtcode,
-                       (int) prepcblk->ccp_rscode);
+                       (int)prepcblk->ccp_rtcode,
+                       (int)prepcblk->ccp_rscode);
                if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290)
                        rc = -EAGAIN;
                else
@@ -1288,44 +1288,44 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
                DEBUG_WARN(
                        "%s unwrap secure key warning, card response %d/%d\n",
                        __func__,
-                       (int) prepcblk->ccp_rtcode,
-                       (int) prepcblk->ccp_rscode);
+                       (int)prepcblk->ccp_rtcode,
+                       (int)prepcblk->ccp_rscode);
        }
 
        /* process response cprb param block */
-       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepcblk->rpl_parmb = (u8 __user *) ptr;
-       prepparm = (struct aurepparm *) ptr;
+       ptr = ((u8 *)prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *)ptr;
+       prepparm = (struct aurepparm *)ptr;
 
        /* check the returned keyblock */
        if (prepparm->vud.ckb.version != 0x01 &&
            prepparm->vud.ckb.version != 0x02) {
                DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x\n",
-                         __func__, (int) prepparm->vud.ckb.version);
+                         __func__, (int)prepparm->vud.ckb.version);
                rc = -EIO;
                goto out;
        }
        if (prepparm->vud.ckb.algo != 0x02) {
                DEBUG_ERR(
                        "%s reply param keyblock algo mismatch 0x%02x != 0x02\n",
-                       __func__, (int) prepparm->vud.ckb.algo);
+                       __func__, (int)prepparm->vud.ckb.algo);
                rc = -EIO;
                goto out;
        }
 
        /* copy the translated protected key */
        switch (prepparm->vud.ckb.keylen) {
-       case 16+32:
+       case 16 + 32:
                /* AES 128 protected key */
                if (protkeytype)
                        *protkeytype = PKEY_KEYTYPE_AES_128;
                break;
-       case 24+32:
+       case 24 + 32:
                /* AES 192 protected key */
                if (protkeytype)
                        *protkeytype = PKEY_KEYTYPE_AES_192;
                break;
-       case 32+32:
+       case 32 + 32:
                /* AES 256 protected key */
                if (protkeytype)
                        *protkeytype = PKEY_KEYTYPE_AES_256;
@@ -1410,7 +1410,7 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with AU request */
-       preqparm = (struct aureqparm __force *) preqcblk->req_parmb;
+       preqparm = (struct aureqparm __force *)preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "AU", 2);
        preqparm->rule_array_len =
                sizeof(preqparm->rule_array_len)
@@ -1436,7 +1436,7 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
        if (rc) {
                DEBUG_ERR(
                        "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
-                       __func__, (int) cardnr, (int) domain, rc);
+                       __func__, (int)cardnr, (int)domain, rc);
                goto out;
        }
 
@@ -1445,8 +1445,8 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
                DEBUG_ERR(
                        "%s unwrap secure key failure, card response %d/%d\n",
                        __func__,
-                       (int) prepcblk->ccp_rtcode,
-                       (int) prepcblk->ccp_rscode);
+                       (int)prepcblk->ccp_rtcode,
+                       (int)prepcblk->ccp_rscode);
                if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290)
                        rc = -EAGAIN;
                else
@@ -1457,26 +1457,26 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key,
                DEBUG_WARN(
                        "%s unwrap secure key warning, card response %d/%d\n",
                        __func__,
-                       (int) prepcblk->ccp_rtcode,
-                       (int) prepcblk->ccp_rscode);
+                       (int)prepcblk->ccp_rtcode,
+                       (int)prepcblk->ccp_rscode);
        }
 
        /* process response cprb param block */
-       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepcblk->rpl_parmb = (u8 __user *) ptr;
-       prepparm = (struct aurepparm *) ptr;
+       ptr = ((u8 *)prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *)ptr;
+       prepparm = (struct aurepparm *)ptr;
 
        /* check the returned keyblock */
        if (prepparm->vud.ckb.version != 0x02) {
                DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x != 0x02\n",
-                         __func__, (int) prepparm->vud.ckb.version);
+                         __func__, (int)prepparm->vud.ckb.version);
                rc = -EIO;
                goto out;
        }
        if (prepparm->vud.ckb.algo != 0x81) {
                DEBUG_ERR(
                        "%s reply param keyblock algo mismatch 0x%02x != 0x81\n",
-                       __func__, (int) prepparm->vud.ckb.algo);
+                       __func__, (int)prepparm->vud.ckb.algo);
                rc = -EIO;
                goto out;
        }
@@ -1537,7 +1537,7 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with FQ request */
-       preqparm = (struct fqreqparm __force *) preqcblk->req_parmb;
+       preqparm = (struct fqreqparm __force *)preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "FQ", 2);
        memcpy(preqparm->rule_array, keyword, sizeof(preqparm->rule_array));
        preqparm->rule_array_len =
@@ -1553,7 +1553,7 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
        rc = zcrypt_send_cprb(&xcrb);
        if (rc) {
                DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n",
-                         __func__, (int) cardnr, (int) domain, rc);
+                         __func__, (int)cardnr, (int)domain, rc);
                goto out;
        }
 
@@ -1561,20 +1561,20 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
        if (prepcblk->ccp_rtcode != 0) {
                DEBUG_ERR("%s unwrap secure key failure, card response %d/%d\n",
                          __func__,
-                         (int) prepcblk->ccp_rtcode,
-                         (int) prepcblk->ccp_rscode);
+                         (int)prepcblk->ccp_rtcode,
+                         (int)prepcblk->ccp_rscode);
                rc = -EIO;
                goto out;
        }
 
        /* process response cprb param block */
-       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepcblk->rpl_parmb = (u8 __user *) ptr;
-       prepparm = (struct fqrepparm *) ptr;
+       ptr = ((u8 *)prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *)ptr;
+       prepparm = (struct fqrepparm *)ptr;
        ptr = prepparm->lvdata;
 
        /* check and possibly copy reply rule array */
-       len = *((u16 *) ptr);
+       len = *((u16 *)ptr);
        if (len > sizeof(u16)) {
                ptr += sizeof(u16);
                len -= sizeof(u16);
@@ -1585,7 +1585,7 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
                ptr += len;
        }
        /* check and possibly copy reply var array */
-       len = *((u16 *) ptr);
+       len = *((u16 *)ptr);
        if (len > sizeof(u16)) {
                ptr += sizeof(u16);
                len -= sizeof(u16);
@@ -1696,21 +1696,30 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci)
        ci->hwtype = devstat.hwtype;
 
        /* prep page for rule array and var array use */
-       pg = (u8 *) __get_free_page(GFP_KERNEL);
+       pg = (u8 *)__get_free_page(GFP_KERNEL);
        if (!pg)
                return -ENOMEM;
        rarray = pg;
-       varray = pg + PAGE_SIZE/2;
-       rlen = vlen = PAGE_SIZE/2;
+       varray = pg + PAGE_SIZE / 2;
+       rlen = vlen = PAGE_SIZE / 2;
 
        /* QF for this card/domain */
        rc = cca_query_crypto_facility(cardnr, domain, "STATICSA",
                                       rarray, &rlen, varray, &vlen);
-       if (rc == 0 && rlen >= 10*8 && vlen >= 204) {
+       if (rc == 0 && rlen >= 10 * 8 && vlen >= 204) {
                memcpy(ci->serial, rarray, 8);
-               ci->new_aes_mk_state = (char) rarray[7*8];
-               ci->cur_aes_mk_state = (char) rarray[8*8];
-               ci->old_aes_mk_state = (char) rarray[9*8];
+               ci->new_asym_mk_state = (char)rarray[4 * 8];
+               ci->cur_asym_mk_state = (char)rarray[5 * 8];
+               ci->old_asym_mk_state = (char)rarray[6 * 8];
+               if (ci->old_asym_mk_state == '2')
+                       memcpy(ci->old_asym_mkvp, varray + 64, 16);
+               if (ci->cur_asym_mk_state == '2')
+                       memcpy(ci->cur_asym_mkvp, varray + 84, 16);
+               if (ci->new_asym_mk_state == '3')
+                       memcpy(ci->new_asym_mkvp, varray + 104, 16);
+               ci->new_aes_mk_state = (char)rarray[7 * 8];
+               ci->cur_aes_mk_state = (char)rarray[8 * 8];
+               ci->old_aes_mk_state = (char)rarray[9 * 8];
                if (ci->old_aes_mk_state == '2')
                        memcpy(&ci->old_aes_mkvp, varray + 172, 8);
                if (ci->cur_aes_mk_state == '2')
@@ -1721,13 +1730,13 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci)
        }
        if (!found)
                goto out;
-       rlen = vlen = PAGE_SIZE/2;
+       rlen = vlen = PAGE_SIZE / 2;
        rc = cca_query_crypto_facility(cardnr, domain, "STATICSB",
                                       rarray, &rlen, varray, &vlen);
-       if (rc == 0 && rlen >= 13*8 && vlen >= 240) {
-               ci->new_apka_mk_state = (char) rarray[10*8];
-               ci->cur_apka_mk_state = (char) rarray[11*8];
-               ci->old_apka_mk_state = (char) rarray[12*8];
+       if (rc == 0 && rlen >= 13 * 8 && vlen >= 240) {
+               ci->new_apka_mk_state = (char)rarray[10 * 8];
+               ci->cur_apka_mk_state = (char)rarray[11 * 8];
+               ci->old_apka_mk_state = (char)rarray[12 * 8];
                if (ci->old_apka_mk_state == '2')
                        memcpy(&ci->old_apka_mkvp, varray + 208, 8);
                if (ci->cur_apka_mk_state == '2')
@@ -1738,7 +1747,7 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci)
        }
 
 out:
-       free_page((unsigned long) pg);
+       free_page((unsigned long)pg);
        return found == 2 ? 0 : -ENOENT;
 }
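/*
 * The STATICSA/STATICSB replies parsed above treat the rule array as a
 * sequence of 8-character fields (the serial number fills field 0, the
 * master key state bytes sit in fields 4..12), of which only the first
 * byte of each state field is read.  A minimal accessor sketch under
 * that assumption:
 */
static inline char mk_state_byte(const u8 *rarray, int field)
{
	return (char)rarray[field * 8];	/* first byte of 8-byte field */
}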
 
@@ -1846,8 +1855,9 @@ static int findcard(u64 mkvp, u16 *pcardnr, u16 *pdomain,
                if (pdomain)
                        *pdomain = dom;
                rc = (i < MAX_ZDEV_ENTRIES_EXT ? 0 : 1);
-       } else
+       } else {
                rc = -ENODEV;
+       }
 
        kvfree(device_status);
        return rc;
@@ -1861,7 +1871,7 @@ int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify)
 {
        u64 mkvp;
        int minhwtype = 0;
-       const struct keytoken_header *hdr = (struct keytoken_header *) key;
+       const struct keytoken_header *hdr = (struct keytoken_header *)key;
 
        if (hdr->type != TOKTYPE_CCA_INTERNAL)
                return -EINVAL;
@@ -1954,7 +1964,7 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
                }
                /* apqn passed all filtering criteria, add to the array */
                if (_nr_apqns < 256)
-                       _apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16) dom);
+                       _apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16)dom);
        }
 
        /* nothing found ? */
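/*
 * Each entry of the returned apqns array packs card and domain into one
 * u32, card in the high halfword and domain in the low halfword, exactly
 * as built above.  Pack/unpack sketch:
 */
static inline u32 apqn_pack(u16 card, u16 dom)
{
	return ((u32)card << 16) | dom;
}

static inline void apqn_unpack(u32 apqn, u16 *card, u16 *dom)
{
	*card = apqn >> 16;
	*dom  = apqn & 0xFFFF;
}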
index 3513cd8ab9bc1a39a6c38bda7c04c82a5b494a50..78bf5631848e1d16814677e01850c46e5cfe154a 100644 (file)
@@ -251,12 +251,18 @@ struct cca_info {
        char new_apka_mk_state; /* '1' empty, '2' partially full, '3' full */
        char cur_apka_mk_state; /* '1' invalid, '2' valid */
        char old_apka_mk_state; /* '1' invalid, '2' valid */
+       char new_asym_mk_state; /* '1' empty, '2' partially full, '3' full */
+       char cur_asym_mk_state; /* '1' invalid, '2' valid */
+       char old_asym_mk_state; /* '1' invalid, '2' valid */
        u64  new_aes_mkvp;      /* truncated sha256 of new aes master key */
        u64  cur_aes_mkvp;      /* truncated sha256 of current aes master key */
        u64  old_aes_mkvp;      /* truncated sha256 of old aes master key */
        u64  new_apka_mkvp;     /* truncated sha256 of new apka master key */
        u64  cur_apka_mkvp;     /* truncated sha256 of current apka mk */
        u64  old_apka_mkvp;     /* truncated sha256 of old apka mk */
+       u8   new_asym_mkvp[16]; /* verify pattern of new asym master key */
+       u8   cur_asym_mkvp[16]; /* verify pattern of current asym master key */
+       u8   old_asym_mkvp[16]; /* verify pattern of old asym master key */
        char serial[9];         /* serial number (8 ascii numbers + 0x00) */
 };
 
index 2bd49950ba8161387bcf1c39fc46a1c772297434..83f692c9c1975bd25345537935d37c310dc6d995 100644 (file)
 
 #define CEX3A_MAX_RESPONSE_SIZE        0x210   /* 512 bit modulus
                                         * (max outputdatalength) +
-                                        * type80_hdr*/
+                                        * type80_hdr
+                                        */
 #define CEX3A_MAX_MESSAGE_SIZE sizeof(struct type50_crb3_msg)
 
-#define CEX2A_CLEANUP_TIME     (15*HZ)
+#define CEX2A_CLEANUP_TIME     (15 * HZ)
 #define CEX3A_CLEANUP_TIME     CEX2A_CLEANUP_TIME
 
 MODULE_AUTHOR("IBM Corporation");
@@ -117,9 +118,8 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
        zc->online = 1;
 
        rc = zcrypt_card_register(zc);
-       if (rc) {
+       if (rc)
                zcrypt_card_free(zc);
-       }
 
        return rc;
 }
@@ -176,9 +176,8 @@ static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev)
        aq->request_timeout = CEX2A_CLEANUP_TIME;
        dev_set_drvdata(&ap_dev->device, zq);
        rc = zcrypt_queue_register(zq);
-       if (rc) {
+       if (rc)
                zcrypt_queue_free(zq);
-       }
 
        return rc;
 }
index 6360fdd061606f81c1374913498b7511a87e1ae5..cb7849defce38a7f7649636d362b3ef7a3805040 100644 (file)
@@ -31,8 +31,8 @@
 #define CEX2C_MAX_MOD_SIZE     256     /* 2048 bits    */
 #define CEX3C_MIN_MOD_SIZE      16     /*  128 bits    */
 #define CEX3C_MAX_MOD_SIZE     512     /* 4096 bits    */
-#define CEX2C_MAX_XCRB_MESSAGE_SIZE (12*1024)
-#define CEX2C_CLEANUP_TIME     (15*HZ)
+#define CEX2C_MAX_XCRB_MESSAGE_SIZE (12 * 1024)
+#define CEX2C_CLEANUP_TIME     (15 * HZ)
 
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("CEX2C/CEX3C Cryptographic Coprocessor device driver, " \
@@ -200,11 +200,11 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq)
        int rc, i;
 
        ap_init_message(&ap_msg);
-       ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL);
+       ap_msg.msg = (void *)get_zeroed_page(GFP_KERNEL);
        if (!ap_msg.msg)
                return -ENOMEM;
 
-       rng_type6CPRB_msgX(&ap_msg, 4, &domain);
+       rng_type6cprb_msgx(&ap_msg, 4, &domain);
 
        msg = ap_msg.msg;
        msg->cprbx.domain = AP_QID_QUEUE(aq->qid);
@@ -233,7 +233,7 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq)
        else
                rc = 0;
 out_free:
-       free_page((unsigned long) ap_msg.msg);
+       free_page((unsigned long)ap_msg.msg);
        return rc;
 }
 
index fe5664c7589e9293c65b8122f2be69020543e34c..b03916b7538bc5a9d68fffd4337ee380997e6a4a 100644 (file)
@@ -33,7 +33,7 @@
  * But the maximum time limit managed by the stomper code is set to 60sec.
  * Hence we have to wait at least that time period.
  */
-#define CEX4_CLEANUP_TIME      (900*HZ)
+#define CEX4_CLEANUP_TIME      (900 * HZ)
 
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("CEX[45678] Cryptographic Card device driver, " \
@@ -123,11 +123,12 @@ static ssize_t cca_mkvps_show(struct device *dev,
                     &ci, zq->online);
 
        if (ci.new_aes_mk_state >= '1' && ci.new_aes_mk_state <= '3')
-               n = scnprintf(buf, PAGE_SIZE, "AES NEW: %s 0x%016llx\n",
-                             new_state[ci.new_aes_mk_state - '1'],
-                             ci.new_aes_mkvp);
+               n += scnprintf(buf + n, PAGE_SIZE - n,
+                              "AES NEW: %s 0x%016llx\n",
+                              new_state[ci.new_aes_mk_state - '1'],
+                              ci.new_aes_mkvp);
        else
-               n = scnprintf(buf, PAGE_SIZE, "AES NEW: - -\n");
+               n += scnprintf(buf + n, PAGE_SIZE, "AES NEW: - -\n");
 
        if (ci.cur_aes_mk_state >= '1' && ci.cur_aes_mk_state <= '2')
                n += scnprintf(buf + n, PAGE_SIZE - n,
@@ -169,6 +170,33 @@ static ssize_t cca_mkvps_show(struct device *dev,
        else
                n += scnprintf(buf + n, PAGE_SIZE - n, "APKA OLD: - -\n");
 
+       if (ci.new_asym_mk_state >= '1' && ci.new_asym_mk_state <= '3')
+               n += scnprintf(buf + n, PAGE_SIZE - n,
+                              "ASYM NEW: %s 0x%016llx%016llx\n",
+                              new_state[ci.new_asym_mk_state - '1'],
+                              *((u64 *)(ci.new_asym_mkvp)),
+                              *((u64 *)(ci.new_asym_mkvp + sizeof(u64))));
+       else
+               n += scnprintf(buf + n, PAGE_SIZE, "ASYM NEW: - -\n");
+
+       if (ci.cur_asym_mk_state >= '1' && ci.cur_asym_mk_state <= '2')
+               n += scnprintf(buf + n, PAGE_SIZE - n,
+                              "ASYM CUR: %s 0x%016llx%016llx\n",
+                              cao_state[ci.cur_asym_mk_state - '1'],
+                              *((u64 *)(ci.cur_asym_mkvp)),
+                              *((u64 *)(ci.cur_asym_mkvp + sizeof(u64))));
+       else
+               n += scnprintf(buf + n, PAGE_SIZE - n, "ASYM CUR: - -\n");
+
+       if (ci.old_asym_mk_state >= '1' && ci.old_asym_mk_state <= '2')
+               n += scnprintf(buf + n, PAGE_SIZE - n,
+                              "ASYM OLD: %s 0x%016llx%016llx\n",
+                              cao_state[ci.old_asym_mk_state - '1'],
+                              *((u64 *)(ci.old_asym_mkvp)),
+                              *((u64 *)(ci.old_asym_mkvp + sizeof(u64))));
+       else
+               n += scnprintf(buf + n, PAGE_SIZE - n, "ASYM OLD: - -\n");
+
        return n;
 }
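/*
 * The show routine above relies on the usual sysfs accumulation pattern:
 * scnprintf() returns the number of characters actually written (not
 * counting the trailing NUL), so each call must both advance the buffer
 * by n and shrink the remaining space by n.  Minimal sketch:
 */
static ssize_t show_example(char *buf)
{
	int n = 0;

	n += scnprintf(buf + n, PAGE_SIZE - n, "line one\n");
	n += scnprintf(buf + n, PAGE_SIZE - n, "line two\n");
	return n;	/* total bytes placed in buf */
}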
 
@@ -336,8 +364,9 @@ static ssize_t ep11_mkvps_show(struct device *dev,
                bin2hex(buf + n, di.cur_wkvp, sizeof(di.cur_wkvp));
                n += 2 * sizeof(di.cur_wkvp);
                n += scnprintf(buf + n, PAGE_SIZE - n, "\n");
-       } else
+       } else {
                n = scnprintf(buf, PAGE_SIZE, "WK CUR: - -\n");
+       }
 
        if (di.new_wk_state == '0') {
                n += scnprintf(buf + n, PAGE_SIZE - n, "WK NEW: %s -\n",
@@ -348,8 +377,9 @@ static ssize_t ep11_mkvps_show(struct device *dev,
                bin2hex(buf + n, di.new_wkvp, sizeof(di.new_wkvp));
                n += 2 * sizeof(di.new_wkvp);
                n += scnprintf(buf + n, PAGE_SIZE - n, "\n");
-       } else
+       } else {
                n += scnprintf(buf + n, PAGE_SIZE - n, "WK NEW: - -\n");
+       }
 
        return n;
 }
index 98d33f932b0bbb74b40bbdce81fd625f2d155643..b1c29017be5bcf8d42247af6bc1cce6b06371ea4 100644 (file)
@@ -119,8 +119,8 @@ static void __exit card_cache_free(void)
 int ep11_check_aes_key_with_hdr(debug_info_t *dbg, int dbflvl,
                                const u8 *key, size_t keylen, int checkcpacfexp)
 {
-       struct ep11kblob_header *hdr = (struct ep11kblob_header *) key;
-       struct ep11keyblob *kb = (struct ep11keyblob *) (key + sizeof(*hdr));
+       struct ep11kblob_header *hdr = (struct ep11kblob_header *)key;
+       struct ep11keyblob *kb = (struct ep11keyblob *)(key + sizeof(*hdr));
 
 #define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
 
@@ -133,38 +133,38 @@ int ep11_check_aes_key_with_hdr(debug_info_t *dbg, int dbflvl,
        if (hdr->type != TOKTYPE_NON_CCA) {
                if (dbg)
                        DBF("%s key check failed, type 0x%02x != 0x%02x\n",
-                           __func__, (int) hdr->type, TOKTYPE_NON_CCA);
+                           __func__, (int)hdr->type, TOKTYPE_NON_CCA);
                return -EINVAL;
        }
        if (hdr->hver != 0x00) {
                if (dbg)
                        DBF("%s key check failed, header version 0x%02x != 0x00\n",
-                           __func__, (int) hdr->hver);
+                           __func__, (int)hdr->hver);
                return -EINVAL;
        }
        if (hdr->version != TOKVER_EP11_AES_WITH_HEADER) {
                if (dbg)
                        DBF("%s key check failed, version 0x%02x != 0x%02x\n",
-                           __func__, (int) hdr->version, TOKVER_EP11_AES_WITH_HEADER);
+                           __func__, (int)hdr->version, TOKVER_EP11_AES_WITH_HEADER);
                return -EINVAL;
        }
        if (hdr->len > keylen) {
                if (dbg)
                        DBF("%s key check failed, header len %d keylen %zu mismatch\n",
-                           __func__, (int) hdr->len, keylen);
+                           __func__, (int)hdr->len, keylen);
                return -EINVAL;
        }
        if (hdr->len < sizeof(*hdr) + sizeof(*kb)) {
                if (dbg)
                        DBF("%s key check failed, header len %d < %zu\n",
-                           __func__, (int) hdr->len, sizeof(*hdr) + sizeof(*kb));
+                           __func__, (int)hdr->len, sizeof(*hdr) + sizeof(*kb));
                return -EINVAL;
        }
 
        if (kb->version != EP11_STRUCT_MAGIC) {
                if (dbg)
                        DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n",
-                           __func__, (int) kb->version, EP11_STRUCT_MAGIC);
+                           __func__, (int)kb->version, EP11_STRUCT_MAGIC);
                return -EINVAL;
        }
        if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) {
@@ -186,8 +186,8 @@ EXPORT_SYMBOL(ep11_check_aes_key_with_hdr);
 int ep11_check_ecc_key_with_hdr(debug_info_t *dbg, int dbflvl,
                                const u8 *key, size_t keylen, int checkcpacfexp)
 {
-       struct ep11kblob_header *hdr = (struct ep11kblob_header *) key;
-       struct ep11keyblob *kb = (struct ep11keyblob *) (key + sizeof(*hdr));
+       struct ep11kblob_header *hdr = (struct ep11kblob_header *)key;
+       struct ep11keyblob *kb = (struct ep11keyblob *)(key + sizeof(*hdr));
 
 #define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
 
@@ -200,38 +200,38 @@ int ep11_check_ecc_key_with_hdr(debug_info_t *dbg, int dbflvl,
        if (hdr->type != TOKTYPE_NON_CCA) {
                if (dbg)
                        DBF("%s key check failed, type 0x%02x != 0x%02x\n",
-                           __func__, (int) hdr->type, TOKTYPE_NON_CCA);
+                           __func__, (int)hdr->type, TOKTYPE_NON_CCA);
                return -EINVAL;
        }
        if (hdr->hver != 0x00) {
                if (dbg)
                        DBF("%s key check failed, header version 0x%02x != 0x00\n",
-                           __func__, (int) hdr->hver);
+                           __func__, (int)hdr->hver);
                return -EINVAL;
        }
        if (hdr->version != TOKVER_EP11_ECC_WITH_HEADER) {
                if (dbg)
                        DBF("%s key check failed, version 0x%02x != 0x%02x\n",
-                           __func__, (int) hdr->version, TOKVER_EP11_ECC_WITH_HEADER);
+                           __func__, (int)hdr->version, TOKVER_EP11_ECC_WITH_HEADER);
                return -EINVAL;
        }
        if (hdr->len > keylen) {
                if (dbg)
                        DBF("%s key check failed, header len %d keylen %zu mismatch\n",
-                           __func__, (int) hdr->len, keylen);
+                           __func__, (int)hdr->len, keylen);
                return -EINVAL;
        }
        if (hdr->len < sizeof(*hdr) + sizeof(*kb)) {
                if (dbg)
                        DBF("%s key check failed, header len %d < %zu\n",
-                           __func__, (int) hdr->len, sizeof(*hdr) + sizeof(*kb));
+                           __func__, (int)hdr->len, sizeof(*hdr) + sizeof(*kb));
                return -EINVAL;
        }
 
        if (kb->version != EP11_STRUCT_MAGIC) {
                if (dbg)
                        DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n",
-                           __func__, (int) kb->version, EP11_STRUCT_MAGIC);
+                           __func__, (int)kb->version, EP11_STRUCT_MAGIC);
                return -EINVAL;
        }
        if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) {
@@ -254,7 +254,7 @@ EXPORT_SYMBOL(ep11_check_ecc_key_with_hdr);
 int ep11_check_aes_key(debug_info_t *dbg, int dbflvl,
                       const u8 *key, size_t keylen, int checkcpacfexp)
 {
-       struct ep11keyblob *kb = (struct ep11keyblob *) key;
+       struct ep11keyblob *kb = (struct ep11keyblob *)key;
 
 #define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__)
 
@@ -267,32 +267,32 @@ int ep11_check_aes_key(debug_info_t *dbg, int dbflvl,
        if (kb->head.type != TOKTYPE_NON_CCA) {
                if (dbg)
                        DBF("%s key check failed, type 0x%02x != 0x%02x\n",
-                           __func__, (int) kb->head.type, TOKTYPE_NON_CCA);
+                           __func__, (int)kb->head.type, TOKTYPE_NON_CCA);
                return -EINVAL;
        }
        if (kb->head.version != TOKVER_EP11_AES) {
                if (dbg)
                        DBF("%s key check failed, version 0x%02x != 0x%02x\n",
-                           __func__, (int) kb->head.version, TOKVER_EP11_AES);
+                           __func__, (int)kb->head.version, TOKVER_EP11_AES);
                return -EINVAL;
        }
        if (kb->head.len > keylen) {
                if (dbg)
                        DBF("%s key check failed, header len %d keylen %zu mismatch\n",
-                           __func__, (int) kb->head.len, keylen);
+                           __func__, (int)kb->head.len, keylen);
                return -EINVAL;
        }
        if (kb->head.len < sizeof(*kb)) {
                if (dbg)
                        DBF("%s key check failed, header len %d < %zu\n",
-                           __func__, (int) kb->head.len, sizeof(*kb));
+                           __func__, (int)kb->head.len, sizeof(*kb));
                return -EINVAL;
        }
 
        if (kb->version != EP11_STRUCT_MAGIC) {
                if (dbg)
                        DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n",
-                           __func__, (int) kb->version, EP11_STRUCT_MAGIC);
+                           __func__, (int)kb->version, EP11_STRUCT_MAGIC);
                return -EINVAL;
        }
        if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) {
@@ -347,11 +347,11 @@ static int asn1tag_write(u8 *ptr, u8 tag, const u8 *pvalue, u16 valuelen)
        }
        if (valuelen > 127) {
                ptr[1] = 0x81;
-               ptr[2] = (u8) valuelen;
+               ptr[2] = (u8)valuelen;
                memcpy(ptr + 3, pvalue, valuelen);
                return 3 + valuelen;
        }
-       ptr[1] = (u8) valuelen;
+       ptr[1] = (u8)valuelen;
        memcpy(ptr + 2, pvalue, valuelen);
        return 2 + valuelen;
 }
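/*
 * asn1tag_write() above emits BER/DER length octets: a value length up
 * to 127 uses the short form (one length byte), 128..255 uses the long
 * form 0x81 followed by one length byte.  Length-octet sketch under the
 * same <= 255 assumption:
 */
static int asn1_put_len(u8 *ptr, u16 valuelen)
{
	if (valuelen > 127) {
		ptr[0] = 0x81;		/* long form, one length octet */
		ptr[1] = (u8)valuelen;
		return 2;
	}
	ptr[0] = (u8)valuelen;		/* short form */
	return 1;
}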
@@ -389,11 +389,11 @@ static inline void prep_urb(struct ep11_urb *u,
                            struct ep11_cprb *req, size_t req_len,
                            struct ep11_cprb *rep, size_t rep_len)
 {
-       u->targets = (u8 __user *) t;
+       u->targets = (u8 __user *)t;
        u->targets_num = nt;
-       u->req = (u8 __user *) req;
+       u->req = (u8 __user *)req;
        u->req_len = req_len;
-       u->resp = (u8 __user *) rep;
+       u->resp = (u8 __user *)rep;
        u->resp_len = rep_len;
 }
 
@@ -462,7 +462,6 @@ static int check_reply_pl(const u8 *pl, const char *func)
        return 0;
 }
 
-
 /*
  * Helper function which does an ep11 query with given query type.
  */
@@ -496,7 +495,7 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
        req = alloc_cprb(sizeof(struct ep11_info_req_pl));
        if (!req)
                goto out;
-       req_pl = (struct ep11_info_req_pl *) (((u8 *) req) + sizeof(*req));
+       req_pl = (struct ep11_info_req_pl *)(((u8 *)req) + sizeof(*req));
        prep_head(&req_pl->head, sizeof(*req_pl), api, 38); /* get xcp info */
        req_pl->query_type_tag = 0x04;
        req_pl->query_type_len = sizeof(u32);
@@ -508,10 +507,10 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
        rep = alloc_cprb(sizeof(struct ep11_info_rep_pl) + buflen);
        if (!rep)
                goto out;
-       rep_pl = (struct ep11_info_rep_pl *) (((u8 *) rep) + sizeof(*rep));
+       rep_pl = (struct ep11_info_rep_pl *)(((u8 *)rep) + sizeof(*rep));
 
        /* urb and target */
-       urb = kmalloc(sizeof(struct ep11_urb), GFP_KERNEL);
+       urb = kmalloc(sizeof(*urb), GFP_KERNEL);
        if (!urb)
                goto out;
        target.ap_id = cardnr;
@@ -524,7 +523,7 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
        if (rc) {
                DEBUG_ERR(
                        "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
-                       __func__, (int) cardnr, (int) domain, rc);
+                       __func__, (int)cardnr, (int)domain, rc);
                goto out;
        }
 
@@ -543,7 +542,7 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
                goto out;
        }
 
-       memcpy(buf, ((u8 *) rep_pl) + sizeof(*rep_pl), rep_pl->data_len);
+       memcpy(buf, ((u8 *)rep_pl) + sizeof(*rep_pl), rep_pl->data_len);
 
 out:
        kfree(req);
@@ -592,7 +591,7 @@ int ep11_get_card_info(u16 card, struct ep11_card_info *info, int verify)
                        return -ENOMEM;
                rc = ep11_query_info(card, AUTOSEL_DOM,
                                     0x01 /* module info query */,
-                                    sizeof(*pmqi), (u8 *) pmqi);
+                                    sizeof(*pmqi), (u8 *)pmqi);
                if (rc) {
                        if (rc == -ENODEV)
                                card_cache_scrub(card);
@@ -632,7 +631,7 @@ int ep11_get_domain_info(u16 card, u16 domain, struct ep11_domain_info *info)
                return -ENOMEM;
 
        rc = ep11_query_info(card, domain, 0x03 /* domain info query */,
-                            sizeof(*p_dom_info), (u8 *) p_dom_info);
+                            sizeof(*p_dom_info), (u8 *)p_dom_info);
        if (rc)
                goto out;
 
@@ -644,8 +643,8 @@ int ep11_get_domain_info(u16 card, u16 domain, struct ep11_domain_info *info)
                        info->cur_wk_state = '1';
                        memcpy(info->cur_wkvp, p_dom_info->cur_WK_VP, 32);
                }
-               if (p_dom_info->dom_flags & 0x04 /* new wk present */
-                   || p_dom_info->dom_flags & 0x08 /* new wk committed */) {
+               if (p_dom_info->dom_flags & 0x04 || /* new wk present */
+                   p_dom_info->dom_flags & 0x08 /* new wk committed */) {
                        info->new_wk_state =
                                p_dom_info->dom_flags & 0x08 ? '2' : '1';
                        memcpy(info->new_wkvp, p_dom_info->new_WK_VP, 32);
@@ -722,7 +721,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        req = alloc_cprb(sizeof(struct keygen_req_pl));
        if (!req)
                goto out;
-       req_pl = (struct keygen_req_pl *) (((u8 *) req) + sizeof(*req));
+       req_pl = (struct keygen_req_pl *)(((u8 *)req) + sizeof(*req));
        api = (!keygenflags || keygenflags & 0x00200000) ? 4 : 1;
        prep_head(&req_pl->head, sizeof(*req_pl), api, 21); /* GenerateKey */
        req_pl->var_tag = 0x04;
@@ -746,10 +745,10 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        rep = alloc_cprb(sizeof(struct keygen_rep_pl));
        if (!rep)
                goto out;
-       rep_pl = (struct keygen_rep_pl *) (((u8 *) rep) + sizeof(*rep));
+       rep_pl = (struct keygen_rep_pl *)(((u8 *)rep) + sizeof(*rep));
 
        /* urb and target */
-       urb = kmalloc(sizeof(struct ep11_urb), GFP_KERNEL);
+       urb = kmalloc(sizeof(*urb), GFP_KERNEL);
        if (!urb)
                goto out;
        target.ap_id = card;
@@ -762,7 +761,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        if (rc) {
                DEBUG_ERR(
                        "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
-                       __func__, (int) card, (int) domain, rc);
+                       __func__, (int)card, (int)domain, rc);
                goto out;
        }
 
@@ -784,7 +783,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        /* copy key blob and set header values */
        memcpy(keybuf, rep_pl->data, rep_pl->data_len);
        *keybufsize = rep_pl->data_len;
-       kb = (struct ep11keyblob *) keybuf;
+       kb = (struct ep11keyblob *)keybuf;
        kb->head.type = TOKTYPE_NON_CCA;
        kb->head.len = rep_pl->data_len;
        kb->head.version = TOKVER_EP11_AES;
@@ -844,7 +843,7 @@ static int ep11_cryptsingle(u16 card, u16 domain,
        req = alloc_cprb(req_pl_size);
        if (!req)
                goto out;
-       req_pl = (struct crypt_req_pl *) (((u8 *) req) + sizeof(*req));
+       req_pl = (struct crypt_req_pl *)(((u8 *)req) + sizeof(*req));
        prep_head(&req_pl->head, req_pl_size, api, (mode ? 20 : 19));
        req_pl->var_tag = 0x04;
        req_pl->var_len = sizeof(u32);
@@ -852,7 +851,7 @@ static int ep11_cryptsingle(u16 card, u16 domain,
        req_pl->mech_tag = 0x04;
        req_pl->mech_len = sizeof(u32) + (iv ? 16 : 0);
        req_pl->mech = (mech ? mech : 0x00001085); /* CKM_AES_CBC_PAD */
-       p = ((u8 *) req_pl) + sizeof(*req_pl);
+       p = ((u8 *)req_pl) + sizeof(*req_pl);
        if (iv) {
                memcpy(p, iv, 16);
                p += 16;
@@ -866,10 +865,10 @@ static int ep11_cryptsingle(u16 card, u16 domain,
        rep = alloc_cprb(rep_pl_size);
        if (!rep)
                goto out;
-       rep_pl = (struct crypt_rep_pl *) (((u8 *) rep) + sizeof(*rep));
+       rep_pl = (struct crypt_rep_pl *)(((u8 *)rep) + sizeof(*rep));
 
        /* urb and target */
-       urb = kmalloc(sizeof(struct ep11_urb), GFP_KERNEL);
+       urb = kmalloc(sizeof(*urb), GFP_KERNEL);
        if (!urb)
                goto out;
        target.ap_id = card;
@@ -882,7 +881,7 @@ static int ep11_cryptsingle(u16 card, u16 domain,
        if (rc) {
                DEBUG_ERR(
                        "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
-                       __func__, (int) card, (int) domain, rc);
+                       __func__, (int)card, (int)domain, rc);
                goto out;
        }
 
@@ -894,13 +893,13 @@ static int ep11_cryptsingle(u16 card, u16 domain,
                rc = -EIO;
                goto out;
        }
-       p = ((u8 *) rep_pl) + sizeof(*rep_pl);
-       if (rep_pl->data_lenfmt <= 127)
+       p = ((u8 *)rep_pl) + sizeof(*rep_pl);
+       if (rep_pl->data_lenfmt <= 127) {
                n = rep_pl->data_lenfmt;
-       else if (rep_pl->data_lenfmt == 0x81)
+       } else if (rep_pl->data_lenfmt == 0x81) {
                n = *p++;
-       else if (rep_pl->data_lenfmt == 0x82) {
-               n = *((u16 *) p);
+       } else if (rep_pl->data_lenfmt == 0x82) {
+               n = *((u16 *)p);
                p += 2;
        } else {
                DEBUG_ERR("%s unknown reply data length format 0x%02hhx\n",
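The data_lenfmt dispatch above follows the ASN.1 BER length encoding: a byte up to 127 is the length itself, 0x81 announces one following length byte, and 0x82 announces two, big-endian. A stand-alone sketch of the same decoding under those assumptions (ber_declen is a hypothetical name, not in the patch; the kernel code can read the two bytes as a native u16 because s390 is big-endian):

/* Hypothetical helper: decode a BER length field. lenfmt is the format
 * byte already read; *pp points at the bytes that follow it and is
 * advanced past any extra length bytes consumed. Returns the length,
 * or -1 for formats the zcrypt code rejects as well.
 */
static int ber_declen(const unsigned char **pp, unsigned char lenfmt)
{
        const unsigned char *p = *pp;
        int n;

        if (lenfmt <= 127) {            /* short form: byte is the length */
                n = lenfmt;
        } else if (lenfmt == 0x81) {    /* long form, one length byte */
                n = *p++;
        } else if (lenfmt == 0x82) {    /* long form, two length bytes */
                n = (p[0] << 8) | p[1]; /* big-endian on the wire */
                p += 2;
        } else {
                return -1;
        }
        *pp = p;
        return n;
}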
@@ -978,7 +977,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
        req = alloc_cprb(req_pl_size);
        if (!req)
                goto out;
-       req_pl = (struct uw_req_pl *) (((u8 *) req) + sizeof(*req));
+       req_pl = (struct uw_req_pl *)(((u8 *)req) + sizeof(*req));
        api = (!keygenflags || keygenflags & 0x00200000) ? 4 : 1;
        prep_head(&req_pl->head, req_pl_size, api, 34); /* UnwrapKey */
        req_pl->attr_tag = 0x04;
@@ -994,7 +993,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
        req_pl->mech_tag = 0x04;
        req_pl->mech_len = sizeof(u32) + (iv ? 16 : 0);
        req_pl->mech = (mech ? mech : 0x00001085); /* CKM_AES_CBC_PAD */
-       p = ((u8 *) req_pl) + sizeof(*req_pl);
+       p = ((u8 *)req_pl) + sizeof(*req_pl);
        if (iv) {
                memcpy(p, iv, 16);
                p += 16;
@@ -1014,10 +1013,10 @@ static int ep11_unwrapkey(u16 card, u16 domain,
        rep = alloc_cprb(sizeof(struct uw_rep_pl));
        if (!rep)
                goto out;
-       rep_pl = (struct uw_rep_pl *) (((u8 *) rep) + sizeof(*rep));
+       rep_pl = (struct uw_rep_pl *)(((u8 *)rep) + sizeof(*rep));
 
        /* urb and target */
-       urb = kmalloc(sizeof(struct ep11_urb), GFP_KERNEL);
+       urb = kmalloc(sizeof(*urb), GFP_KERNEL);
        if (!urb)
                goto out;
        target.ap_id = card;
@@ -1030,7 +1029,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
        if (rc) {
                DEBUG_ERR(
                        "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
-                       __func__, (int) card, (int) domain, rc);
+                       __func__, (int)card, (int)domain, rc);
                goto out;
        }
 
@@ -1052,7 +1051,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
        /* copy key blob and set header values */
        memcpy(keybuf, rep_pl->data, rep_pl->data_len);
        *keybufsize = rep_pl->data_len;
-       kb = (struct ep11keyblob *) keybuf;
+       kb = (struct ep11keyblob *)keybuf;
        kb->head.type = TOKTYPE_NON_CCA;
        kb->head.len = rep_pl->data_len;
        kb->head.version = TOKVER_EP11_AES;
@@ -1105,7 +1104,7 @@ static int ep11_wrapkey(u16 card, u16 domain,
        u8 *p;
 
        /* maybe the session field holds a header with key info */
-       kb = (struct ep11keyblob *) key;
+       kb = (struct ep11keyblob *)key;
        if (kb->head.type == TOKTYPE_NON_CCA &&
            kb->head.version == TOKVER_EP11_AES) {
                has_header = true;
@@ -1120,7 +1119,7 @@ static int ep11_wrapkey(u16 card, u16 domain,
                goto out;
        if (!mech || mech == 0x80060001)
                req->flags |= 0x20; /* CPACF_WRAP needs special bit */
-       req_pl = (struct wk_req_pl *) (((u8 *) req) + sizeof(*req));
+       req_pl = (struct wk_req_pl *)(((u8 *)req) + sizeof(*req));
        api = (!mech || mech == 0x80060001) ? 4 : 1; /* CKM_IBM_CPACF_WRAP */
        prep_head(&req_pl->head, req_pl_size, api, 33); /* WrapKey */
        req_pl->var_tag = 0x04;
@@ -1129,7 +1128,7 @@ static int ep11_wrapkey(u16 card, u16 domain,
        req_pl->mech_tag = 0x04;
        req_pl->mech_len = sizeof(u32) + (iv ? 16 : 0);
        req_pl->mech = (mech ? mech : 0x80060001); /* CKM_IBM_CPACF_WRAP */
-       p = ((u8 *) req_pl) + sizeof(*req_pl);
+       p = ((u8 *)req_pl) + sizeof(*req_pl);
        if (iv) {
                memcpy(p, iv, 16);
                p += 16;
@@ -1152,10 +1151,10 @@ static int ep11_wrapkey(u16 card, u16 domain,
        rep = alloc_cprb(sizeof(struct wk_rep_pl));
        if (!rep)
                goto out;
-       rep_pl = (struct wk_rep_pl *) (((u8 *) rep) + sizeof(*rep));
+       rep_pl = (struct wk_rep_pl *)(((u8 *)rep) + sizeof(*rep));
 
        /* urb and target */
-       urb = kmalloc(sizeof(struct ep11_urb), GFP_KERNEL);
+       urb = kmalloc(sizeof(*urb), GFP_KERNEL);
        if (!urb)
                goto out;
        target.ap_id = card;
@@ -1168,7 +1167,7 @@ static int ep11_wrapkey(u16 card, u16 domain,
        if (rc) {
                DEBUG_ERR(
                        "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n",
-                       __func__, (int) card, (int) domain, rc);
+                       __func__, (int)card, (int)domain, rc);
                goto out;
        }
 
@@ -1206,9 +1205,9 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        u8 encbuf[64], *kek = NULL;
        size_t clrkeylen, keklen, encbuflen = sizeof(encbuf);
 
-       if (keybitsize == 128 || keybitsize == 192 || keybitsize == 256)
+       if (keybitsize == 128 || keybitsize == 192 || keybitsize == 256) {
                clrkeylen = keybitsize / 8;
-       else {
+       } else {
                DEBUG_ERR(
                        "%s unknown/unsupported keybitsize %d\n",
                        __func__, keybitsize);
@@ -1233,7 +1232,7 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
                        __func__, rc);
                goto out;
        }
-       kb = (struct ep11keyblob *) kek;
+       kb = (struct ep11keyblob *)kek;
        memset(&kb->head, 0, sizeof(kb->head));
 
        /* Step 2: encrypt clear key value with the kek key */
@@ -1282,17 +1281,17 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
        struct ep11kblob_header *hdr;
 
        /* key with or without header ? */
-       hdr = (struct ep11kblob_header *) keyblob;
-       if (hdr->type == TOKTYPE_NON_CCA
-           && (hdr->version == TOKVER_EP11_AES_WITH_HEADER
-               || hdr->version == TOKVER_EP11_ECC_WITH_HEADER)
-           && is_ep11_keyblob(keyblob + sizeof(struct ep11kblob_header))) {
+       hdr = (struct ep11kblob_header *)keyblob;
+       if (hdr->type == TOKTYPE_NON_CCA &&
+           (hdr->version == TOKVER_EP11_AES_WITH_HEADER ||
+            hdr->version == TOKVER_EP11_ECC_WITH_HEADER) &&
+           is_ep11_keyblob(keyblob + sizeof(struct ep11kblob_header))) {
                /* EP11 AES or ECC key with header */
                key = keyblob + sizeof(struct ep11kblob_header);
                keylen = hdr->len - sizeof(struct ep11kblob_header);
-       } else if (hdr->type == TOKTYPE_NON_CCA
-                  && hdr->version == TOKVER_EP11_AES
-                  && is_ep11_keyblob(keyblob)) {
+       } else if (hdr->type == TOKTYPE_NON_CCA &&
+                  hdr->version == TOKVER_EP11_AES &&
+                  is_ep11_keyblob(keyblob)) {
                /* EP11 AES key (old style) */
                key = keyblob;
                keylen = hdr->len;
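The chain above accepts three key blob layouts: a struct ep11kblob_header prepended to the blob, an old-style blob whose leading bytes double as the header, and a raw blob whose length comes from the caller. A sketch of the same dispatch as a classifier; the enum and function name are hypothetical, and the raw-blob branch's condition is not visible in these hunks, so the blob magic check there is an assumption:

/* Sketch only; mirrors the dispatch in ep11_kblob2protkey(). */
enum kblob_kind { KBLOB_WITH_HDR, KBLOB_OLD_STYLE, KBLOB_RAW, KBLOB_BAD };

static enum kblob_kind classify_kblob(const u8 *keyblob)
{
        struct ep11kblob_header *hdr = (struct ep11kblob_header *)keyblob;

        if (hdr->type == TOKTYPE_NON_CCA &&
            (hdr->version == TOKVER_EP11_AES_WITH_HEADER ||
             hdr->version == TOKVER_EP11_ECC_WITH_HEADER) &&
            is_ep11_keyblob(keyblob + sizeof(*hdr)))
                return KBLOB_WITH_HDR;  /* header, then the blob */
        if (hdr->type == TOKTYPE_NON_CCA &&
            hdr->version == TOKVER_EP11_AES &&
            is_ep11_keyblob(keyblob))
                return KBLOB_OLD_STYLE; /* header fields live in the blob */
        if (is_ep11_keyblob(keyblob))   /* assumption, see lead-in */
                return KBLOB_RAW;
        return KBLOB_BAD;
}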
@@ -1300,8 +1299,9 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
                /* raw EP11 key blob */
                key = keyblob;
                keylen = keybloblen;
-       } else
+       } else {
                return -EINVAL;
+       }
 
        /* alloc temp working buffer */
        wkbuflen = (keylen + AES_BLOCK_SIZE) & (~(AES_BLOCK_SIZE - 1));
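The wkbuflen expression above rounds the key length up for CBC padding; note that an input already on a block boundary still gains one full extra block, so there is always room for pad bytes. A worked example with AES_BLOCK_SIZE == 16 (hypothetical helper name):

static inline size_t round_for_padding(size_t keylen)
{
        /* keylen = 30: (30 + 16) & ~15 = 46 & ~15 = 32  (rounded up)
         * keylen = 32: (32 + 16) & ~15 = 48 & ~15 = 48  (aligned input
         * still gets one extra block for the pad bytes)
         */
        return (keylen + 16) & ~(size_t)15;
}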
@@ -1318,12 +1318,12 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
                        __func__, rc);
                goto out;
        }
-       wki = (struct wk_info *) wkbuf;
+       wki = (struct wk_info *)wkbuf;
 
        /* check struct version and pkey type */
        if (wki->version != 1 || wki->pkeytype < 1 || wki->pkeytype > 5) {
                DEBUG_ERR("%s wk info version %d or pkeytype %d mismatch.\n",
-                         __func__, (int) wki->version, (int) wki->pkeytype);
+                         __func__, (int)wki->version, (int)wki->pkeytype);
                rc = -EIO;
                goto out;
        }
@@ -1332,24 +1332,24 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
        switch (wki->pkeytype) {
        case 1: /* AES */
                switch (wki->pkeysize) {
-               case 16+32:
+               case 16 + 32:
                        /* AES 128 protected key */
                        if (protkeytype)
                                *protkeytype = PKEY_KEYTYPE_AES_128;
                        break;
-               case 24+32:
+               case 24 + 32:
                        /* AES 192 protected key */
                        if (protkeytype)
                                *protkeytype = PKEY_KEYTYPE_AES_192;
                        break;
-               case 32+32:
+               case 32 + 32:
                        /* AES 256 protected key */
                        if (protkeytype)
                                *protkeytype = PKEY_KEYTYPE_AES_256;
                        break;
                default:
                        DEBUG_ERR("%s unknown/unsupported AES pkeysize %d\n",
-                                 __func__, (int) wki->pkeysize);
+                                 __func__, (int)wki->pkeysize);
                        rc = -EIO;
                        goto out;
                }
@@ -1363,7 +1363,7 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
        case 2: /* TDES */
        default:
                DEBUG_ERR("%s unknown/unsupported key type %d\n",
-                         __func__, (int) wki->pkeytype);
+                         __func__, (int)wki->pkeytype);
                rc = -EIO;
                goto out;
        }
@@ -1445,7 +1445,7 @@ int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain,
                }
                /* apqn passed all filtering criteria, add to the array */
                if (_nr_apqns < 256)
-                       _apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16) dom);
+                       _apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16)dom);
        }
 
        /* nothing found ? */
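The packed value is an APQN: the 16-bit card index in the upper half, the 16-bit domain in the lower half. Hypothetical helpers mirroring this packing and the AP_QID_CARD()/AP_QID_QUEUE() accessors used elsewhere in the series:

static inline u32 apqn_pack(u16 card, u16 dom)
{
        return ((u32)card << 16) | dom;
}

static inline u16 apqn_card(u32 apqn)
{
        return apqn >> 16;      /* cf. AP_QID_CARD() */
}

static inline u16 apqn_dom(u32 apqn)
{
        return apqn & 0xffff;   /* cf. AP_QID_QUEUE() */
}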
index 1e02b197c003522677ab6857265ce8915d41a342..07445041869feeca3b246e243b1ff1081dadc799 100644 (file)
@@ -50,7 +50,7 @@ struct ep11keyblob {
 /* check ep11 key magic to find out if this is an ep11 key blob */
 static inline bool is_ep11_keyblob(const u8 *key)
 {
-       struct ep11keyblob *kb = (struct ep11keyblob *) key;
+       struct ep11keyblob *kb = (struct ep11keyblob *)key;
 
        return (kb->version == EP11_STRUCT_MAGIC);
 }
index 8b0ce600b749e0999333a56866ec285754a02a27..d36177e65a3ddc9fb2b667eb32c790488b8e484a 100644 (file)
@@ -121,10 +121,11 @@ static inline int convert_error(struct zcrypt_queue *zq,
                        ZCRYPT_DBF_WARN(
                                "%s dev=%02x.%04x RY=0x%02x apfs=0x%x => bus rescan, rc=EAGAIN\n",
                                __func__, card, queue, ehdr->reply_code, apfs);
-               } else
+               } else {
                        ZCRYPT_DBF_WARN("%s dev=%02x.%04x RY=0x%02x => bus rescan, rc=EAGAIN\n",
                                        __func__, card, queue,
                                        ehdr->reply_code);
+               }
                return -EAGAIN;
        default:
                /* Assume request is valid and a retry will be worth it */
index 259145aa393f8c080c630868d104abf0d80b6576..7d245645fdd57548930e64422f42236281b9224b 100644 (file)
@@ -158,7 +158,6 @@ struct type80_hdr {
 
 int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode)
 {
-
        if (!mex->inputdatalength)
                return -EINVAL;
 
@@ -174,7 +173,6 @@ int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode)
 
 int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode)
 {
-
        if (!crt->inputdatalength)
                return -EINVAL;
 
@@ -239,8 +237,9 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq,
                mod = meb3->modulus + sizeof(meb3->modulus) - mod_len;
                exp = meb3->exponent + sizeof(meb3->exponent) - mod_len;
                inp = meb3->message + sizeof(meb3->message) - mod_len;
-       } else
+       } else {
                return -EINVAL;
+       }
 
        if (copy_from_user(mod, mex->n_modulus, mod_len) ||
            copy_from_user(exp, mex->b_key, mod_len) ||
@@ -323,8 +322,9 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq,
                dq = crb3->dq + sizeof(crb3->dq) - short_len;
                u = crb3->u + sizeof(crb3->u) - short_len;
                inp = crb3->message + sizeof(crb3->message) - mod_len;
-       } else
+       } else {
                return -EINVAL;
+       }
 
        /*
         * correct the offset of p, bp and mult_inv according zcrypt.h
@@ -392,7 +392,7 @@ static int convert_response_cex2a(struct zcrypt_queue *zq,
                                  unsigned int outputdatalength)
 {
        /* Response type byte is the second byte in the response. */
-       unsigned char rtype = ((unsigned char *) reply->msg)[1];
+       unsigned char rtype = ((unsigned char *)reply->msg)[1];
 
        switch (rtype) {
        case TYPE82_RSP_CODE:
@@ -406,11 +406,11 @@ static int convert_response_cex2a(struct zcrypt_queue *zq,
                pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                       AP_QID_CARD(zq->queue->qid),
                       AP_QID_QUEUE(zq->queue->qid),
-                      (int) rtype);
+                      (int)rtype);
                ZCRYPT_DBF_ERR(
                        "%s dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                        __func__, AP_QID_CARD(zq->queue->qid),
-                       AP_QID_QUEUE(zq->queue->qid), (int) rtype);
+                       AP_QID_QUEUE(zq->queue->qid), (int)rtype);
                ap_send_online_uevent(&zq->queue->ap_dev, zq->online);
                return -EAGAIN;
        }
@@ -447,10 +447,11 @@ static void zcrypt_cex2a_receive(struct ap_queue *aq,
                        memcpy(msg->msg, reply->msg, len);
                        msg->len = len;
                }
-       } else
+       } else {
                memcpy(msg->msg, reply->msg, sizeof(error_reply));
+       }
 out:
-       complete((struct completion *) msg->private);
+       complete((struct completion *)msg->private);
 }
 
 static atomic_t zcrypt_step = ATOMIC_INIT(0);
@@ -475,7 +476,7 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
        if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->receive = zcrypt_cex2a_receive;
-       ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+       ap_msg->psmid = (((unsigned long long)current->pid) << 32) +
                atomic_inc_return(&zcrypt_step);
        ap_msg->private = &work;
        rc = ICAMEX_msg_to_type50MEX_msg(zq, ap_msg, mex);
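The psmid built above ties each reply back to its request: the caller's pid fills the high 32 bits and a global atomic counter the low 32, so even concurrent requests from one task get distinct ids. A condensed sketch of just that construction (make_psmid is a hypothetical name):

static atomic_t step = ATOMIC_INIT(0);

/* unique per in-flight request: pid in the high half,
 * monotonically increasing counter in the low half
 */
static unsigned long long make_psmid(void)
{
        return (((unsigned long long)current->pid) << 32) +
                atomic_inc_return(&step);
}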
@@ -492,9 +493,11 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
                        rc = convert_response_cex2a(zq, ap_msg,
                                                    mex->outputdata,
                                                    mex->outputdatalength);
-       } else
+       } else {
                /* Signal pending. */
                ap_cancel_message(zq->queue, ap_msg);
+       }
+
 out:
        ap_msg->private = NULL;
        if (rc)
@@ -524,7 +527,7 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
        if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->receive = zcrypt_cex2a_receive;
-       ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+       ap_msg->psmid = (((unsigned long long)current->pid) << 32) +
                atomic_inc_return(&zcrypt_step);
        ap_msg->private = &work;
        rc = ICACRT_msg_to_type50CRT_msg(zq, ap_msg, crt);
@@ -541,9 +544,11 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
                        rc = convert_response_cex2a(zq, ap_msg,
                                                    crt->outputdata,
                                                    crt->outputdatalength);
-       } else
+       } else {
                /* Signal pending. */
                ap_cancel_message(zq->queue, ap_msg);
+       }
+
 out:
        ap_msg->private = NULL;
        if (rc)
index 57d885158cf000a3eda3cb99abf233be806f3d81..8fb34b8eeb189a3bb227f8378c113c5c5a82838e 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- *  Copyright IBM Corp. 2001, 2012
+ *  Copyright IBM Corp. 2001, 2022
  *  Author(s): Robert Burroughs
  *            Eric Rossman (edrossma@us.ibm.com)
  *
 
 #define CEXXC_MAX_ICA_RESPONSE_SIZE 0x77c /* max size type86 v2 reply      */
 
-#define CEIL4(x) ((((x)+3)/4)*4)
+#define CEIL4(x) ((((x) + 3) / 4) * 4)
 
 struct response_type {
        struct completion work;
        int type;
 };
+
 #define CEXXC_RESPONSE_TYPE_ICA  0
 #define CEXXC_RESPONSE_TYPE_XCRB 1
 #define CEXXC_RESPONSE_TYPE_EP11 2
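CEIL4() rounds its argument up to the next multiple of 4, the alignment the type6 request fields expect: CEIL4(5) = ((5 + 3) / 4) * 4 = 8, while an already aligned CEIL4(8) stays 8. With an unsigned argument the "+ 3" can wrap near the type's maximum, which is why the hunks below test "if (CEIL4(x) < x)" before trusting the rounded value.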
@@ -44,63 +45,6 @@ MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \
                   "Copyright IBM Corp. 2001, 2012");
 MODULE_LICENSE("GPL");
 
-/*
- * CPRB
- *       Note that all shorts, ints and longs are little-endian.
- *       All pointer fields are 32-bits long, and mean nothing
- *
- *       A request CPRB is followed by a request_parameter_block.
- *
- *       The request (or reply) parameter block is organized thus:
- *         function code
- *         VUD block
- *         key block
- */
-struct CPRB {
-       unsigned short cprb_len;        /* CPRB length                   */
-       unsigned char cprb_ver_id;      /* CPRB version id.              */
-       unsigned char pad_000;          /* Alignment pad byte.           */
-       unsigned char srpi_rtcode[4];   /* SRPI return code LELONG       */
-       unsigned char srpi_verb;        /* SRPI verb type                */
-       unsigned char flags;            /* flags                         */
-       unsigned char func_id[2];       /* function id                   */
-       unsigned char checkpoint_flag;  /*                               */
-       unsigned char resv2;            /* reserved                      */
-       unsigned short req_parml;       /* request parameter buffer      */
-                                       /* length 16-bit little endian   */
-       unsigned char req_parmp[4];     /* request parameter buffer      *
-                                        * pointer (means nothing: the   *
-                                        * parameter buffer follows      *
-                                        * the CPRB).                    */
-       unsigned char req_datal[4];     /* request data buffer           */
-                                       /* length         ULELONG        */
-       unsigned char req_datap[4];     /* request data buffer           */
-                                       /* pointer                       */
-       unsigned short rpl_parml;       /* reply  parameter buffer       */
-                                       /* length 16-bit little endian   */
-       unsigned char pad_001[2];       /* Alignment pad bytes. ULESHORT */
-       unsigned char rpl_parmp[4];     /* reply parameter buffer        *
-                                        * pointer (means nothing: the   *
-                                        * parameter buffer follows      *
-                                        * the CPRB).                    */
-       unsigned char rpl_datal[4];     /* reply data buffer len ULELONG */
-       unsigned char rpl_datap[4];     /* reply data buffer             */
-                                       /* pointer                       */
-       unsigned short ccp_rscode;      /* server reason code   ULESHORT */
-       unsigned short ccp_rtcode;      /* server return code   ULESHORT */
-       unsigned char repd_parml[2];    /* replied parameter len ULESHORT*/
-       unsigned char mac_data_len[2];  /* Mac Data Length      ULESHORT */
-       unsigned char repd_datal[4];    /* replied data length  ULELONG  */
-       unsigned char req_pc[2];        /* PC identifier                 */
-       unsigned char res_origin[8];    /* resource origin               */
-       unsigned char mac_value[8];     /* Mac Value                     */
-       unsigned char logon_id[8];      /* Logon Identifier              */
-       unsigned char usage_domain[2];  /* cdx                           */
-       unsigned char resv3[18];        /* reserved for requestor        */
-       unsigned short svr_namel;       /* server name length  ULESHORT  */
-       unsigned char svr_name[8];      /* server name                   */
-} __packed;
-
 struct function_and_rules_block {
        unsigned char function_code[2];
        unsigned short ulen;
@@ -235,7 +179,6 @@ int speed_idx_ep11(int req_type)
        }
 }
 
-
 /*
  * Convert a ICAMEX message to a type6 MEX message.
  *
@@ -245,7 +188,7 @@ int speed_idx_ep11(int req_type)
  *
  * Returns 0 on success or negative errno value.
  */
-static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq,
+static int icamex_msg_to_type6mex_msgx(struct zcrypt_queue *zq,
                                       struct ap_message *ap_msg,
                                       struct ica_rsa_modexpo *mex)
 {
@@ -283,19 +226,19 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq,
                return -EFAULT;
 
        /* Set up key which is located after the variable length text. */
-       size = zcrypt_type6_mex_key_en(mex, msg->text+mex->inputdatalength);
+       size = zcrypt_type6_mex_key_en(mex, msg->text + mex->inputdatalength);
        if (size < 0)
                return size;
        size += sizeof(*msg) + mex->inputdatalength;
 
        /* message header, cprbx and f&r */
        msg->hdr = static_type6_hdrX;
-       msg->hdr.ToCardLen1 = size - sizeof(msg->hdr);
-       msg->hdr.FromCardLen1 = CEXXC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
+       msg->hdr.tocardlen1 = size - sizeof(msg->hdr);
+       msg->hdr.fromcardlen1 = CEXXC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
 
        msg->cprbx = static_cprbx;
        msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid);
-       msg->cprbx.rpl_msgbl = msg->hdr.FromCardLen1;
+       msg->cprbx.rpl_msgbl = msg->hdr.fromcardlen1;
 
        msg->fr = static_pke_fnr;
 
@@ -314,7 +257,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq,
  *
  * Returns 0 on success or negative errno value.
  */
-static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq,
+static int icacrt_msg_to_type6crt_msgx(struct zcrypt_queue *zq,
                                       struct ap_message *ap_msg,
                                       struct ica_rsa_modexpo_crt *crt)
 {
@@ -360,8 +303,8 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq,
 
        /* message header, cprbx and f&r */
        msg->hdr = static_type6_hdrX;
-       msg->hdr.ToCardLen1 = size -  sizeof(msg->hdr);
-       msg->hdr.FromCardLen1 = CEXXC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
+       msg->hdr.tocardlen1 = size - sizeof(msg->hdr);
+       msg->hdr.fromcardlen1 = CEXXC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
 
        msg->cprbx = static_cprbx;
        msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid);
@@ -388,8 +331,8 @@ struct type86_fmt2_msg {
        struct type86_fmt2_ext fmt2;
 } __packed;
 
-static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg,
-                                     struct ica_xcRB *xcRB,
+static int xcrb_msg_to_type6cprb_msgx(bool userspace, struct ap_message *ap_msg,
+                                     struct ica_xcRB *xcrb,
                                      unsigned int *fcode,
                                      unsigned short **dom)
 {
@@ -402,19 +345,19 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg,
                struct CPRBX cprbx;
        } __packed * msg = ap_msg->msg;
 
-       int rcblen = CEIL4(xcRB->request_control_blk_length);
+       int rcblen = CEIL4(xcrb->request_control_blk_length);
        int req_sumlen, resp_sumlen;
        char *req_data = ap_msg->msg + sizeof(struct type6_hdr) + rcblen;
        char *function_code;
 
-       if (CEIL4(xcRB->request_control_blk_length) <
-                       xcRB->request_control_blk_length)
+       if (CEIL4(xcrb->request_control_blk_length) <
+                       xcrb->request_control_blk_length)
                return -EINVAL; /* overflow after alignment*/
 
        /* length checks */
        ap_msg->len = sizeof(struct type6_hdr) +
-               CEIL4(xcRB->request_control_blk_length) +
-               xcRB->request_data_length;
+               CEIL4(xcrb->request_control_blk_length) +
+               xcrb->request_data_length;
        if (ap_msg->len > ap_msg->bufsize)
                return -EINVAL;
 
@@ -422,48 +365,49 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg,
         * Overflow check
         * sum must be greater (or equal) than the largest operand
         */
-       req_sumlen = CEIL4(xcRB->request_control_blk_length) +
-                       xcRB->request_data_length;
-       if ((CEIL4(xcRB->request_control_blk_length) <=
-                                               xcRB->request_data_length) ?
-               (req_sumlen < xcRB->request_data_length) :
-               (req_sumlen < CEIL4(xcRB->request_control_blk_length))) {
+       req_sumlen = CEIL4(xcrb->request_control_blk_length) +
+                       xcrb->request_data_length;
+       if ((CEIL4(xcrb->request_control_blk_length) <=
+            xcrb->request_data_length) ?
+           req_sumlen < xcrb->request_data_length :
+           req_sumlen < CEIL4(xcrb->request_control_blk_length)) {
                return -EINVAL;
        }
 
-       if (CEIL4(xcRB->reply_control_blk_length) <
-                       xcRB->reply_control_blk_length)
+       if (CEIL4(xcrb->reply_control_blk_length) <
+                       xcrb->reply_control_blk_length)
                return -EINVAL; /* overflow after alignment*/
 
        /*
         * Overflow check
         * sum must be greater (or equal) than the largest operand
         */
-       resp_sumlen = CEIL4(xcRB->reply_control_blk_length) +
-                       xcRB->reply_data_length;
-       if ((CEIL4(xcRB->reply_control_blk_length) <= xcRB->reply_data_length) ?
-               (resp_sumlen < xcRB->reply_data_length) :
-               (resp_sumlen < CEIL4(xcRB->reply_control_blk_length))) {
+       resp_sumlen = CEIL4(xcrb->reply_control_blk_length) +
+                       xcrb->reply_data_length;
+       if ((CEIL4(xcrb->reply_control_blk_length) <=
+            xcrb->reply_data_length) ?
+           resp_sumlen < xcrb->reply_data_length :
+           resp_sumlen < CEIL4(xcrb->reply_control_blk_length)) {
                return -EINVAL;
        }
 
        /* prepare type6 header */
        msg->hdr = static_type6_hdrX;
-       memcpy(msg->hdr.agent_id, &(xcRB->agent_ID), sizeof(xcRB->agent_ID));
-       msg->hdr.ToCardLen1 = xcRB->request_control_blk_length;
-       if (xcRB->request_data_length) {
+       memcpy(msg->hdr.agent_id, &xcrb->agent_ID, sizeof(xcrb->agent_ID));
+       msg->hdr.tocardlen1 = xcrb->request_control_blk_length;
+       if (xcrb->request_data_length) {
                msg->hdr.offset2 = msg->hdr.offset1 + rcblen;
-               msg->hdr.ToCardLen2 = xcRB->request_data_length;
+               msg->hdr.tocardlen2 = xcrb->request_data_length;
        }
-       msg->hdr.FromCardLen1 = xcRB->reply_control_blk_length;
-       msg->hdr.FromCardLen2 = xcRB->reply_data_length;
+       msg->hdr.fromcardlen1 = xcrb->reply_control_blk_length;
+       msg->hdr.fromcardlen2 = xcrb->reply_data_length;
 
        /* prepare CPRB */
-       if (z_copy_from_user(userspace, &(msg->cprbx), xcRB->request_control_blk_addr,
-                            xcRB->request_control_blk_length))
+       if (z_copy_from_user(userspace, &msg->cprbx, xcrb->request_control_blk_addr,
+                            xcrb->request_control_blk_length))
                return -EFAULT;
        if (msg->cprbx.cprb_len + sizeof(msg->hdr.function_code) >
-           xcRB->request_control_blk_length)
+           xcrb->request_control_blk_length)
                return -EINVAL;
        function_code = ((unsigned char *)&msg->cprbx) + msg->cprbx.cprb_len;
        memcpy(msg->hdr.function_code, function_code,
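The two "Overflow check" comments above rely on a standard property of unsigned arithmetic: a + b wraps around exactly when the sum comes out smaller than the larger operand. A stand-alone userspace illustration, not from the patch, assuming a 32-bit unsigned int:

#include <stdio.h>

int main(void)
{
        unsigned int a = 0xfffffff0u, b = 0x20u;
        unsigned int sum = a + b;       /* wraps to 0x10 */

        /* same shape as the kernel test: compare against the larger operand */
        if ((a <= b) ? (sum < b) : (sum < a))
                printf("overflow: 0x%x + 0x%x wrapped to 0x%x\n", a, b, sum);
        return 0;
}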
@@ -473,8 +417,8 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg,
        *dom = (unsigned short *)&msg->cprbx.domain;
 
        /* check subfunction, US and AU need special flag with NQAP */
-       if (memcmp(function_code, "US", 2) == 0
-           || memcmp(function_code, "AU", 2) == 0)
+       if (memcmp(function_code, "US", 2) == 0 ||
+           memcmp(function_code, "AU", 2) == 0)
                ap_msg->flags |= AP_MSG_FLAG_SPECIAL;
 
 #ifdef CONFIG_ZCRYPT_DEBUG
@@ -500,16 +444,16 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg,
        }
 
        /* copy data block */
-       if (xcRB->request_data_length &&
-           z_copy_from_user(userspace, req_data, xcRB->request_data_address,
-                            xcRB->request_data_length))
+       if (xcrb->request_data_length &&
+           z_copy_from_user(userspace, req_data, xcrb->request_data_address,
+                            xcrb->request_data_length))
                return -EFAULT;
 
        return 0;
 }
 
 static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap_msg,
-                                          struct ep11_urb *xcRB,
+                                          struct ep11_urb *xcrb,
                                           unsigned int *fcode,
                                           unsigned int *domain)
 {
@@ -539,25 +483,25 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap
                unsigned int    dom_val;        /* domain id       */
        } __packed * payload_hdr = NULL;
 
-       if (CEIL4(xcRB->req_len) < xcRB->req_len)
+       if (CEIL4(xcrb->req_len) < xcrb->req_len)
                return -EINVAL; /* overflow after alignment*/
 
        /* length checks */
-       ap_msg->len = sizeof(struct type6_hdr) + CEIL4(xcRB->req_len);
+       ap_msg->len = sizeof(struct type6_hdr) + CEIL4(xcrb->req_len);
        if (ap_msg->len > ap_msg->bufsize)
                return -EINVAL;
 
-       if (CEIL4(xcRB->resp_len) < xcRB->resp_len)
+       if (CEIL4(xcrb->resp_len) < xcrb->resp_len)
                return -EINVAL; /* overflow after alignment*/
 
        /* prepare type6 header */
        msg->hdr = static_type6_ep11_hdr;
-       msg->hdr.ToCardLen1   = xcRB->req_len;
-       msg->hdr.FromCardLen1 = xcRB->resp_len;
+       msg->hdr.tocardlen1   = xcrb->req_len;
+       msg->hdr.fromcardlen1 = xcrb->resp_len;
 
        /* Import CPRB data from the ioctl input parameter */
-       if (z_copy_from_user(userspace, &(msg->cprbx.cprb_len),
-                            (char __force __user *)xcRB->req, xcRB->req_len)) {
+       if (z_copy_from_user(userspace, &msg->cprbx.cprb_len,
+                            (char __force __user *)xcrb->req, xcrb->req_len)) {
                return -EFAULT;
        }
 
@@ -575,7 +519,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap
        } else {
                lfmt = 1; /* length format #1 */
        }
-       payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt);
+       payload_hdr = (struct pld_hdr *)((&msg->pld_lenfmt) + lfmt);
        *fcode = payload_hdr->func_val & 0xFFFF;
 
        /* enable special processing based on the cprbs flags special bit */
@@ -624,9 +568,9 @@ struct type86_ep11_reply {
 } __packed;
 
 static int convert_type86_ica(struct zcrypt_queue *zq,
-                         struct ap_message *reply,
-                         char __user *outputdata,
-                         unsigned int outputdatalength)
+                             struct ap_message *reply,
+                             char __user *outputdata,
+                             unsigned int outputdatalength)
 {
        static unsigned char static_pad[] = {
                0x00, 0x02,
@@ -679,18 +623,18 @@ static int convert_type86_ica(struct zcrypt_queue *zq,
                        ZCRYPT_DBF_WARN("%s dev=%02x.%04x rc/rs=%d/%d => rc=EINVAL\n",
                                        __func__, AP_QID_CARD(zq->queue->qid),
                                        AP_QID_QUEUE(zq->queue->qid),
-                                       (int) service_rc, (int) service_rs);
+                                       (int)service_rc, (int)service_rs);
                        return -EINVAL;
                }
                zq->online = 0;
                pr_err("Crypto dev=%02x.%04x rc/rs=%d/%d online=0 rc=EAGAIN\n",
                       AP_QID_CARD(zq->queue->qid),
                       AP_QID_QUEUE(zq->queue->qid),
-                      (int) service_rc, (int) service_rs);
+                      (int)service_rc, (int)service_rs);
                ZCRYPT_DBF_ERR("%s dev=%02x.%04x rc/rs=%d/%d => online=0 rc=EAGAIN\n",
                               __func__, AP_QID_CARD(zq->queue->qid),
                               AP_QID_QUEUE(zq->queue->qid),
-                              (int) service_rc, (int) service_rs);
+                              (int)service_rc, (int)service_rs);
                ap_send_online_uevent(&zq->queue->ap_dev, zq->online);
                return -EAGAIN;
        }
@@ -729,42 +673,42 @@ static int convert_type86_ica(struct zcrypt_queue *zq,
  *
  * @zq: crypto device pointer
  * @reply: reply AP message.
- * @xcRB: pointer to XCRB
+ * @xcrb: pointer to XCRB
  *
  * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
  */
 static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq,
                               struct ap_message *reply,
-                              struct ica_xcRB *xcRB)
+                              struct ica_xcRB *xcrb)
 {
        struct type86_fmt2_msg *msg = reply->msg;
        char *data = reply->msg;
 
        /* Copy CPRB to user */
-       if (xcRB->reply_control_blk_length < msg->fmt2.count1) {
+       if (xcrb->reply_control_blk_length < msg->fmt2.count1) {
                ZCRYPT_DBF_DBG("%s reply_control_blk_length %u < required %u => EMSGSIZE\n",
-                              __func__, xcRB->reply_control_blk_length,
+                              __func__, xcrb->reply_control_blk_length,
                               msg->fmt2.count1);
                return -EMSGSIZE;
        }
-       if (z_copy_to_user(userspace, xcRB->reply_control_blk_addr,
+       if (z_copy_to_user(userspace, xcrb->reply_control_blk_addr,
                           data + msg->fmt2.offset1, msg->fmt2.count1))
                return -EFAULT;
-       xcRB->reply_control_blk_length = msg->fmt2.count1;
+       xcrb->reply_control_blk_length = msg->fmt2.count1;
 
        /* Copy data buffer to user */
        if (msg->fmt2.count2) {
-               if (xcRB->reply_data_length < msg->fmt2.count2) {
+               if (xcrb->reply_data_length < msg->fmt2.count2) {
                        ZCRYPT_DBF_DBG("%s reply_data_length %u < required %u => EMSGSIZE\n",
-                                      __func__, xcRB->reply_data_length,
+                                      __func__, xcrb->reply_data_length,
                                       msg->fmt2.count2);
                        return -EMSGSIZE;
                }
-               if (z_copy_to_user(userspace, xcRB->reply_data_addr,
+               if (z_copy_to_user(userspace, xcrb->reply_data_addr,
                                   data + msg->fmt2.offset2, msg->fmt2.count2))
                        return -EFAULT;
        }
-       xcRB->reply_data_length = msg->fmt2.count2;
+       xcrb->reply_data_length = msg->fmt2.count2;
 
        return 0;
 }
@@ -774,35 +718,35 @@ static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq,
  *
  * @zq: crypto device pointer
  * @reply: reply AP message.
- * @xcRB: pointer to EP11 user request block
+ * @xcrb: pointer to EP11 user request block
  *
  * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
  */
 static int convert_type86_ep11_xcrb(bool userspace, struct zcrypt_queue *zq,
                                    struct ap_message *reply,
-                                   struct ep11_urb *xcRB)
+                                   struct ep11_urb *xcrb)
 {
        struct type86_fmt2_msg *msg = reply->msg;
        char *data = reply->msg;
 
-       if (xcRB->resp_len < msg->fmt2.count1) {
+       if (xcrb->resp_len < msg->fmt2.count1) {
                ZCRYPT_DBF_DBG("%s resp_len %u < required %u => EMSGSIZE\n",
-                              __func__, (unsigned int)xcRB->resp_len,
+                              __func__, (unsigned int)xcrb->resp_len,
                               msg->fmt2.count1);
                return -EMSGSIZE;
        }
 
        /* Copy response CPRB to user */
-       if (z_copy_to_user(userspace, (char __force __user *)xcRB->resp,
+       if (z_copy_to_user(userspace, (char __force __user *)xcrb->resp,
                           data + msg->fmt2.offset1, msg->fmt2.count1))
                return -EFAULT;
-       xcRB->resp_len = msg->fmt2.count1;
+       xcrb->resp_len = msg->fmt2.count1;
        return 0;
 }
 
 static int convert_type86_rng(struct zcrypt_queue *zq,
-                         struct ap_message *reply,
-                         char *buffer)
+                             struct ap_message *reply,
+                             char *buffer)
 {
        struct {
                struct type86_hdr hdr;
@@ -818,9 +762,9 @@ static int convert_type86_rng(struct zcrypt_queue *zq,
 }
 
 static int convert_response_ica(struct zcrypt_queue *zq,
-                           struct ap_message *reply,
-                           char __user *outputdata,
-                           unsigned int outputdatalength)
+                               struct ap_message *reply,
+                               char __user *outputdata,
+                               unsigned int outputdatalength)
 {
        struct type86x_reply *msg = reply->msg;
 
@@ -830,13 +774,14 @@ static int convert_response_ica(struct zcrypt_queue *zq,
                return convert_error(zq, reply);
        case TYPE86_RSP_CODE:
                if (msg->cprbx.ccp_rtcode &&
-                  (msg->cprbx.ccp_rscode == 0x14f) &&
-                  (outputdatalength > 256)) {
+                   msg->cprbx.ccp_rscode == 0x14f &&
+                   outputdatalength > 256) {
                        if (zq->zcard->max_exp_bit_length <= 17) {
                                zq->zcard->max_exp_bit_length = 17;
                                return -EAGAIN;
-                       } else
+                       } else {
                                return -EINVAL;
+                       }
                }
                if (msg->hdr.reply_code)
                        return convert_error(zq, reply);
@@ -850,11 +795,11 @@ static int convert_response_ica(struct zcrypt_queue *zq,
                pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                       AP_QID_CARD(zq->queue->qid),
                       AP_QID_QUEUE(zq->queue->qid),
-                      (int) msg->hdr.type);
+                      (int)msg->hdr.type);
                ZCRYPT_DBF_ERR(
                        "%s dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                        __func__, AP_QID_CARD(zq->queue->qid),
-                       AP_QID_QUEUE(zq->queue->qid), (int) msg->hdr.type);
+                       AP_QID_QUEUE(zq->queue->qid), (int)msg->hdr.type);
                ap_send_online_uevent(&zq->queue->ap_dev, zq->online);
                return -EAGAIN;
        }
@@ -862,41 +807,41 @@ static int convert_response_ica(struct zcrypt_queue *zq,
 
 static int convert_response_xcrb(bool userspace, struct zcrypt_queue *zq,
                                 struct ap_message *reply,
-                                struct ica_xcRB *xcRB)
+                                struct ica_xcRB *xcrb)
 {
        struct type86x_reply *msg = reply->msg;
 
        switch (msg->hdr.type) {
        case TYPE82_RSP_CODE:
        case TYPE88_RSP_CODE:
-               xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
+               xcrb->status = 0x0008044DL; /* HDD_InvalidParm */
                return convert_error(zq, reply);
        case TYPE86_RSP_CODE:
                if (msg->hdr.reply_code) {
-                       memcpy(&(xcRB->status), msg->fmt2.apfs, sizeof(u32));
+                       memcpy(&xcrb->status, msg->fmt2.apfs, sizeof(u32));
                        return convert_error(zq, reply);
                }
                if (msg->cprbx.cprb_ver_id == 0x02)
-                       return convert_type86_xcrb(userspace, zq, reply, xcRB);
+                       return convert_type86_xcrb(userspace, zq, reply, xcrb);
                fallthrough;    /* wrong cprb version is an unknown response */
        default: /* Unknown response type, this should NEVER EVER happen */
-               xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
+               xcrb->status = 0x0008044DL; /* HDD_InvalidParm */
                zq->online = 0;
                pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                       AP_QID_CARD(zq->queue->qid),
                       AP_QID_QUEUE(zq->queue->qid),
-                      (int) msg->hdr.type);
+                      (int)msg->hdr.type);
                ZCRYPT_DBF_ERR(
                        "%s dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                        __func__, AP_QID_CARD(zq->queue->qid),
-                       AP_QID_QUEUE(zq->queue->qid), (int) msg->hdr.type);
+                       AP_QID_QUEUE(zq->queue->qid), (int)msg->hdr.type);
                ap_send_online_uevent(&zq->queue->ap_dev, zq->online);
                return -EAGAIN;
        }
 }
 
 static int convert_response_ep11_xcrb(bool userspace, struct zcrypt_queue *zq,
-                                     struct ap_message *reply, struct ep11_urb *xcRB)
+                                     struct ap_message *reply, struct ep11_urb *xcrb)
 {
        struct type86_ep11_reply *msg = reply->msg;
 
@@ -908,26 +853,26 @@ static int convert_response_ep11_xcrb(bool userspace, struct zcrypt_queue *zq,
                if (msg->hdr.reply_code)
                        return convert_error(zq, reply);
                if (msg->cprbx.cprb_ver_id == 0x04)
-                       return convert_type86_ep11_xcrb(userspace, zq, reply, xcRB);
+                       return convert_type86_ep11_xcrb(userspace, zq, reply, xcrb);
                fallthrough;    /* wrong cprb version is an unknown resp */
        default: /* Unknown response type, this should NEVER EVER happen */
                zq->online = 0;
                pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                       AP_QID_CARD(zq->queue->qid),
                       AP_QID_QUEUE(zq->queue->qid),
-                      (int) msg->hdr.type);
+                      (int)msg->hdr.type);
                ZCRYPT_DBF_ERR(
                        "%s dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                        __func__, AP_QID_CARD(zq->queue->qid),
-                       AP_QID_QUEUE(zq->queue->qid), (int) msg->hdr.type);
+                       AP_QID_QUEUE(zq->queue->qid), (int)msg->hdr.type);
                ap_send_online_uevent(&zq->queue->ap_dev, zq->online);
                return -EAGAIN;
        }
 }
 
 static int convert_response_rng(struct zcrypt_queue *zq,
-                                struct ap_message *reply,
-                                char *data)
+                               struct ap_message *reply,
+                               char *data)
 {
        struct type86x_reply *msg = reply->msg;
 
@@ -946,11 +891,11 @@ static int convert_response_rng(struct zcrypt_queue *zq,
                pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                       AP_QID_CARD(zq->queue->qid),
                       AP_QID_QUEUE(zq->queue->qid),
-                      (int) msg->hdr.type);
+                      (int)msg->hdr.type);
                ZCRYPT_DBF_ERR(
                        "%s dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
                        __func__, AP_QID_CARD(zq->queue->qid),
-                       AP_QID_QUEUE(zq->queue->qid), (int) msg->hdr.type);
+                       AP_QID_QUEUE(zq->queue->qid), (int)msg->hdr.type);
                ap_send_online_uevent(&zq->queue->ap_dev, zq->online);
                return -EAGAIN;
        }
@@ -965,15 +910,15 @@ static int convert_response_rng(struct zcrypt_queue *zq,
  * @reply: pointer to the AP reply message
  */
 static void zcrypt_msgtype6_receive(struct ap_queue *aq,
-                                 struct ap_message *msg,
-                                 struct ap_message *reply)
+                                   struct ap_message *msg,
+                                   struct ap_message *reply)
 {
        static struct error_hdr error_reply = {
                .type = TYPE82_RSP_CODE,
                .reply_code = REP82_ERROR_MACHINE_FAILURE,
        };
        struct response_type *resp_type =
-               (struct response_type *) msg->private;
+               (struct response_type *)msg->private;
        struct type86x_reply *t86r;
        int len;
 
@@ -982,7 +927,7 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq,
                goto out;       /* ap_msg->rc indicates the error */
        t86r = reply->msg;
        if (t86r->hdr.type == TYPE86_RSP_CODE &&
-                t86r->cprbx.cprb_ver_id == 0x02) {
+           t86r->cprbx.cprb_ver_id == 0x02) {
                switch (resp_type->type) {
                case CEXXC_RESPONSE_TYPE_ICA:
                        len = sizeof(struct type86x_reply) + t86r->length - 2;
@@ -1005,10 +950,11 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq,
                default:
                        memcpy(msg->msg, &error_reply, sizeof(error_reply));
                }
-       } else
+       } else {
                memcpy(msg->msg, reply->msg, sizeof(error_reply));
+       }
 out:
-       complete(&(resp_type->work));
+       complete(&resp_type->work);
 }
 
 /*
@@ -1055,7 +1001,7 @@ static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq,
                memcpy(msg->msg, reply->msg, sizeof(error_reply));
        }
 out:
-       complete(&(resp_type->work));
+       complete(&resp_type->work);
 }
 
 static atomic_t zcrypt_step = ATOMIC_INIT(0);
@@ -1076,15 +1022,15 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq,
        };
        int rc;
 
-       ap_msg->msg = (void *) get_zeroed_page(GFP_KERNEL);
+       ap_msg->msg = (void *)get_zeroed_page(GFP_KERNEL);
        if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->bufsize = PAGE_SIZE;
        ap_msg->receive = zcrypt_msgtype6_receive;
-       ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+       ap_msg->psmid = (((unsigned long long)current->pid) << 32) +
                atomic_inc_return(&zcrypt_step);
        ap_msg->private = &resp_type;
-       rc = ICAMEX_msg_to_type6MEX_msgX(zq, ap_msg, mex);
+       rc = icamex_msg_to_type6mex_msgx(zq, ap_msg, mex);
        if (rc)
                goto out_free;
        init_completion(&resp_type.work);
@@ -1098,11 +1044,13 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq,
                        rc = convert_response_ica(zq, ap_msg,
                                                  mex->outputdata,
                                                  mex->outputdatalength);
-       } else
+       } else {
                /* Signal pending. */
                ap_cancel_message(zq->queue, ap_msg);
+       }
+
 out_free:
-       free_page((unsigned long) ap_msg->msg);
+       free_page((unsigned long)ap_msg->msg);
        ap_msg->private = NULL;
        ap_msg->msg = NULL;
        return rc;
@@ -1124,15 +1072,15 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq,
        };
        int rc;
 
-       ap_msg->msg = (void *) get_zeroed_page(GFP_KERNEL);
+       ap_msg->msg = (void *)get_zeroed_page(GFP_KERNEL);
        if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->bufsize = PAGE_SIZE;
        ap_msg->receive = zcrypt_msgtype6_receive;
-       ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+       ap_msg->psmid = (((unsigned long long)current->pid) << 32) +
                atomic_inc_return(&zcrypt_step);
        ap_msg->private = &resp_type;
-       rc = ICACRT_msg_to_type6CRT_msgX(zq, ap_msg, crt);
+       rc = icacrt_msg_to_type6crt_msgx(zq, ap_msg, crt);
        if (rc)
                goto out_free;
        init_completion(&resp_type.work);
@@ -1150,8 +1098,9 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq,
                /* Signal pending. */
                ap_cancel_message(zq->queue, ap_msg);
        }
+
 out_free:
-       free_page((unsigned long) ap_msg->msg);
+       free_page((unsigned long)ap_msg->msg);
        ap_msg->private = NULL;
        ap_msg->msg = NULL;
        return rc;
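zcrypt_msgtype6_modexpo() and zcrypt_msgtype6_modexpo_crt() above share one asynchronous lifecycle: build the message, queue it, sleep on a completion that the receive callback fires, then convert the reply, or cancel the in-flight message if a signal cut the wait short. A condensed sketch of that shape (send_and_wait is a hypothetical name; zcrypt/ap types as in the patch, error paths trimmed):

static long send_and_wait(struct zcrypt_queue *zq, struct ap_message *ap_msg,
                          struct response_type *rtype)
{
        long rc;

        init_completion(&rtype->work);
        rc = ap_queue_message(zq->queue, ap_msg);       /* hand off to the queue */
        if (rc)
                return rc;
        rc = wait_for_completion_interruptible(&rtype->work);
        if (rc == 0) {
                rc = ap_msg->rc;        /* set by the receive callback */
                /* ... convert_response_*() runs here on success ... */
        } else {
                /* signal pending: drop the in-flight request */
                ap_cancel_message(zq->queue, ap_msg);
        }
        return rc;
}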
@@ -1166,7 +1115,7 @@ out_free:
  * by the caller with ap_init_message(). Also the caller has to
  * make sure ap_release_message() is always called even on failure.
  */
-int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcRB,
+int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcrb,
                    struct ap_message *ap_msg,
                    unsigned int *func_code, unsigned short **dom)
 {
@@ -1179,12 +1128,12 @@ int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcRB,
        if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->receive = zcrypt_msgtype6_receive;
-       ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+       ap_msg->psmid = (((unsigned long long)current->pid) << 32) +
                                atomic_inc_return(&zcrypt_step);
        ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL);
        if (!ap_msg->private)
                return -ENOMEM;
-       return XCRB_msg_to_type6CPRB_msgX(userspace, ap_msg, xcRB, func_code, dom);
+       return xcrb_msg_to_type6cprb_msgx(userspace, ap_msg, xcrb, func_code, dom);
 }
 
 /*
@@ -1192,10 +1141,10 @@ int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcRB,
  * device to handle a send_cprb request.
  * @zq: pointer to zcrypt_queue structure that identifies the
  *     CEXxC device to the request distributor
- * @xcRB: pointer to the send_cprb request buffer
+ * @xcrb: pointer to the send_cprb request buffer
  */
 static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq,
-                                     struct ica_xcRB *xcRB,
+                                     struct ica_xcRB *xcrb,
                                      struct ap_message *ap_msg)
 {
        int rc;
@@ -1210,11 +1159,11 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq,
         * Set the queue's reply buffer length minus 128 byte padding
         * as reply limit for the card firmware.
         */
-       msg->hdr.FromCardLen1 = min_t(unsigned int, msg->hdr.FromCardLen1,
+       msg->hdr.fromcardlen1 = min_t(unsigned int, msg->hdr.fromcardlen1,
                                      zq->reply.bufsize - 128);
-       if (msg->hdr.FromCardLen2)
-               msg->hdr.FromCardLen2 =
-                       zq->reply.bufsize - msg->hdr.FromCardLen1 - 128;
+       if (msg->hdr.fromcardlen2)
+               msg->hdr.fromcardlen2 =
+                       zq->reply.bufsize - msg->hdr.fromcardlen1 - 128;
 
        init_completion(&rtype->work);
        rc = ap_queue_message(zq->queue, ap_msg);
@@ -1224,10 +1173,12 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq,
        if (rc == 0) {
                rc = ap_msg->rc;
                if (rc == 0)
-                       rc = convert_response_xcrb(userspace, zq, ap_msg, xcRB);
-       } else
+                       rc = convert_response_xcrb(userspace, zq, ap_msg, xcrb);
+       } else {
                /* Signal pending. */
                ap_cancel_message(zq->queue, ap_msg);
+       }
+
 out:
        if (rc)
                ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n",
@@ -1258,7 +1209,7 @@ int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb,
        if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->receive = zcrypt_msgtype6_receive_ep11;
-       ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+       ap_msg->psmid = (((unsigned long long)current->pid) << 32) +
                                atomic_inc_return(&zcrypt_step);
        ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL);
        if (!ap_msg->private)
@@ -1272,7 +1223,7 @@ int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb,
  * device to handle a send_ep11_cprb request.
  * @zq: pointer to zcrypt_queue structure that identifies the
  *       CEX4P device to the request distributor
- * @xcRB: pointer to the ep11 user request block
+ * @xcrb: pointer to the ep11 user request block
  */
 static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *zq,
                                           struct ep11_urb *xcrb,
@@ -1322,7 +1273,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *
                } else {
                        lfmt = 1; /* length format #1 */
                }
-               payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt);
+               payload_hdr = (struct pld_hdr *)((&msg->pld_lenfmt) + lfmt);
                payload_hdr->dom_val = (unsigned int)
                                        AP_QID_QUEUE(zq->queue->qid);
        }
@@ -1331,7 +1282,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *
         * Set the queue's reply buffer length minus the two prepend headers
         * as reply limit for the card firmware.
         */
-       msg->hdr.FromCardLen1 = zq->reply.bufsize -
+       msg->hdr.fromcardlen1 = zq->reply.bufsize -
                sizeof(struct type86_hdr) - sizeof(struct type86_fmt2_ext);
 
        init_completion(&rtype->work);
@@ -1343,9 +1294,11 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *
                rc = ap_msg->rc;
                if (rc == 0)
                        rc = convert_response_ep11_xcrb(userspace, zq, ap_msg, xcrb);
-       } else
+       } else {
                /* Signal pending. */
                ap_cancel_message(zq->queue, ap_msg);
+       }
+
 out:
        if (rc)
                ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n",
@@ -1366,13 +1319,13 @@ int prep_rng_ap_msg(struct ap_message *ap_msg, int *func_code,
        if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->receive = zcrypt_msgtype6_receive;
-       ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
+       ap_msg->psmid = (((unsigned long long)current->pid) << 32) +
                                atomic_inc_return(&zcrypt_step);
        ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL);
        if (!ap_msg->private)
                return -ENOMEM;
 
-       rng_type6CPRB_msgX(ap_msg, ZCRYPT_RNG_BUFFER_SIZE, domain);
+       rng_type6cprb_msgx(ap_msg, ZCRYPT_RNG_BUFFER_SIZE, domain);
 
        *func_code = HWRNG;
        return 0;
@@ -1411,9 +1364,10 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq,
                rc = ap_msg->rc;
                if (rc == 0)
                        rc = convert_response_rng(zq, ap_msg, buffer);
-       } else
+       } else {
                /* Signal pending. */
                ap_cancel_message(zq->queue, ap_msg);
+       }
 out:
        return rc;
 }
index 9da4f4175c44c34be0d24ede059365e511dfb55a..6f5ced8d6cdae66c99e7356ba6929ea8ddb42c2f 100644 (file)
@@ -45,14 +45,14 @@ struct type6_hdr {
        unsigned char reserved5[2];     /* 0x0000                       */
        unsigned char function_code[2]; /* for PKD, 0x5044 (ascii 'PD') */
        unsigned char reserved6[2];     /* 0x0000                       */
-       unsigned int  ToCardLen1;       /* (request CPRB len + 3) & -4  */
-       unsigned int  ToCardLen2;       /* db len 0x00000000 for PKD    */
-       unsigned int  ToCardLen3;       /* 0x00000000                   */
-       unsigned int  ToCardLen4;       /* 0x00000000                   */
-       unsigned int  FromCardLen1;     /* response buffer length       */
-       unsigned int  FromCardLen2;     /* db len 0x00000000 for PKD    */
-       unsigned int  FromCardLen3;     /* 0x00000000                   */
-       unsigned int  FromCardLen4;     /* 0x00000000                   */
+       unsigned int  tocardlen1;       /* (request CPRB len + 3) & -4  */
+       unsigned int  tocardlen2;       /* db len 0x00000000 for PKD    */
+       unsigned int  tocardlen3;       /* 0x00000000                   */
+       unsigned int  tocardlen4;       /* 0x00000000                   */
+       unsigned int  fromcardlen1;     /* response buffer length       */
+       unsigned int  fromcardlen2;     /* db len 0x00000000 for PKD    */
+       unsigned int  fromcardlen3;     /* 0x00000000                   */
+       unsigned int  fromcardlen4;     /* 0x00000000                   */
 } __packed;
 
 /**
@@ -116,7 +116,7 @@ int speed_idx_ep11(int);
  * @ap_dev: AP device pointer
  * @ap_msg: pointer to AP message
  */
-static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg,
+static inline void rng_type6cprb_msgx(struct ap_message *ap_msg,
                                      unsigned int random_number_length,
                                      unsigned int *domain)
 {
@@ -134,8 +134,8 @@ static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg,
                .offset1        = 0x00000058,
                .agent_id       = {'C', 'A'},
                .function_code  = {'R', 'L'},
-               .ToCardLen1     = sizeof(*msg) - sizeof(msg->hdr),
-               .FromCardLen1   = sizeof(*msg) - sizeof(msg->hdr),
+               .tocardlen1     = sizeof(*msg) - sizeof(msg->hdr),
+               .fromcardlen1   = sizeof(*msg) - sizeof(msg->hdr),
        };
        static struct CPRBX local_cprbx = {
                .cprb_len       = 0x00dc,
@@ -147,9 +147,9 @@ static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg,
        };
 
        msg->hdr = static_type6_hdrX;
-       msg->hdr.FromCardLen2 = random_number_length,
+       msg->hdr.fromcardlen2 = random_number_length;
        msg->cprbx = local_cprbx;
-       msg->cprbx.rpl_datal = random_number_length,
+       msg->cprbx.rpl_datal = random_number_length;
        memcpy(msg->function_code, msg->hdr.function_code, 0x02);
        msg->rule_length = 0x0a;
        memcpy(msg->rule, "RANDOM  ", 8);
index 1552a850a52ede68e854550b8902fc5fdd45ddd1..cdc5a4b2c01943fca455a64a8dc92fdec557f6bc 100644 (file)
@@ -114,7 +114,7 @@ struct zcrypt_queue *zcrypt_queue_alloc(size_t reply_buf_size)
 {
        struct zcrypt_queue *zq;
 
-       zq = kzalloc(sizeof(struct zcrypt_queue), GFP_KERNEL);
+       zq = kzalloc(sizeof(*zq), GFP_KERNEL);
        if (!zq)
                return NULL;
        zq->reply.msg = kmalloc(reply_buf_size, GFP_KERNEL);
index 88abfb5e8045c6135c3e58572f697880c16586e3..8ac213a551418da387e96825acea509001382c28 100644 (file)
@@ -626,8 +626,6 @@ static void mpc_rcvd_sweep_resp(struct mpcg_info *mpcginfo)
                ctcm_clear_busy_do(dev);
        }
 
-       kfree(mpcginfo);
-
        return;
 
 }
@@ -1192,10 +1190,10 @@ static void ctcmpc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
                                                CTCM_FUNTAIL, dev->name);
                        priv->stats.rx_dropped++;
                        /* mpcginfo only used for non-data transfers */
-                       kfree(mpcginfo);
                        if (do_debug_data)
                                ctcmpc_dump_skb(pskb, -8);
                }
+               kfree(mpcginfo);
        }
 done:
 
@@ -1977,7 +1975,6 @@ static void mpc_action_rcvd_xid0(fsm_instance *fsm, int event, void *arg)
                }
                break;
        }
-       kfree(mpcginfo);
 
        CTCM_PR_DEBUG("ctcmpc:%s() %s xid2:%i xid7:%i xidt_p2:%i \n",
                __func__, ch->id, grp->outstanding_xid2,
@@ -2038,7 +2035,6 @@ static void mpc_action_rcvd_xid7(fsm_instance *fsm, int event, void *arg)
                mpc_validate_xid(mpcginfo);
                break;
        }
-       kfree(mpcginfo);
        return;
 }
 
index ded1930a00b2d8f04f7dc8c1f4a2cc20b29dbf47..e3813a7aa5e68ff1d3573e50bc6f624f8ef4cc6f 100644 (file)
@@ -39,11 +39,12 @@ static ssize_t ctcm_buffer_write(struct device *dev,
        struct ctcm_priv *priv = dev_get_drvdata(dev);
        int rc;
 
-       ndev = priv->channel[CTCM_READ]->netdev;
-       if (!(priv && priv->channel[CTCM_READ] && ndev)) {
+       if (!(priv && priv->channel[CTCM_READ] &&
+             priv->channel[CTCM_READ]->netdev)) {
                CTCM_DBF_TEXT(SETUP, CTC_DBF_ERROR, "bfnondev");
                return -ENODEV;
        }
+       ndev = priv->channel[CTCM_READ]->netdev;
 
        rc = kstrtouint(buf, 0, &bs1);
        if (rc)
index bab9b34926c6881d1388e6c328aa704b58868613..84c8981317b4602a2695f8eb29f67288eaeceecd 100644 (file)
@@ -1736,10 +1736,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd)
                        lcs_schedule_recovery(card);
                        break;
                case LCS_CMD_STOPLAN:
-                       pr_warn("Stoplan for %s initiated by LGW\n",
-                               card->dev->name);
-                       if (card->dev)
+                       if (card->dev) {
+                               pr_warn("Stoplan for %s initiated by LGW\n",
+                                       card->dev->name);
                                netif_carrier_off(card->dev);
+                       }
                        break;
                default:
                        LCS_DBF_TEXT(5, trace, "noLGWcmd");
index 37d06f993b761ec4feb2591435f1bd5805997156..1d9be771f3ee0508098d4e3445b345e527827b6e 100644 (file)
@@ -1172,9 +1172,8 @@ static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
        case SCSI_ACCESS_STATE_OPTIMAL:
        case SCSI_ACCESS_STATE_ACTIVE:
        case SCSI_ACCESS_STATE_LBA:
-               return BLK_STS_OK;
        case SCSI_ACCESS_STATE_TRANSITIONING:
-               return BLK_STS_AGAIN;
+               return BLK_STS_OK;
        default:
                req->rq_flags |= RQF_QUIET;
                return BLK_STS_IOERR;
index ef6e8cd8c26ae5adba9ab8b73457103799faed4a..872a26376ccbb84faf6cc6c5804ed5aeea6102cf 100644 (file)
@@ -1330,7 +1330,7 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
                    LPFC_SLI_INTF_IF_TYPE_0) {
                        /* FLOGI needs to be 3 for WQE FCFI */
-                       ct = ((SLI4_CT_FCFI >> 1) & 1) | (SLI4_CT_FCFI & 1);
+                       ct = SLI4_CT_FCFI;
                        bf_set(wqe_ct, &wqe->els_req.wqe_com, ct);
 
                        /* Set the fcfi to the fcfi we registered with */
index ba9dbb51b75f06b9536695a0ec943695844703f5..f6b83853f7eea122b8ac87085b3c486b3981dcab 100644 (file)
@@ -5528,7 +5528,9 @@ static char *lpfc_is_command_vm_io(struct scsi_cmnd *cmd)
 {
        struct bio *bio = scsi_cmd_to_rq(cmd)->bio;
 
-       return bio ? blkcg_get_fc_appid(bio) : NULL;
+       if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !bio)
+               return NULL;
+       return blkcg_get_fc_appid(bio);
 }
 
 /**
index bda2a7ba4e77fe5f19c7c60521ac6a9ea5d7424f..6adaf79e67cc034c9d4e833a7816125beaad2bdb 100644 (file)
@@ -10720,10 +10720,10 @@ __lpfc_sli_prep_gen_req_s4(struct lpfc_iocbq *cmdiocbq, struct lpfc_dmabuf *bmp,
 
        /* Words 0 - 2 */
        bde = (struct ulp_bde64_le *)&cmdwqe->generic.bde;
-       bde->addr_low = cpu_to_le32(putPaddrLow(bmp->phys));
-       bde->addr_high = cpu_to_le32(putPaddrHigh(bmp->phys));
+       bde->addr_low = bpl->addr_low;
+       bde->addr_high = bpl->addr_high;
        bde->type_size = cpu_to_le32(xmit_len);
-       bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BLP_64);
+       bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BDE_64);
 
        /* Word 3 */
        cmdwqe->gen_req.request_payload_len = xmit_len;
index 85dbf81f3204aeeb5ec01e0597ff4ad4be0576f8..6dfcfd8e73371cfe1538cdd5bef223f1ab96c815 100644 (file)
@@ -3826,6 +3826,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd)
 
        spin_lock_irqsave(&cmd->cmd_lock, flags);
        if (cmd->aborted) {
+               if (cmd->sg_mapped)
+                       qlt_unmap_sg(vha, cmd);
+
                spin_unlock_irqrestore(&cmd->cmd_lock, flags);
                /*
                 * It's normal to see 2 calls in this path:
index dc6e55761fd1f07239e60cd6f65c03ae47376cde..9694e2cfaf9a60f527f65af4b1c78d030389e806 100644 (file)
@@ -797,7 +797,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
        case SD_LBP_FULL:
        case SD_LBP_DISABLE:
                blk_queue_max_discard_sectors(q, 0);
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
                return;
 
        case SD_LBP_UNMAP:
@@ -830,7 +829,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
        }
 
        blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 }
 
 static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
index 81099b68bbfbd13c8b1226d76e5c2482a37e0553..588c0329b80ca5370c0f16910c0b0a766ff19759 100644 (file)
@@ -1254,6 +1254,13 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
        struct utp_hpb_rsp *rsp_field = &lrbp->ucd_rsp_ptr->hr;
        int data_seg_len;
 
+       data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2)
+               & MASK_RSP_UPIU_DATA_SEG_LEN;
+
+       /* If data segment length is zero, rsp_field is not valid */
+       if (!data_seg_len)
+               return;
+
        if (unlikely(lrbp->lun != rsp_field->lun)) {
                struct scsi_device *sdev;
                bool found = false;
@@ -1288,18 +1295,6 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
                return;
        }
 
-       data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2)
-               & MASK_RSP_UPIU_DATA_SEG_LEN;
-
-       /* To flush remained rsp_list, we queue the map_work task */
-       if (!data_seg_len) {
-               if (!ufshpb_is_general_lun(hpb->lun))
-                       return;
-
-               ufshpb_kick_map_work(hpb);
-               return;
-       }
-
        BUILD_BUG_ON(sizeof(struct utp_hpb_rsp) != UTP_HPB_RSP_SIZE);
 
        if (!ufshpb_is_hpb_rsp_valid(hba, lrbp, rsp_field))
index 12109e4c73d40ef032a482619dac8e603961fac8..51afc66e839d71f51d558f7b17d9ff67930f284a 100644 (file)
@@ -58,9 +58,6 @@
 
 #include <asm/xen/hypervisor.h>
 
-
-#define GRANT_INVALID_REF      0
-
 #define VSCSIFRONT_OP_ADD_LUN  1
 #define VSCSIFRONT_OP_DEL_LUN  2
 #define VSCSIFRONT_OP_READD_LUN        3
@@ -83,6 +80,8 @@ struct vscsifrnt_shadow {
        uint16_t rqid;
        uint16_t ref_rqid;
 
+       bool inflight;
+
        unsigned int nr_grants;         /* number of grants in gref[] */
        struct scsiif_request_segment *sg;      /* scatter/gather elements */
        struct scsiif_request_segment seg[VSCSIIF_SG_TABLESIZE];
@@ -104,7 +103,11 @@ struct vscsifrnt_info {
        struct xenbus_device *dev;
 
        struct Scsi_Host *host;
-       int host_active;
+       enum {
+               STATE_INACTIVE,
+               STATE_ACTIVE,
+               STATE_ERROR
+       }  host_active;
 
        unsigned int evtchn;
        unsigned int irq;
@@ -217,6 +220,8 @@ static int scsifront_do_request(struct vscsifrnt_info *info,
        for (i = 0; i < (shadow->nr_segments & ~VSCSIIF_SG_GRANT); i++)
                ring_req->seg[i] = shadow->seg[i];
 
+       shadow->inflight = true;
+
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
        if (notify)
                notify_remote_via_irq(info->irq);
@@ -224,6 +229,13 @@ static int scsifront_do_request(struct vscsifrnt_info *info,
        return 0;
 }
 
+static void scsifront_set_error(struct vscsifrnt_info *info, const char *msg)
+{
+       shost_printk(KERN_ERR, info->host, KBUILD_MODNAME "%s\n"
+                    "Disabling device for further use\n", msg);
+       info->host_active = STATE_ERROR;
+}
+
 static void scsifront_gnttab_done(struct vscsifrnt_info *info,
                                  struct vscsifrnt_shadow *shadow)
 {
@@ -234,15 +246,64 @@ static void scsifront_gnttab_done(struct vscsifrnt_info *info,
 
        for (i = 0; i < shadow->nr_grants; i++) {
                if (unlikely(!gnttab_try_end_foreign_access(shadow->gref[i]))) {
-                       shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME
-                                    "grant still in use by backend\n");
-                       BUG();
+                       scsifront_set_error(info, "grant still in use by backend");
+                       return;
                }
        }
 
        kfree(shadow->sg);
 }
 
+static unsigned int scsifront_host_byte(int32_t rslt)
+{
+       switch (XEN_VSCSIIF_RSLT_HOST(rslt)) {
+       case XEN_VSCSIIF_RSLT_HOST_OK:
+               return DID_OK;
+       case XEN_VSCSIIF_RSLT_HOST_NO_CONNECT:
+               return DID_NO_CONNECT;
+       case XEN_VSCSIIF_RSLT_HOST_BUS_BUSY:
+               return DID_BUS_BUSY;
+       case XEN_VSCSIIF_RSLT_HOST_TIME_OUT:
+               return DID_TIME_OUT;
+       case XEN_VSCSIIF_RSLT_HOST_BAD_TARGET:
+               return DID_BAD_TARGET;
+       case XEN_VSCSIIF_RSLT_HOST_ABORT:
+               return DID_ABORT;
+       case XEN_VSCSIIF_RSLT_HOST_PARITY:
+               return DID_PARITY;
+       case XEN_VSCSIIF_RSLT_HOST_ERROR:
+               return DID_ERROR;
+       case XEN_VSCSIIF_RSLT_HOST_RESET:
+               return DID_RESET;
+       case XEN_VSCSIIF_RSLT_HOST_BAD_INTR:
+               return DID_BAD_INTR;
+       case XEN_VSCSIIF_RSLT_HOST_PASSTHROUGH:
+               return DID_PASSTHROUGH;
+       case XEN_VSCSIIF_RSLT_HOST_SOFT_ERROR:
+               return DID_SOFT_ERROR;
+       case XEN_VSCSIIF_RSLT_HOST_IMM_RETRY:
+               return DID_IMM_RETRY;
+       case XEN_VSCSIIF_RSLT_HOST_REQUEUE:
+               return DID_REQUEUE;
+       case XEN_VSCSIIF_RSLT_HOST_TRANSPORT_DISRUPTED:
+               return DID_TRANSPORT_DISRUPTED;
+       case XEN_VSCSIIF_RSLT_HOST_TRANSPORT_FAILFAST:
+               return DID_TRANSPORT_FAILFAST;
+       case XEN_VSCSIIF_RSLT_HOST_TARGET_FAILURE:
+               return DID_TARGET_FAILURE;
+       case XEN_VSCSIIF_RSLT_HOST_NEXUS_FAILURE:
+               return DID_NEXUS_FAILURE;
+       case XEN_VSCSIIF_RSLT_HOST_ALLOC_FAILURE:
+               return DID_ALLOC_FAILURE;
+       case XEN_VSCSIIF_RSLT_HOST_MEDIUM_ERROR:
+               return DID_MEDIUM_ERROR;
+       case XEN_VSCSIIF_RSLT_HOST_TRANSPORT_MARGINAL:
+               return DID_TRANSPORT_MARGINAL;
+       default:
+               return DID_ERROR;
+       }
+}
+
 static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
                                   struct vscsiif_response *ring_rsp)
 {
@@ -250,7 +311,6 @@ static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
        struct scsi_cmnd *sc;
        uint32_t id;
        uint8_t sense_len;
-       int result;
 
        id = ring_rsp->rqid;
        shadow = info->shadow[id];
@@ -259,14 +319,12 @@ static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
        BUG_ON(sc == NULL);
 
        scsifront_gnttab_done(info, shadow);
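+       /* scsifront_gnttab_done() latches STATE_ERROR if a grant is still
+        * held by the backend; don't complete the command in that case.
+        */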
+       if (info->host_active == STATE_ERROR)
+               return;
        scsifront_put_rqid(info, id);
 
-       result = ring_rsp->rslt;
-       if (result >> 24)
-               set_host_byte(sc, DID_ERROR);
-       else
-               set_host_byte(sc, host_byte(result));
-       set_status_byte(sc, result & 0xff);
+       set_host_byte(sc, scsifront_host_byte(ring_rsp->rslt));
+       set_status_byte(sc, XEN_VSCSIIF_RSLT_STATUS(ring_rsp->rslt));
        scsi_set_resid(sc, ring_rsp->residual_len);
 
        sense_len = min_t(uint8_t, VSCSIIF_SENSE_BUFFERSIZE,
@@ -290,7 +348,10 @@ static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
        shadow->wait_reset = 1;
        switch (shadow->rslt_reset) {
        case RSLT_RESET_WAITING:
-               shadow->rslt_reset = ring_rsp->rslt;
+               if (ring_rsp->rslt == XEN_VSCSIIF_RSLT_RESET_SUCCESS)
+                       shadow->rslt_reset = SUCCESS;
+               else
+                       shadow->rslt_reset = FAILED;
                break;
        case RSLT_RESET_ERR:
                kick = _scsifront_put_rqid(info, id);
@@ -300,9 +361,7 @@ static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
                        scsifront_wake_up(info);
                return;
        default:
-               shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
-                            "bad reset state %d, possibly leaking %u\n",
-                            shadow->rslt_reset, id);
+               scsifront_set_error(info, "bad reset state");
                break;
        }
        spin_unlock_irqrestore(&info->shadow_lock, flags);
@@ -313,28 +372,41 @@ static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
 static void scsifront_do_response(struct vscsifrnt_info *info,
                                  struct vscsiif_response *ring_rsp)
 {
-       if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
-                test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
-                "illegal rqid %u returned by backend!\n", ring_rsp->rqid))
+       struct vscsifrnt_shadow *shadow;
+
+       if (ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
+           !info->shadow[ring_rsp->rqid]->inflight) {
+               scsifront_set_error(info, "illegal rqid returned by backend!");
                return;
+       }
+       shadow = info->shadow[ring_rsp->rqid];
+       shadow->inflight = false;
 
-       if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
+       if (shadow->act == VSCSIIF_ACT_SCSI_CDB)
                scsifront_cdb_cmd_done(info, ring_rsp);
        else
                scsifront_sync_cmd_done(info, ring_rsp);
 }
 
-static int scsifront_ring_drain(struct vscsifrnt_info *info)
+static int scsifront_ring_drain(struct vscsifrnt_info *info,
+                               unsigned int *eoiflag)
 {
-       struct vscsiif_response *ring_rsp;
+       struct vscsiif_response ring_rsp;
        RING_IDX i, rp;
        int more_to_do = 0;
 
-       rp = info->ring.sring->rsp_prod;
-       rmb();  /* ordering required respective to dom0 */
+       rp = READ_ONCE(info->ring.sring->rsp_prod);
+       virt_rmb();     /* ordering required respective to backend */
+       if (RING_RESPONSE_PROD_OVERFLOW(&info->ring, rp)) {
+               scsifront_set_error(info, "illegal number of responses");
+               return 0;
+       }
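+       /* Copy each response out of the shared ring before acting on it,
+        * so the backend cannot change it after validation.
+        */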
        for (i = info->ring.rsp_cons; i != rp; i++) {
-               ring_rsp = RING_GET_RESPONSE(&info->ring, i);
-               scsifront_do_response(info, ring_rsp);
+               RING_COPY_RESPONSE(&info->ring, i, &ring_rsp);
+               scsifront_do_response(info, &ring_rsp);
+               if (info->host_active == STATE_ERROR)
+                       return 0;
+               *eoiflag &= ~XEN_EOI_FLAG_SPURIOUS;
        }
 
        info->ring.rsp_cons = i;
@@ -347,14 +419,15 @@ static int scsifront_ring_drain(struct vscsifrnt_info *info)
        return more_to_do;
 }
 
-static int scsifront_cmd_done(struct vscsifrnt_info *info)
+static int scsifront_cmd_done(struct vscsifrnt_info *info,
+                             unsigned int *eoiflag)
 {
        int more_to_do;
        unsigned long flags;
 
        spin_lock_irqsave(info->host->host_lock, flags);
 
-       more_to_do = scsifront_ring_drain(info);
+       more_to_do = scsifront_ring_drain(info, eoiflag);
 
        info->wait_ring_available = 0;
 
@@ -368,20 +441,28 @@ static int scsifront_cmd_done(struct vscsifrnt_info *info)
 static irqreturn_t scsifront_irq_fn(int irq, void *dev_id)
 {
        struct vscsifrnt_info *info = dev_id;
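+       /* Assume the event is spurious; the flag is cleared in the ring
+        * drain loop once a response is actually consumed.
+        */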
+       unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
 
-       while (scsifront_cmd_done(info))
+       if (info->host_active == STATE_ERROR) {
+               xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+               return IRQ_HANDLED;
+       }
+
+       while (scsifront_cmd_done(info, &eoiflag))
                /* Yield point for this unbounded loop. */
                cond_resched();
 
+       xen_irq_lateeoi(irq, eoiflag);
+
        return IRQ_HANDLED;
 }
 
 static void scsifront_finish_all(struct vscsifrnt_info *info)
 {
-       unsigned i;
+       unsigned int i, dummy;
        struct vscsiif_response resp;
 
-       scsifront_ring_drain(info);
+       scsifront_ring_drain(info, &dummy);
 
        for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
                if (test_bit(i, info->shadow_free_bitmap))
@@ -538,6 +619,9 @@ static int scsifront_queuecommand(struct Scsi_Host *shost,
        unsigned long flags;
        int err;
 
+       if (info->host_active == STATE_ERROR)
+               return SCSI_MLQUEUE_HOST_BUSY;
+
        sc->result = 0;
 
        shadow->sc  = sc;
@@ -590,6 +674,9 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act)
        struct vscsifrnt_shadow *shadow, *s = scsi_cmd_priv(sc);
        int err = 0;
 
+       if (info->host_active == STATE_ERROR)
+               return FAILED;
+
        shadow = kzalloc(sizeof(*shadow), GFP_NOIO);
        if (!shadow)
                return FAILED;
@@ -661,6 +748,9 @@ static int scsifront_sdev_configure(struct scsi_device *sdev)
        struct vscsifrnt_info *info = shost_priv(sdev->host);
        int err;
 
+       if (info->host_active == STATE_ERROR)
+               return -EIO;
+
        if (info && current == info->curr) {
                err = xenbus_printf(XBT_NIL, info->dev->nodename,
                              info->dev_state_path, "%d", XenbusStateConnected);
@@ -708,27 +798,15 @@ static int scsifront_alloc_ring(struct vscsifrnt_info *info)
 {
        struct xenbus_device *dev = info->dev;
        struct vscsiif_sring *sring;
-       grant_ref_t gref;
-       int err = -ENOMEM;
+       int err;
 
        /***** Frontend to Backend ring start *****/
-       sring = (struct vscsiif_sring *)__get_free_page(GFP_KERNEL);
-       if (!sring) {
-               xenbus_dev_fatal(dev, err,
-                       "fail to allocate shared ring (Front to Back)");
+       err = xenbus_setup_ring(dev, GFP_KERNEL, (void **)&sring, 1,
+                               &info->ring_ref);
+       if (err)
                return err;
-       }
-       SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
-       err = xenbus_grant_ring(dev, sring, 1, &gref);
-       if (err < 0) {
-               free_page((unsigned long)sring);
-               xenbus_dev_fatal(dev, err,
-                       "fail to grant shared ring (Front to Back)");
-               return err;
-       }
-       info->ring_ref = gref;
+       XEN_FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err) {
@@ -736,7 +814,7 @@ static int scsifront_alloc_ring(struct vscsifrnt_info *info)
                goto free_gnttab;
        }
 
-       err = bind_evtchn_to_irq(info->evtchn);
+       err = bind_evtchn_to_irq_lateeoi(info->evtchn);
        if (err <= 0) {
                xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq");
                goto free_gnttab;
@@ -757,8 +835,7 @@ static int scsifront_alloc_ring(struct vscsifrnt_info *info)
 free_irq:
        unbind_from_irqhandler(info->irq, info);
 free_gnttab:
-       gnttab_end_foreign_access(info->ring_ref,
-                                 (unsigned long)info->ring.sring);
+       xenbus_teardown_ring((void **)&sring, 1, &info->ring_ref);
 
        return err;
 }
@@ -766,8 +843,7 @@ free_gnttab:
 static void scsifront_free_ring(struct vscsifrnt_info *info)
 {
        unbind_from_irqhandler(info->irq, info);
-       gnttab_end_foreign_access(info->ring_ref,
-                                 (unsigned long)info->ring.sring);
+       xenbus_teardown_ring((void **)&info->ring.sring, 1, &info->ring_ref);
 }
 
 static int scsifront_init_ring(struct vscsifrnt_info *info)
@@ -866,7 +942,7 @@ static int scsifront_probe(struct xenbus_device *dev,
                goto free_sring;
        }
        info->host = host;
-       info->host_active = 1;
+       info->host_active = STATE_ACTIVE;
 
        xenbus_switch_state(dev, XenbusStateInitialised);
 
@@ -934,10 +1010,10 @@ static int scsifront_remove(struct xenbus_device *dev)
        pr_debug("%s: %s removed\n", __func__, dev->nodename);
 
        mutex_lock(&scsifront_mutex);
-       if (info->host_active) {
+       if (info->host_active != STATE_INACTIVE) {
                /* Scsi_host not yet removed */
                scsi_remove_host(info->host);
-               info->host_active = 0;
+               info->host_active = STATE_INACTIVE;
        }
        mutex_unlock(&scsifront_mutex);
 
@@ -961,9 +1037,9 @@ static void scsifront_disconnect(struct vscsifrnt_info *info)
         */
 
        mutex_lock(&scsifront_mutex);
-       if (info->host_active) {
+       if (info->host_active != STATE_INACTIVE) {
                scsi_remove_host(host);
-               info->host_active = 0;
+               info->host_active = STATE_INACTIVE;
        }
        mutex_unlock(&scsifront_mutex);
 
@@ -981,6 +1057,9 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
        unsigned int hst, chn, tgt, lun;
        struct scsi_device *sdev;
 
+       if (info->host_active == STATE_ERROR)
+               return;
+
        dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n);
        if (IS_ERR(dir))
                return;
index f04b961b96cd4de5513a46fc2b24d27f9ef22e35..ec58091fc948a26608baf717a44d67c679d180e0 100644 (file)
@@ -510,9 +510,9 @@ static int qcom_slim_probe(struct platform_device *pdev)
        }
 
        ctrl->irq = platform_get_irq(pdev, 0);
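+       /* platform_get_irq() returns a negative errno on failure, never 0 */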
-       if (!ctrl->irq) {
+       if (ctrl->irq < 0) {
                dev_err(&pdev->dev, "no slimbus IRQ\n");
-               return -ENODEV;
+               return ctrl->irq;
        }
 
        sctrl = &ctrl->ctrl;
index 0868b7d406fbad2ffa83c20e994a84702b15a405..b1cf7d29dafd99b687b9273a377bcb161684f2c0 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/err.h>
+#include <linux/mutex.h>
 #include <linux/pm_clock.h>
 #include <linux/pm_domain.h>
 #include <linux/of_address.h>
@@ -16,6 +17,7 @@
 #include <linux/clk.h>
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
+#include <soc/rockchip/pm_domains.h>
 #include <dt-bindings/power/px30-power.h>
 #include <dt-bindings/power/rk3036-power.h>
 #include <dt-bindings/power/rk3066-power.h>
@@ -139,6 +141,109 @@ struct rockchip_pmu {
 #define DOMAIN_RK3568(name, pwr, req, wakeup)          \
        DOMAIN_M(name, pwr, pwr, req, req, req, wakeup)
 
+/*
+ * The Dynamic Memory Controller may need to coordinate with us -- see
+ * rockchip_pmu_block().
+ *
+ * dmc_pmu_mutex protects against registration-time races, so the DMC
+ * driver doesn't try to block() while we're initializing the PMU.
+ */
+static DEFINE_MUTEX(dmc_pmu_mutex);
+static struct rockchip_pmu *dmc_pmu;
+
+/*
+ * Block PMU transitions and make sure they don't interfere with ARM Trusted
+ * Firmware operations. There are two conflicts, noted in the comments below.
+ *
+ * Caller must unblock PMU transitions via rockchip_pmu_unblock().
+ */
+int rockchip_pmu_block(void)
+{
+       struct rockchip_pmu *pmu;
+       struct generic_pm_domain *genpd;
+       struct rockchip_pm_domain *pd;
+       int i, ret;
+
+       mutex_lock(&dmc_pmu_mutex);
+
+       /* No PMU (yet)? Then we just block rockchip_pmu_probe(). */
+       if (!dmc_pmu)
+               return 0;
+       pmu = dmc_pmu;
+
+       /*
+        * The mutex blocks all idle transitions: we can't touch the
+        * PMU_BUS_IDLE_REQ (our ".idle_offset") register while ARM Trusted
+        * Firmware might be using it.
+        */
+       mutex_lock(&pmu->mutex);
+
+       /*
+        * Power domain clocks: Per Rockchip, we *must* keep certain clocks
+        * enabled for the duration of power-domain transitions. Most
+        * transitions are handled by this driver, but some cases (in
+        * particular, DRAM DVFS / memory-controller idle) must be handled by
+        * firmware. Firmware can handle most clock management via a special
+        * "ungate" register (PMU_CRU_GATEDIS_CON0), but unfortunately, this
+        * doesn't handle PLLs. We can assist this transition by doing the
+        * clock management on behalf of firmware.
+        */
+       for (i = 0; i < pmu->genpd_data.num_domains; i++) {
+               genpd = pmu->genpd_data.domains[i];
+               if (genpd) {
+                       pd = to_rockchip_pd(genpd);
+                       ret = clk_bulk_enable(pd->num_clks, pd->clks);
+                       if (ret < 0) {
+                               dev_err(pmu->dev,
+                                       "failed to enable clks for domain '%s': %d\n",
+                                       genpd->name, ret);
+                               goto err;
+                       }
+               }
+       }
+
+       return 0;
+
+err:
+       for (i = i - 1; i >= 0; i--) {
+               genpd = pmu->genpd_data.domains[i];
+               if (genpd) {
+                       pd = to_rockchip_pd(genpd);
+                       clk_bulk_disable(pd->num_clks, pd->clks);
+               }
+       }
+       mutex_unlock(&pmu->mutex);
+       mutex_unlock(&dmc_pmu_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(rockchip_pmu_block);
+
+/* Unblock PMU transitions. */
+void rockchip_pmu_unblock(void)
+{
+       struct rockchip_pmu *pmu;
+       struct generic_pm_domain *genpd;
+       struct rockchip_pm_domain *pd;
+       int i;
+
+       if (dmc_pmu) {
+               pmu = dmc_pmu;
+               for (i = 0; i < pmu->genpd_data.num_domains; i++) {
+                       genpd = pmu->genpd_data.domains[i];
+                       if (genpd) {
+                               pd = to_rockchip_pd(genpd);
+                               clk_bulk_disable(pd->num_clks, pd->clks);
+                       }
+               }
+
+               mutex_unlock(&pmu->mutex);
+       }
+
+       mutex_unlock(&dmc_pmu_mutex);
+}
+EXPORT_SYMBOL_GPL(rockchip_pmu_unblock);
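+
+/*
+ * Illustrative only (not part of this patch): a DMC driver would be
+ * expected to bracket a DRAM frequency transition roughly like
+ *
+ *     ret = rockchip_pmu_block();
+ *     if (ret)
+ *             return ret;
+ *     ... hand the DVFS transition to ARM Trusted Firmware ...
+ *     rockchip_pmu_unblock();
+ */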
+
 static bool rockchip_pmu_domain_is_idle(struct rockchip_pm_domain *pd)
 {
        struct rockchip_pmu *pmu = pd->pmu;
@@ -690,6 +795,12 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
 
        error = -ENODEV;
 
+       /*
+        * Prevent any rockchip_pmu_block() from racing with the remainder of
+        * setup (clocks, register initialization).
+        */
+       mutex_lock(&dmc_pmu_mutex);
+
        for_each_available_child_of_node(np, node) {
                error = rockchip_pm_add_one_domain(pmu, node);
                if (error) {
@@ -719,10 +830,17 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
                goto err_out;
        }
 
+       /* We only expect one PMU. */
+       if (!WARN_ON_ONCE(dmc_pmu))
+               dmc_pmu = pmu;
+
+       mutex_unlock(&dmc_pmu_mutex);
+
        return 0;
 
 err_out:
        rockchip_pm_domain_cleanup(pmu);
+       mutex_unlock(&dmc_pmu_mutex);
        return error;
 }
 
index d2815eb361c0f29cf526ef94af5b57f11c0cb5d6..38117775ff79b38eaa56fd27208b86976d64291a 100644 (file)
@@ -101,6 +101,17 @@ config SPI_ARMADA_3700
          This enables support for the SPI controller present on the
          Marvell Armada 3700 SoCs.
 
+config SPI_ASPEED_SMC
+       tristate "Aspeed flash controllers in SPI mode"
+       depends on ARCH_ASPEED || COMPILE_TEST
+       depends on OF
+       help
+         This enables support for the Firmware Memory controller (FMC)
+         in the Aspeed AST2600, AST2500 and AST2400 SoCs when attached
+         to SPI NOR chips, and support for the SPI flash memory
+         controller (SPI) for the host firmware. The implementation
+         only supports SPI NOR.
+
 config SPI_ATMEL
        tristate "Atmel SPI Controller"
        depends on ARCH_AT91 || COMPILE_TEST
@@ -414,15 +425,14 @@ config SPI_IMG_SPFI
 config SPI_IMX
        tristate "Freescale i.MX SPI controllers"
        depends on ARCH_MXC || COMPILE_TEST
-       select SPI_BITBANG
        help
          This enables support for the Freescale i.MX SPI controllers.
 
 config SPI_INGENIC
-       tristate "Ingenic JZ47xx SoCs SPI controller"
+       tristate "Ingenic SoCs SPI controller"
        depends on MACH_INGENIC || COMPILE_TEST
        help
-         This enables support for the Ingenic JZ47xx SoCs SPI controller.
+         This enables support for the Ingenic SoCs SPI controller.
 
          To compile this driver as a module, choose M here: the module
          will be called spi-ingenic.
@@ -590,6 +600,16 @@ config SPI_MTK_NOR
          SPI interface as well as several SPI NOR specific instructions
          via SPI MEM interface.
 
+config SPI_MTK_SNFI
+       tristate "MediaTek SPI NAND Flash Interface"
+       depends on ARCH_MEDIATEK || COMPILE_TEST
+       depends on MTD_NAND_ECC_MEDIATEK
+       help
+         This enables support for SPI-NAND mode on the MediaTek NAND
+         Flash Interface found on MediaTek ARM SoCs. This controller
+         is implemented as a SPI-MEM controller with pipelined ECC
+         capability.
+
 config SPI_NPCM_FIU
        tristate "Nuvoton NPCM FLASH Interface Unit"
        depends on ARCH_NPCM || COMPILE_TEST
index 3aa28ed3f7617c50d4e260c3f47f94f2f8ec026a..0f44eb6083a53c47f44536eb4720df55d6a9b523 100644 (file)
@@ -19,6 +19,7 @@ obj-$(CONFIG_SPI_ALTERA_CORE)         += spi-altera-core.o
 obj-$(CONFIG_SPI_ALTERA_DFL)           += spi-altera-dfl.o
 obj-$(CONFIG_SPI_AR934X)               += spi-ar934x.o
 obj-$(CONFIG_SPI_ARMADA_3700)          += spi-armada-3700.o
+obj-$(CONFIG_SPI_ASPEED_SMC)           += spi-aspeed-smc.o
 obj-$(CONFIG_SPI_ATMEL)                        += spi-atmel.o
 obj-$(CONFIG_SPI_ATMEL_QUADSPI)                += atmel-quadspi.o
 obj-$(CONFIG_SPI_AT91_USART)           += spi-at91-usart.o
@@ -76,6 +77,7 @@ obj-$(CONFIG_SPI_MPC52xx)             += spi-mpc52xx.o
 obj-$(CONFIG_SPI_MT65XX)                += spi-mt65xx.o
 obj-$(CONFIG_SPI_MT7621)               += spi-mt7621.o
 obj-$(CONFIG_SPI_MTK_NOR)              += spi-mtk-nor.o
+obj-$(CONFIG_SPI_MTK_SNFI)             += spi-mtk-snfi.o
 obj-$(CONFIG_SPI_MXIC)                 += spi-mxic.o
 obj-$(CONFIG_SPI_MXS)                  += spi-mxs.o
 obj-$(CONFIG_SPI_NPCM_FIU)             += spi-npcm-fiu.o
index 938017a60c8ed2ae54ed01ecb5edad7a2e1b83b7..480c0c8c18e49d7a6cfb556d5db26e601b754389 100644 (file)
@@ -288,12 +288,6 @@ static bool atmel_qspi_supports_op(struct spi_mem *mem,
                op->dummy.nbytes == 0)
                return false;
 
-       /* DTR ops not supported. */
-       if (op->cmd.dtr || op->addr.dtr || op->dummy.dtr || op->data.dtr)
-               return false;
-       if (op->cmd.nbytes != 1)
-               return false;
-
        return true;
 }
 
diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c
new file mode 100644 (file)
index 0000000..496f3e1
--- /dev/null
@@ -0,0 +1,1210 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ASPEED FMC/SPI Memory Controller Driver
+ *
+ * Copyright (c) 2015-2022, IBM Corporation.
+ * Copyright (c) 2020, ASPEED Corporation.
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+#define DEVICE_NAME "spi-aspeed-smc"
+
+/* Type setting Register */
+#define CONFIG_REG                     0x0
+#define   CONFIG_TYPE_SPI              0x2
+
+/* CE Control Register */
+#define CE_CTRL_REG                    0x4
+
+/* CEx Control Register */
+#define CE0_CTRL_REG                   0x10
+#define   CTRL_IO_MODE_MASK            GENMASK(30, 28)
+#define   CTRL_IO_SINGLE_DATA          0x0
+#define   CTRL_IO_DUAL_DATA            BIT(29)
+#define   CTRL_IO_QUAD_DATA            BIT(30)
+#define   CTRL_COMMAND_SHIFT           16
+#define   CTRL_IO_ADDRESS_4B           BIT(13) /* AST2400 SPI only */
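+/*
+ * The 3-bit dummy count is split across the control register: bit 2
+ * lands in register bit 14, bits 1:0 in register bits 7:6 (e.g. a
+ * count of 5 sets bits 14 and 6).
+ */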
+#define   CTRL_IO_DUMMY_SET(dummy)                                     \
+       (((((dummy) >> 2) & 0x1) << 14) | (((dummy) & 0x3) << 6))
+#define   CTRL_FREQ_SEL_SHIFT          8
+#define   CTRL_FREQ_SEL_MASK           GENMASK(11, CTRL_FREQ_SEL_SHIFT)
+#define   CTRL_CE_STOP_ACTIVE          BIT(2)
+#define   CTRL_IO_MODE_CMD_MASK                GENMASK(1, 0)
+#define   CTRL_IO_MODE_NORMAL          0x0
+#define   CTRL_IO_MODE_READ            0x1
+#define   CTRL_IO_MODE_WRITE           0x2
+#define   CTRL_IO_MODE_USER            0x3
+
+#define   CTRL_IO_CMD_MASK             0xf0ff40c3
+
+/* CEx Address Decoding Range Register */
+#define CE0_SEGMENT_ADDR_REG           0x30
+
+/* CEx Read timing compensation register */
+#define CE0_TIMING_COMPENSATION_REG    0x94
+
+enum aspeed_spi_ctl_reg_value {
+       ASPEED_SPI_BASE,
+       ASPEED_SPI_READ,
+       ASPEED_SPI_WRITE,
+       ASPEED_SPI_MAX,
+};
+
+struct aspeed_spi;
+
+struct aspeed_spi_chip {
+       struct aspeed_spi       *aspi;
+       u32                      cs;
+       void __iomem            *ctl;
+       void __iomem            *ahb_base;
+       u32                      ahb_window_size;
+       u32                      ctl_val[ASPEED_SPI_MAX];
+       u32                      clk_freq;
+};
+
+struct aspeed_spi_data {
+       u32     ctl0;
+       u32     max_cs;
+       bool    hastype;
+       u32     mode_bits;
+       u32     we0;
+       u32     timing;
+       u32     hclk_mask;
+       u32     hdiv_max;
+
+       u32 (*segment_start)(struct aspeed_spi *aspi, u32 reg);
+       u32 (*segment_end)(struct aspeed_spi *aspi, u32 reg);
+       u32 (*segment_reg)(struct aspeed_spi *aspi, u32 start, u32 end);
+       int (*calibrate)(struct aspeed_spi_chip *chip, u32 hdiv,
+                        const u8 *golden_buf, u8 *test_buf);
+};
+
+#define ASPEED_SPI_MAX_NUM_CS  5
+
+struct aspeed_spi {
+       const struct aspeed_spi_data    *data;
+
+       void __iomem            *regs;
+       void __iomem            *ahb_base;
+       u32                      ahb_base_phy;
+       u32                      ahb_window_size;
+       struct device           *dev;
+
+       struct clk              *clk;
+       u32                      clk_freq;
+
+       struct aspeed_spi_chip   chips[ASPEED_SPI_MAX_NUM_CS];
+};
+
+static u32 aspeed_spi_get_io_mode(const struct spi_mem_op *op)
+{
+       switch (op->data.buswidth) {
+       case 1:
+               return CTRL_IO_SINGLE_DATA;
+       case 2:
+               return CTRL_IO_DUAL_DATA;
+       case 4:
+               return CTRL_IO_QUAD_DATA;
+       default:
+               return CTRL_IO_SINGLE_DATA;
+       }
+}
+
+static void aspeed_spi_set_io_mode(struct aspeed_spi_chip *chip, u32 io_mode)
+{
+       u32 ctl;
+
+       if (io_mode > 0) {
+               ctl = readl(chip->ctl) & ~CTRL_IO_MODE_MASK;
+               ctl |= io_mode;
+               writel(ctl, chip->ctl);
+       }
+}
+
+static void aspeed_spi_start_user(struct aspeed_spi_chip *chip)
+{
+       u32 ctl = chip->ctl_val[ASPEED_SPI_BASE];
+
+       ctl |= CTRL_IO_MODE_USER | CTRL_CE_STOP_ACTIVE;
+       writel(ctl, chip->ctl);
+
+       ctl &= ~CTRL_CE_STOP_ACTIVE;
+       writel(ctl, chip->ctl);
+}
+
+static void aspeed_spi_stop_user(struct aspeed_spi_chip *chip)
+{
+       u32 ctl = chip->ctl_val[ASPEED_SPI_READ] |
+               CTRL_IO_MODE_USER | CTRL_CE_STOP_ACTIVE;
+
+       writel(ctl, chip->ctl);
+
+       /* Restore defaults */
+       writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
+}
+
+static int aspeed_spi_read_from_ahb(void *buf, void __iomem *src, size_t len)
+{
+       size_t offset = 0;
+
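+       /* Copy 32 bits at a time while both pointers are word aligned,
+        * then mop up the remaining (len & 3) bytes one at a time.
+        */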
+       if (IS_ALIGNED((uintptr_t)src, sizeof(uintptr_t)) &&
+           IS_ALIGNED((uintptr_t)buf, sizeof(uintptr_t))) {
+               ioread32_rep(src, buf, len >> 2);
+               offset = len & ~0x3;
+               len -= offset;
+       }
+       ioread8_rep(src, (u8 *)buf + offset, len);
+       return 0;
+}
+
+static int aspeed_spi_write_to_ahb(void __iomem *dst, const void *buf, size_t len)
+{
+       size_t offset = 0;
+
+       if (IS_ALIGNED((uintptr_t)dst, sizeof(uintptr_t)) &&
+           IS_ALIGNED((uintptr_t)buf, sizeof(uintptr_t))) {
+               iowrite32_rep(dst, buf, len >> 2);
+               offset = len & ~0x3;
+               len -= offset;
+       }
+       iowrite8_rep(dst, (const u8 *)buf + offset, len);
+       return 0;
+}
+
+static int aspeed_spi_send_cmd_addr(struct aspeed_spi_chip *chip, u8 addr_nbytes,
+                                   u64 offset, u32 opcode)
+{
+       __be32 temp;
+       u32 cmdaddr;
+
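+       /*
+        * With 3-byte addressing the opcode is packed into the top byte
+        * of a word so that the big-endian store clocks it out first,
+        * followed by the 24-bit address; with 4-byte addressing the
+        * opcode byte is written separately before the address word.
+        */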
+       switch (addr_nbytes) {
+       case 3:
+               cmdaddr = offset & 0xFFFFFF;
+               cmdaddr |= opcode << 24;
+
+               temp = cpu_to_be32(cmdaddr);
+               aspeed_spi_write_to_ahb(chip->ahb_base, &temp, 4);
+               break;
+       case 4:
+               temp = cpu_to_be32(offset);
+               aspeed_spi_write_to_ahb(chip->ahb_base, &opcode, 1);
+               aspeed_spi_write_to_ahb(chip->ahb_base, &temp, 4);
+               break;
+       default:
+               WARN_ONCE(1, "Unexpected address width %u", addr_nbytes);
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+static int aspeed_spi_read_reg(struct aspeed_spi_chip *chip,
+                              const struct spi_mem_op *op)
+{
+       aspeed_spi_start_user(chip);
+       aspeed_spi_write_to_ahb(chip->ahb_base, &op->cmd.opcode, 1);
+       aspeed_spi_read_from_ahb(op->data.buf.in,
+                                chip->ahb_base, op->data.nbytes);
+       aspeed_spi_stop_user(chip);
+       return 0;
+}
+
+static int aspeed_spi_write_reg(struct aspeed_spi_chip *chip,
+                               const struct spi_mem_op *op)
+{
+       aspeed_spi_start_user(chip);
+       aspeed_spi_write_to_ahb(chip->ahb_base, &op->cmd.opcode, 1);
+       aspeed_spi_write_to_ahb(chip->ahb_base, op->data.buf.out,
+                               op->data.nbytes);
+       aspeed_spi_stop_user(chip);
+       return 0;
+}
+
+static ssize_t aspeed_spi_read_user(struct aspeed_spi_chip *chip,
+                                   const struct spi_mem_op *op,
+                                   u64 offset, size_t len, void *buf)
+{
+       int io_mode = aspeed_spi_get_io_mode(op);
+       u8 dummy = 0xFF;
+       int i;
+       int ret;
+
+       aspeed_spi_start_user(chip);
+
+       ret = aspeed_spi_send_cmd_addr(chip, op->addr.nbytes, offset, op->cmd.opcode);
+       if (ret < 0)
+               return ret;
+
+       if (op->dummy.buswidth && op->dummy.nbytes) {
+               for (i = 0; i < op->dummy.nbytes / op->dummy.buswidth; i++)
+                       aspeed_spi_write_to_ahb(chip->ahb_base, &dummy, sizeof(dummy));
+       }
+
+       aspeed_spi_set_io_mode(chip, io_mode);
+
+       aspeed_spi_read_from_ahb(buf, chip->ahb_base, len);
+       aspeed_spi_stop_user(chip);
+       return 0;
+}
+
+static ssize_t aspeed_spi_write_user(struct aspeed_spi_chip *chip,
+                                    const struct spi_mem_op *op)
+{
+       int ret;
+
+       aspeed_spi_start_user(chip);
+       ret = aspeed_spi_send_cmd_addr(chip, op->addr.nbytes, op->addr.val, op->cmd.opcode);
+       if (ret < 0)
+               return ret;
+       aspeed_spi_write_to_ahb(chip->ahb_base, op->data.buf.out, op->data.nbytes);
+       aspeed_spi_stop_user(chip);
+       return 0;
+}
+
+/* support for 1-1-1, 1-1-2 or 1-1-4 */
+static bool aspeed_spi_supports_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+       if (op->cmd.buswidth > 1)
+               return false;
+
+       if (op->addr.nbytes != 0) {
+               if (op->addr.buswidth > 1)
+                       return false;
+               if (op->addr.nbytes < 3 || op->addr.nbytes > 4)
+                       return false;
+       }
+
+       if (op->dummy.nbytes != 0) {
+               if (op->dummy.buswidth > 1 || op->dummy.nbytes > 7)
+                       return false;
+       }
+
+       if (op->data.nbytes != 0 && op->data.buswidth > 4)
+               return false;
+
+       return spi_mem_default_supports_op(mem, op);
+}
+
+static const struct aspeed_spi_data ast2400_spi_data;
+
+static int do_aspeed_spi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+       struct aspeed_spi *aspi = spi_controller_get_devdata(mem->spi->master);
+       struct aspeed_spi_chip *chip = &aspi->chips[mem->spi->chip_select];
+       u32 addr_mode, addr_mode_backup;
+       u32 ctl_val;
+       int ret = 0;
+
+       dev_dbg(aspi->dev,
+               "CE%d %s OP %#x mode:%d.%d.%d.%d naddr:%#x ndummies:%#x len:%#x",
+               chip->cs, op->data.dir == SPI_MEM_DATA_IN ? "read" : "write",
+               op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
+               op->dummy.buswidth, op->data.buswidth,
+               op->addr.nbytes, op->dummy.nbytes, op->data.nbytes);
+
+       addr_mode = readl(aspi->regs + CE_CTRL_REG);
+       addr_mode_backup = addr_mode;
+
+       ctl_val = chip->ctl_val[ASPEED_SPI_BASE];
+       ctl_val &= ~CTRL_IO_CMD_MASK;
+
+       ctl_val |= op->cmd.opcode << CTRL_COMMAND_SHIFT;
+
+       /* 4BYTE address mode */
+       if (op->addr.nbytes) {
+               if (op->addr.nbytes == 4)
+                       addr_mode |= (0x11 << chip->cs);
+               else
+                       addr_mode &= ~(0x11 << chip->cs);
+
+               if (op->addr.nbytes == 4 && chip->aspi->data == &ast2400_spi_data)
+                       ctl_val |= CTRL_IO_ADDRESS_4B;
+       }
+
+       if (op->dummy.nbytes)
+               ctl_val |= CTRL_IO_DUMMY_SET(op->dummy.nbytes / op->dummy.buswidth);
+
+       if (op->data.nbytes)
+               ctl_val |= aspeed_spi_get_io_mode(op);
+
+       if (op->data.dir == SPI_MEM_DATA_OUT)
+               ctl_val |= CTRL_IO_MODE_WRITE;
+       else
+               ctl_val |= CTRL_IO_MODE_READ;
+
+       if (addr_mode != addr_mode_backup)
+               writel(addr_mode, aspi->regs + CE_CTRL_REG);
+       writel(ctl_val, chip->ctl);
+
+       if (op->data.dir == SPI_MEM_DATA_IN) {
+               if (!op->addr.nbytes)
+                       ret = aspeed_spi_read_reg(chip, op);
+               else
+                       ret = aspeed_spi_read_user(chip, op, op->addr.val,
+                                                  op->data.nbytes, op->data.buf.in);
+       } else {
+               if (!op->addr.nbytes)
+                       ret = aspeed_spi_write_reg(chip, op);
+               else
+                       ret = aspeed_spi_write_user(chip, op);
+       }
+
+       /* Restore defaults */
+       if (addr_mode != addr_mode_backup)
+               writel(addr_mode_backup, aspi->regs + CE_CTRL_REG);
+       writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
+       return ret;
+}
+
+static int aspeed_spi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+       int ret;
+
+       ret = do_aspeed_spi_exec_op(mem, op);
+       if (ret)
+               dev_err(&mem->spi->dev, "operation failed: %d\n", ret);
+       return ret;
+}
+
+static const char *aspeed_spi_get_name(struct spi_mem *mem)
+{
+       struct aspeed_spi *aspi = spi_controller_get_devdata(mem->spi->master);
+       struct device *dev = aspi->dev;
+
+       return devm_kasprintf(dev, GFP_KERNEL, "%s.%d", dev_name(dev), mem->spi->chip_select);
+}
+
+struct aspeed_spi_window {
+       u32 cs;
+       u32 offset;
+       u32 size;
+};
+
+static void aspeed_spi_get_windows(struct aspeed_spi *aspi,
+                                  struct aspeed_spi_window windows[ASPEED_SPI_MAX_NUM_CS])
+{
+       const struct aspeed_spi_data *data = aspi->data;
+       u32 reg_val;
+       u32 cs;
+
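+       /* Decoding windows are laid out back to back: each CS window
+        * starts where the previous one ends.
+        */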
+       for (cs = 0; cs < aspi->data->max_cs; cs++) {
+               reg_val = readl(aspi->regs + CE0_SEGMENT_ADDR_REG + cs * 4);
+               windows[cs].cs = cs;
+               windows[cs].size = data->segment_end(aspi, reg_val) -
+                       data->segment_start(aspi, reg_val);
+               windows[cs].offset = cs ? windows[cs - 1].offset + windows[cs - 1].size : 0;
+               dev_vdbg(aspi->dev, "CE%d offset=0x%.8x size=0x%x\n", cs,
+                        windows[cs].offset, windows[cs].size);
+       }
+}
+
+/*
+ * On the AST2600, some CE windows are closed by default at reset, but
+ * U-Boot is expected to open them all.
+ */
+static int aspeed_spi_chip_set_default_window(struct aspeed_spi_chip *chip)
+{
+       struct aspeed_spi *aspi = chip->aspi;
+       struct aspeed_spi_window windows[ASPEED_SPI_MAX_NUM_CS] = { 0 };
+       struct aspeed_spi_window *win = &windows[chip->cs];
+
+       /* No segment registers for the AST2400 SPI controller */
+       if (aspi->data == &ast2400_spi_data) {
+               win->offset = 0;
+               win->size = aspi->ahb_window_size;
+       } else {
+               aspeed_spi_get_windows(aspi, windows);
+       }
+
+       chip->ahb_base = aspi->ahb_base + win->offset;
+       chip->ahb_window_size = win->size;
+
+       dev_dbg(aspi->dev, "CE%d default window [ 0x%.8x - 0x%.8x ] %dMB",
+               chip->cs, aspi->ahb_base_phy + win->offset,
+               aspi->ahb_base_phy + win->offset + win->size - 1,
+               win->size >> 20);
+
+       return chip->ahb_window_size ? 0 : -1;
+}
+
+static int aspeed_spi_set_window(struct aspeed_spi *aspi,
+                                const struct aspeed_spi_window *win)
+{
+       u32 start = aspi->ahb_base_phy + win->offset;
+       u32 end = start + win->size;
+       void __iomem *seg_reg = aspi->regs + CE0_SEGMENT_ADDR_REG + win->cs * 4;
+       u32 seg_val_backup = readl(seg_reg);
+       u32 seg_val = aspi->data->segment_reg(aspi, start, end);
+
+       if (seg_val == seg_val_backup)
+               return 0;
+
+       writel(seg_val, seg_reg);
+
+       /*
+        * Restore the initial value if something goes wrong, else we
+        * could lose access to the chip.
+        */
+       if (seg_val != readl(seg_reg)) {
+               dev_err(aspi->dev, "CE%d invalid window [ 0x%.8x - 0x%.8x ] %dMB",
+                       win->cs, start, end - 1, win->size >> 20);
+               writel(seg_val_backup, seg_reg);
+               return -EIO;
+       }
+
+       if (win->size)
+               dev_dbg(aspi->dev, "CE%d new window [ 0x%.8x - 0x%.8x ] %dMB",
+                       win->cs, start, end - 1,  win->size >> 20);
+       else
+               dev_dbg(aspi->dev, "CE%d window closed", win->cs);
+
+       return 0;
+}
+
+/*
+ * Yet to be done when possible:
+ * - Align mappings on flash size (we don't have the info)
+ * - ioremap each window, not strictly necessary since the overall window
+ *   is correct.
+ */
+static const struct aspeed_spi_data ast2500_spi_data;
+static const struct aspeed_spi_data ast2600_spi_data;
+static const struct aspeed_spi_data ast2600_fmc_data;
+
+static int aspeed_spi_chip_adjust_window(struct aspeed_spi_chip *chip,
+                                        u32 local_offset, u32 size)
+{
+       struct aspeed_spi *aspi = chip->aspi;
+       struct aspeed_spi_window windows[ASPEED_SPI_MAX_NUM_CS] = { 0 };
+       struct aspeed_spi_window *win = &windows[chip->cs];
+       int ret;
+
+       /* No segment registers for the AST2400 SPI controller */
+       if (aspi->data == &ast2400_spi_data)
+               return 0;
+
+       /*
+        * Due to a HW issue on the AST2500 SPI controller, the CE0
+        * window size should be smaller than the 128MB maximum.
+        */
+       if (aspi->data == &ast2500_spi_data && chip->cs == 0 && size == SZ_128M) {
+               size = 120 << 20;
+               dev_info(aspi->dev, "CE%d window resized to %dMB (AST2500 HW quirk)",
+                        chip->cs, size >> 20);
+       }
+
+       /*
+        * The decoding range of the AST2600 SPI controller should be
+        * at least 2MB.
+        */
+       if ((aspi->data == &ast2600_spi_data || aspi->data == &ast2600_fmc_data) &&
+           size < SZ_2M) {
+               size = SZ_2M;
+               dev_info(aspi->dev, "CE%d window resized to %dMB (AST2600 Decoding)",
+                        chip->cs, size >> 20);
+       }
+
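+       /* Start from the current segment layout before adjusting this CE */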
+       aspeed_spi_get_windows(aspi, windows);
+
+       /* Adjust this chip window */
+       win->offset += local_offset;
+       win->size = size;
+
+       if (win->offset + win->size > aspi->ahb_window_size) {
+               win->size = aspi->ahb_window_size - win->offset;
+               dev_warn(aspi->dev, "CE%d window resized to %dMB", chip->cs, win->size >> 20);
+       }
+
+       ret = aspeed_spi_set_window(aspi, win);
+       if (ret)
+               return ret;
+
+       /* Update chip mapping info */
+       chip->ahb_base = aspi->ahb_base + win->offset;
+       chip->ahb_window_size = win->size;
+
+       /*
+        * Also adjust next chip window to make sure that it does not
+        * overlap with the current window.
+        */
+       if (chip->cs < aspi->data->max_cs - 1) {
+               struct aspeed_spi_window *next = &windows[chip->cs + 1];
+
+               /* Change offset and size to keep the same end address */
+               if ((next->offset + next->size) > (win->offset + win->size))
+                       next->size = (next->offset + next->size) - (win->offset + win->size);
+               else
+                       next->size = 0;
+               next->offset = win->offset + win->size;
+
+               aspeed_spi_set_window(aspi, next);
+       }
+       return 0;
+}
+
+static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip);
+
+static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
+{
+       struct aspeed_spi *aspi = spi_controller_get_devdata(desc->mem->spi->master);
+       struct aspeed_spi_chip *chip = &aspi->chips[desc->mem->spi->chip_select];
+       struct spi_mem_op *op = &desc->info.op_tmpl;
+       u32 ctl_val;
+       int ret = 0;
+
+       chip->clk_freq = desc->mem->spi->max_speed_hz;
+
+       /* Only for reads */
+       if (op->data.dir != SPI_MEM_DATA_IN)
+               return -EOPNOTSUPP;
+
+       aspeed_spi_chip_adjust_window(chip, desc->info.offset, desc->info.length);
+
+       if (desc->info.length > chip->ahb_window_size)
+               dev_warn(aspi->dev, "CE%d window (%dMB) too small for mapping",
+                        chip->cs, chip->ahb_window_size >> 20);
+
+       /* Define the default IO read settings */
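+       /*
+        * Note: the controller takes the dummy length as a number of
+        * bytes on a single line, hence nbytes / buswidth below (an
+        * inference from the code, not from the datasheet).
+        */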
+       ctl_val = readl(chip->ctl) & ~CTRL_IO_CMD_MASK;
+       ctl_val |= aspeed_spi_get_io_mode(op) |
+               op->cmd.opcode << CTRL_COMMAND_SHIFT |
+               CTRL_IO_DUMMY_SET(op->dummy.nbytes / op->dummy.buswidth) |
+               CTRL_IO_MODE_READ;
+
+       /* Tune 4BYTE address mode */
+       if (op->addr.nbytes) {
+               u32 addr_mode = readl(aspi->regs + CE_CTRL_REG);
+
+               if (op->addr.nbytes == 4)
+                       addr_mode |= (0x11 << chip->cs);
+               else
+                       addr_mode &= ~(0x11 << chip->cs);
+               writel(addr_mode, aspi->regs + CE_CTRL_REG);
+
+               /*
+                * The AST2400 SPI controller sets the 4BYTE address
+                * mode in the CE0 Control Register.
+                */
+               if (op->addr.nbytes == 4 && chip->aspi->data == &ast2400_spi_data)
+                       ctl_val |= CTRL_IO_ADDRESS_4B;
+       }
+
+       /* READ mode is the controller default setting */
+       chip->ctl_val[ASPEED_SPI_READ] = ctl_val;
+       writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
+
+       ret = aspeed_spi_do_calibration(chip);
+
+       dev_info(aspi->dev, "CE%d read buswidth:%d [0x%08x]\n",
+                chip->cs, op->data.buswidth, chip->ctl_val[ASPEED_SPI_READ]);
+
+       return ret;
+}
+
+static ssize_t aspeed_spi_dirmap_read(struct spi_mem_dirmap_desc *desc,
+                                     u64 offset, size_t len, void *buf)
+{
+       struct aspeed_spi *aspi = spi_controller_get_devdata(desc->mem->spi->master);
+       struct aspeed_spi_chip *chip = &aspi->chips[desc->mem->spi->chip_select];
+
+       /* Switch to USER command mode if mapping window is too small */
+       if (chip->ahb_window_size < offset + len) {
+               int ret;
+
+               ret = aspeed_spi_read_user(chip, &desc->info.op_tmpl, offset, len, buf);
+               if (ret < 0)
+                       return ret;
+       } else {
+               memcpy_fromio(buf, chip->ahb_base + offset, len);
+       }
+
+       return len;
+}
+
+static const struct spi_controller_mem_ops aspeed_spi_mem_ops = {
+       .supports_op = aspeed_spi_supports_op,
+       .exec_op = aspeed_spi_exec_op,
+       .get_name = aspeed_spi_get_name,
+       .dirmap_create = aspeed_spi_dirmap_create,
+       .dirmap_read = aspeed_spi_dirmap_read,
+};
+
+static void aspeed_spi_chip_set_type(struct aspeed_spi *aspi, unsigned int cs, int type)
+{
+       u32 reg;
+
+       reg = readl(aspi->regs + CONFIG_REG);
+       reg &= ~(0x3 << (cs * 2));
+       reg |= type << (cs * 2);
+       writel(reg, aspi->regs + CONFIG_REG);
+}
+
+static void aspeed_spi_chip_enable(struct aspeed_spi *aspi, unsigned int cs, bool enable)
+{
+       u32 we_bit = BIT(aspi->data->we0 + cs);
+       u32 reg = readl(aspi->regs + CONFIG_REG);
+
+       if (enable)
+               reg |= we_bit;
+       else
+               reg &= ~we_bit;
+       writel(reg, aspi->regs + CONFIG_REG);
+}
+
+static int aspeed_spi_setup(struct spi_device *spi)
+{
+       struct aspeed_spi *aspi = spi_controller_get_devdata(spi->master);
+       const struct aspeed_spi_data *data = aspi->data;
+       unsigned int cs = spi->chip_select;
+       struct aspeed_spi_chip *chip = &aspi->chips[cs];
+
+       chip->aspi = aspi;
+       chip->cs = cs;
+       chip->ctl = aspi->regs + data->ctl0 + cs * 4;
+
+       /* The driver only supports SPI type flash */
+       if (data->hastype)
+               aspeed_spi_chip_set_type(aspi, cs, CONFIG_TYPE_SPI);
+
+       if (aspeed_spi_chip_set_default_window(chip) < 0) {
+               dev_warn(aspi->dev, "CE%d window invalid", cs);
+               return -EINVAL;
+       }
+
+       aspeed_spi_chip_enable(aspi, cs, true);
+
+       chip->ctl_val[ASPEED_SPI_BASE] = CTRL_CE_STOP_ACTIVE | CTRL_IO_MODE_USER;
+
+       dev_dbg(aspi->dev, "CE%d setup done\n", cs);
+       return 0;
+}
+
+static void aspeed_spi_cleanup(struct spi_device *spi)
+{
+       struct aspeed_spi *aspi = spi_controller_get_devdata(spi->master);
+       unsigned int cs = spi->chip_select;
+
+       aspeed_spi_chip_enable(aspi, cs, false);
+
+       dev_dbg(aspi->dev, "CE%d cleanup done\n", cs);
+}
+
+static void aspeed_spi_enable(struct aspeed_spi *aspi, bool enable)
+{
+       int cs;
+
+       for (cs = 0; cs < aspi->data->max_cs; cs++)
+               aspeed_spi_chip_enable(aspi, cs, enable);
+}
+
+static int aspeed_spi_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       const struct aspeed_spi_data *data;
+       struct spi_controller *ctlr;
+       struct aspeed_spi *aspi;
+       struct resource *res;
+       int ret;
+
+       data = of_device_get_match_data(&pdev->dev);
+       if (!data)
+               return -ENODEV;
+
+       ctlr = devm_spi_alloc_master(dev, sizeof(*aspi));
+       if (!ctlr)
+               return -ENOMEM;
+
+       aspi = spi_controller_get_devdata(ctlr);
+       platform_set_drvdata(pdev, aspi);
+       aspi->data = data;
+       aspi->dev = dev;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       aspi->regs = devm_ioremap_resource(dev, res);
+       if (IS_ERR(aspi->regs)) {
+               dev_err(dev, "missing AHB register window\n");
+               return PTR_ERR(aspi->regs);
+       }
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       aspi->ahb_base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(aspi->ahb_base)) {
+               dev_err(dev, "missing AHB mapping window\n");
+               return PTR_ERR(aspi->ahb_base);
+       }
+
+       aspi->ahb_window_size = resource_size(res);
+       aspi->ahb_base_phy = res->start;
+
+       aspi->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(aspi->clk)) {
+               dev_err(dev, "missing clock\n");
+               return PTR_ERR(aspi->clk);
+       }
+
+       aspi->clk_freq = clk_get_rate(aspi->clk);
+       if (!aspi->clk_freq) {
+               dev_err(dev, "invalid clock\n");
+               return -EINVAL;
+       }
+
+       ret = clk_prepare_enable(aspi->clk);
+       if (ret) {
+               dev_err(dev, "can not enable the clock\n");
+               return ret;
+       }
+
+       /* IRQ is for DMA, which the driver doesn't support yet */
+
+       ctlr->mode_bits = SPI_RX_DUAL | SPI_TX_DUAL | data->mode_bits;
+       ctlr->bus_num = pdev->id;
+       ctlr->mem_ops = &aspeed_spi_mem_ops;
+       ctlr->setup = aspeed_spi_setup;
+       ctlr->cleanup = aspeed_spi_cleanup;
+       ctlr->num_chipselect = data->max_cs;
+       ctlr->dev.of_node = dev->of_node;
+
+       ret = devm_spi_register_controller(dev, ctlr);
+       if (ret) {
+               dev_err(&pdev->dev, "spi_register_controller failed\n");
+               goto disable_clk;
+       }
+       return 0;
+
+disable_clk:
+       clk_disable_unprepare(aspi->clk);
+       return ret;
+}
+
+static int aspeed_spi_remove(struct platform_device *pdev)
+{
+       struct aspeed_spi *aspi = platform_get_drvdata(pdev);
+
+       aspeed_spi_enable(aspi, false);
+       clk_disable_unprepare(aspi->clk);
+       return 0;
+}
+
+/*
+ * AHB mappings
+ */
+
+/*
+ * The Segment Registers of the AST2400 and AST2500 use an 8MB unit.
+ * The address range is encoded with absolute addresses in the overall
+ * mapping window.
+ */
+static u32 aspeed_spi_segment_start(struct aspeed_spi *aspi, u32 reg)
+{
+       return ((reg >> 16) & 0xFF) << 23;
+}
+
+static u32 aspeed_spi_segment_end(struct aspeed_spi *aspi, u32 reg)
+{
+       return ((reg >> 24) & 0xFF) << 23;
+}
+
+static u32 aspeed_spi_segment_reg(struct aspeed_spi *aspi, u32 start, u32 end)
+{
+       return (((start >> 23) & 0xFF) << 16) | (((end >> 23) & 0xFF) << 24);
+}
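+
+/*
+ * Worked example with assumed values (not from a datasheet): an AHB
+ * window based at 0x20000000 with a 32MB CE0 window gives start field
+ * 0x40 and end field 0x44, i.e. a segment register value of 0x44400000.
+ */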
+
+/*
+ * The Segment Registers of the AST2600 use a 1MB unit. The address
+ * range is encoded with offsets in the overall mapping window.
+ */
+
+#define AST2600_SEG_ADDR_MASK 0x0ff00000
+
+static u32 aspeed_spi_segment_ast2600_start(struct aspeed_spi *aspi,
+                                           u32 reg)
+{
+       u32 start_offset = (reg << 16) & AST2600_SEG_ADDR_MASK;
+
+       return aspi->ahb_base_phy + start_offset;
+}
+
+static u32 aspeed_spi_segment_ast2600_end(struct aspeed_spi *aspi,
+                                         u32 reg)
+{
+       u32 end_offset = reg & AST2600_SEG_ADDR_MASK;
+
+       /* segment is disabled */
+       if (!end_offset)
+               return aspi->ahb_base_phy;
+
+       return aspi->ahb_base_phy + end_offset + 0x100000;
+}
+
+static u32 aspeed_spi_segment_ast2600_reg(struct aspeed_spi *aspi,
+                                         u32 start, u32 end)
+{
+       /* disable zero size segments */
+       if (start == end)
+               return 0;
+
+       return ((start & AST2600_SEG_ADDR_MASK) >> 16) |
+               ((end - 1) & AST2600_SEG_ADDR_MASK);
+}
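+
+/*
+ * Worked example with assumed values: for an AHB window based at
+ * 0x20000000, a 2MB CE0 window encodes as reg = 0x00100000 with the
+ * 1MB unit (start offset 0x0, end - 1 in the second 1MB block); base
+ * address bits above the AST2600_SEG_ADDR_MASK are simply dropped.
+ */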
+
+/*
+ * Read timing compensation sequences
+ */
+
+#define CALIBRATE_BUF_SIZE SZ_16K
+
+static bool aspeed_spi_check_reads(struct aspeed_spi_chip *chip,
+                                  const u8 *golden_buf, u8 *test_buf)
+{
+       int i;
+
+       for (i = 0; i < 10; i++) {
+               memcpy_fromio(test_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
+               if (memcmp(test_buf, golden_buf, CALIBRATE_BUF_SIZE) != 0) {
+#if defined(VERBOSE_DEBUG)
+                       print_hex_dump_bytes(DEVICE_NAME "  fail: ", DUMP_PREFIX_NONE,
+                                            test_buf, 0x100);
+#endif
+                       return false;
+               }
+       }
+       return true;
+}
+
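+/*
+ * FREAD_TPASS() packs one calibration point: bits [2:0] hold the HCLK
+ * cycle delay (i / 2) and bit 3 enables the extra ~4ns data input
+ * delay when i is even (inferred from the dev_dbg() trace below).
+ */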
+#define FREAD_TPASS(i) (((i) / 2) | (((i) & 1) ? 0 : 8))
+
+/*
+ * The timing register is shared by all devices. Only update for CE0.
+ */
+static int aspeed_spi_calibrate(struct aspeed_spi_chip *chip, u32 hdiv,
+                               const u8 *golden_buf, u8 *test_buf)
+{
+       struct aspeed_spi *aspi = chip->aspi;
+       const struct aspeed_spi_data *data = aspi->data;
+       int i;
+       int good_pass = -1, pass_count = 0;
+       u32 shift = (hdiv - 1) << 2;
+       u32 mask = ~(0xfu << shift);
+       u32 fread_timing_val = 0;
+
+       /*
+        * Try HCLK delay 0..5, each one with and without the extra 4ns
+        * DI input delay, and look for a good pair.
+        */
+       for (i = 0; i < 12; i++) {
+               bool pass;
+
+               if (chip->cs == 0) {
+                       fread_timing_val &= mask;
+                       fread_timing_val |= FREAD_TPASS(i) << shift;
+                       writel(fread_timing_val, aspi->regs + data->timing);
+               }
+               pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
+               dev_dbg(aspi->dev,
+                       "  * [%08x] %d HCLK delay, %dns DI delay : %s",
+                       fread_timing_val, i / 2, (i & 1) ? 0 : 4,
+                       pass ? "PASS" : "FAIL");
+               if (pass) {
+                       pass_count++;
+                       if (pass_count == 3) {
+                               good_pass = i - 1;
+                               break;
+                       }
+               } else {
+                       pass_count = 0;
+               }
+       }
+
+       /* No good setting for this frequency */
+       if (good_pass < 0)
+               return -1;
+
+       /* We have at least one pass of margin, let's use the first pass */
+       if (chip->cs == 0) {
+               fread_timing_val &= mask;
+               fread_timing_val |= FREAD_TPASS(good_pass) << shift;
+               writel(fread_timing_val, aspi->regs + data->timing);
+       }
+       dev_dbg(aspi->dev, " * -> good is pass %d [0x%08x]",
+               good_pass, fread_timing_val);
+       return 0;
+}
+
+static bool aspeed_spi_check_calib_data(const u8 *test_buf, u32 size)
+{
+       const u32 *tb32 = (const u32 *)test_buf;
+       u32 i, cnt = 0;
+
+       /*
+        * We check if we have enough words that are neither all 0s nor
+        * all 1s so the calibration can be considered valid.
+        *
+        * We use an arbitrary threshold of 64 for now.
+        */
+       size >>= 2;
+       for (i = 0; i < size; i++) {
+               if (tb32[i] != 0 && tb32[i] != 0xffffffff)
+                       cnt++;
+       }
+       return cnt >= 64;
+}
+
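+/*
+ * 4-bit HCLK divisor encodings for the frequency selection field of
+ * the CE control register: entry i - 1 selects HCLK/i, applied via
+ * ASPEED_SPI_HCLK_DIV() below.
+ */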
+static const u32 aspeed_spi_hclk_divs[] = {
+       0xf, /* HCLK */
+       0x7, /* HCLK/2 */
+       0xe, /* HCLK/3 */
+       0x6, /* HCLK/4 */
+       0xd, /* HCLK/5 */
+};
+
+#define ASPEED_SPI_HCLK_DIV(i) \
+       (aspeed_spi_hclk_divs[(i) - 1] << CTRL_FREQ_SEL_SHIFT)
+
+static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip)
+{
+       struct aspeed_spi *aspi = chip->aspi;
+       const struct aspeed_spi_data *data = aspi->data;
+       u32 ahb_freq = aspi->clk_freq;
+       u32 max_freq = chip->clk_freq;
+       u32 ctl_val;
+       u8 *golden_buf = NULL;
+       u8 *test_buf = NULL;
+       int i, rc, best_div = -1;
+
+       dev_dbg(aspi->dev, "calculate timing compensation - AHB freq: %d MHz",
+               ahb_freq / 1000000);
+
+       /*
+        * Use the slowest frequency (HCLK divisor bits cleared) to read
+        * the calibration area and capture the golden reference data.
+        */
+       ctl_val = chip->ctl_val[ASPEED_SPI_READ] & data->hclk_mask;
+       writel(ctl_val, chip->ctl);
+
+       test_buf = kzalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
+       if (!test_buf)
+               return -ENOMEM;
+
+       golden_buf = test_buf + CALIBRATE_BUF_SIZE;
+
+       memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
+       if (!aspeed_spi_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
+               dev_info(aspi->dev, "Calibration area too uniform, using low speed");
+               goto no_calib;
+       }
+
+#if defined(VERBOSE_DEBUG)
+       print_hex_dump_bytes(DEVICE_NAME "  good: ", DUMP_PREFIX_NONE,
+                            golden_buf, 0x100);
+#endif
+
+       /* Now we iterate the HCLK dividers until we find our breaking point */
+       for (i = ARRAY_SIZE(aspeed_spi_hclk_divs); i > data->hdiv_max - 1; i--) {
+               u32 tv, freq;
+
+               freq = ahb_freq / i;
+               if (freq > max_freq)
+                       continue;
+
+               /* Set the timing */
+               tv = chip->ctl_val[ASPEED_SPI_READ] | ASPEED_SPI_HCLK_DIV(i);
+               writel(tv, chip->ctl);
+               dev_dbg(aspi->dev, "Trying HCLK/%d [%08x] ...", i, tv);
+               rc = data->calibrate(chip, i, golden_buf, test_buf);
+               if (rc == 0)
+                       best_div = i;
+       }
+
+       /* Nothing found? */
+       if (best_div < 0) {
+               dev_warn(aspi->dev, "No good frequency, using dumb slow");
+       } else {
+               dev_dbg(aspi->dev, "Found good read timings at HCLK/%d", best_div);
+
+               /* Record the freq */
+               for (i = 0; i < ASPEED_SPI_MAX; i++)
+                       chip->ctl_val[i] = (chip->ctl_val[i] & data->hclk_mask) |
+                               ASPEED_SPI_HCLK_DIV(best_div);
+       }
+
+no_calib:
+       writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
+       kfree(test_buf);
+       return 0;
+}
+
+#define TIMING_DELAY_DI                BIT(3)
+#define TIMING_DELAY_HCYCLE_MAX        5
+#define TIMING_REG_AST2600(chip)                               \
+       ((chip)->aspi->regs + (chip)->aspi->data->timing +      \
+        (chip)->cs * 4)
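+
+/*
+ * Unlike the AST2400/AST2500, where the timing register is shared, the
+ * AST2600 has one timing compensation register per CE, hence the
+ * cs * 4 offset above.
+ */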
+
+static int aspeed_spi_ast2600_calibrate(struct aspeed_spi_chip *chip, u32 hdiv,
+                                       const u8 *golden_buf, u8 *test_buf)
+{
+       struct aspeed_spi *aspi = chip->aspi;
+       int hcycle;
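+       /*
+        * Each HCLK divisor owns one byte of the timing register,
+        * starting at HCLK/2, hence the (hdiv - 2) << 3 shift below
+        * (an inference from the code, not from the datasheet).
+        */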
+       u32 shift = (hdiv - 2) << 3;
+       u32 mask = ~(0xfu << shift);
+       u32 fread_timing_val = 0;
+
+       for (hcycle = 0; hcycle <= TIMING_DELAY_HCYCLE_MAX; hcycle++) {
+               int delay_ns;
+               bool pass = false;
+
+               fread_timing_val &= mask;
+               fread_timing_val |= hcycle << shift;
+
+               /* No DI input delay first */
+               writel(fread_timing_val, TIMING_REG_AST2600(chip));
+               pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
+               dev_dbg(aspi->dev,
+                       "  * [%08x] %d HCLK delay, DI delay none : %s",
+                       fread_timing_val, hcycle, pass ? "PASS" : "FAIL");
+               if (pass)
+                       return 0;
+
+               /* Add DI input delays */
+               fread_timing_val &= mask;
+               fread_timing_val |= (TIMING_DELAY_DI | hcycle) << shift;
+
+               for (delay_ns = 0; delay_ns < 0x10; delay_ns++) {
+                       fread_timing_val &= ~(0xf << (4 + shift));
+                       fread_timing_val |= delay_ns << (4 + shift);
+
+                       writel(fread_timing_val, TIMING_REG_AST2600(chip));
+                       pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
+                       dev_dbg(aspi->dev,
+                               "  * [%08x] %d HCLK delay, DI delay %d.%dns : %s",
+                               fread_timing_val, hcycle, (delay_ns + 1) / 2,
+                               (delay_ns + 1) & 1 ? 5 : 0, pass ? "PASS" : "FAIL");
+                       /*
+                        * TODO: This is optimistic. We should look
+                        * for a working interval and save the middle
+                        * value in the read timing register.
+                        */
+                       if (pass)
+                               return 0;
+               }
+       }
+
+       /* No good setting for this frequency */
+       return -1;
+}
+
+/*
+ * Platform definitions
+ */
+static const struct aspeed_spi_data ast2400_fmc_data = {
+       .max_cs        = 5,
+       .hastype       = true,
+       .we0           = 16,
+       .ctl0          = CE0_CTRL_REG,
+       .timing        = CE0_TIMING_COMPENSATION_REG,
+       .hclk_mask     = 0xfffff0ff,
+       .hdiv_max      = 1,
+       .calibrate     = aspeed_spi_calibrate,
+       .segment_start = aspeed_spi_segment_start,
+       .segment_end   = aspeed_spi_segment_end,
+       .segment_reg   = aspeed_spi_segment_reg,
+};
+
+static const struct aspeed_spi_data ast2400_spi_data = {
+       .max_cs        = 1,
+       .hastype       = false,
+       .we0           = 0,
+       .ctl0          = 0x04,
+       .timing        = 0x14,
+       .hclk_mask     = 0xfffff0ff,
+       .hdiv_max      = 1,
+       .calibrate     = aspeed_spi_calibrate,
+       /* No segment registers */
+};
+
+static const struct aspeed_spi_data ast2500_fmc_data = {
+       .max_cs        = 3,
+       .hastype       = true,
+       .we0           = 16,
+       .ctl0          = CE0_CTRL_REG,
+       .timing        = CE0_TIMING_COMPENSATION_REG,
+       .hclk_mask     = 0xffffd0ff,
+       .hdiv_max      = 1,
+       .calibrate     = aspeed_spi_calibrate,
+       .segment_start = aspeed_spi_segment_start,
+       .segment_end   = aspeed_spi_segment_end,
+       .segment_reg   = aspeed_spi_segment_reg,
+};
+
+static const struct aspeed_spi_data ast2500_spi_data = {
+       .max_cs        = 2,
+       .hastype       = false,
+       .we0           = 16,
+       .ctl0          = CE0_CTRL_REG,
+       .timing        = CE0_TIMING_COMPENSATION_REG,
+       .hclk_mask     = 0xffffd0ff,
+       .hdiv_max      = 1,
+       .calibrate     = aspeed_spi_calibrate,
+       .segment_start = aspeed_spi_segment_start,
+       .segment_end   = aspeed_spi_segment_end,
+       .segment_reg   = aspeed_spi_segment_reg,
+};
+
+static const struct aspeed_spi_data ast2600_fmc_data = {
+       .max_cs        = 3,
+       .hastype       = false,
+       .mode_bits     = SPI_RX_QUAD | SPI_TX_QUAD,
+       .we0           = 16,
+       .ctl0          = CE0_CTRL_REG,
+       .timing        = CE0_TIMING_COMPENSATION_REG,
+       .hclk_mask     = 0xf0fff0ff,
+       .hdiv_max      = 2,
+       .calibrate     = aspeed_spi_ast2600_calibrate,
+       .segment_start = aspeed_spi_segment_ast2600_start,
+       .segment_end   = aspeed_spi_segment_ast2600_end,
+       .segment_reg   = aspeed_spi_segment_ast2600_reg,
+};
+
+static const struct aspeed_spi_data ast2600_spi_data = {
+       .max_cs        = 2,
+       .hastype       = false,
+       .mode_bits     = SPI_RX_QUAD | SPI_TX_QUAD,
+       .we0           = 16,
+       .ctl0          = CE0_CTRL_REG,
+       .timing        = CE0_TIMING_COMPENSATION_REG,
+       .hclk_mask     = 0xf0fff0ff,
+       .hdiv_max      = 2,
+       .calibrate     = aspeed_spi_ast2600_calibrate,
+       .segment_start = aspeed_spi_segment_ast2600_start,
+       .segment_end   = aspeed_spi_segment_ast2600_end,
+       .segment_reg   = aspeed_spi_segment_ast2600_reg,
+};
+
+static const struct of_device_id aspeed_spi_matches[] = {
+       { .compatible = "aspeed,ast2400-fmc", .data = &ast2400_fmc_data },
+       { .compatible = "aspeed,ast2400-spi", .data = &ast2400_spi_data },
+       { .compatible = "aspeed,ast2500-fmc", .data = &ast2500_fmc_data },
+       { .compatible = "aspeed,ast2500-spi", .data = &ast2500_spi_data },
+       { .compatible = "aspeed,ast2600-fmc", .data = &ast2600_fmc_data },
+       { .compatible = "aspeed,ast2600-spi", .data = &ast2600_spi_data },
+       { }
+};
+MODULE_DEVICE_TABLE(of, aspeed_spi_matches);
+
+static struct platform_driver aspeed_spi_driver = {
+       .probe                  = aspeed_spi_probe,
+       .remove                 = aspeed_spi_remove,
+       .driver = {
+               .name           = DEVICE_NAME,
+               .of_match_table = aspeed_spi_matches,
+       }
+};
+
+module_platform_driver(aspeed_spi_driver);
+
+MODULE_DESCRIPTION("ASPEED Static Memory Controller Driver");
+MODULE_AUTHOR("Chin-Ting Kuo <chin-ting_kuo@aspeedtech.com>");
+MODULE_AUTHOR("Cedric Le Goater <clg@kaod.org>");
+MODULE_LICENSE("GPL v2");
index 4b59a1b1bf7ed5d401cb9e704d16979b0a7470d3..e008761298da49d8cab2c5957debfa0676449086 100644 (file)
@@ -405,7 +405,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t)
                dma_unmap_single(hw->dev, dma_tx_addr, t->len,
                        DMA_TO_DEVICE);
 
-       return hw->rx_count < hw->tx_count ? hw->rx_count : hw->tx_count;
+       return min(hw->rx_count, hw->tx_count);
 }
 
 static irqreturn_t au1550_spi_dma_irq_callback(struct au1550_spi *hw)
@@ -539,7 +539,7 @@ static int au1550_spi_pio_txrxb(struct spi_device *spi, struct spi_transfer *t)
 
        wait_for_completion(&hw->master_done);
 
-       return hw->rx_count < hw->tx_count ? hw->rx_count : hw->tx_count;
+       return min(hw->rx_count, hw->tx_count);
 }
 
 static irqreturn_t au1550_spi_pio_irq_callback(struct au1550_spi *hw)
index 19686fb47bb352727fc9f2038403dd300909469a..2b9fc8449a622ef260d5159fc637ad4058be00bf 100644 (file)
@@ -43,6 +43,8 @@
 /* Capabilities */
 #define CQSPI_SUPPORTS_OCTAL           BIT(0)
 
+#define CQSPI_OP_WIDTH(part) ((part).nbytes ? ilog2((part).buswidth) : 0)
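+/* CQSPI_OP_WIDTH() maps a phase buswidth (1, 2, 4 or 8) to the log2
+ * encoding the instruction registers expect (0, 1, 2 or 3); an empty
+ * phase encodes as 0.
+ */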
+
 struct cqspi_st;
 
 struct cqspi_flash_pdata {
@@ -53,16 +55,12 @@ struct cqspi_flash_pdata {
        u32             tsd2d_ns;
        u32             tchsh_ns;
        u32             tslch_ns;
-       u8              inst_width;
-       u8              addr_width;
-       u8              data_width;
-       bool            dtr;
        u8              cs;
 };
 
 struct cqspi_st {
        struct platform_device  *pdev;
-
+       struct spi_master       *master;
        struct clk              *clk;
        unsigned int            sclk;
 
@@ -343,18 +341,18 @@ static irqreturn_t cqspi_irq_handler(int this_irq, void *dev)
        return IRQ_HANDLED;
 }
 
-static unsigned int cqspi_calc_rdreg(struct cqspi_flash_pdata *f_pdata)
+static unsigned int cqspi_calc_rdreg(const struct spi_mem_op *op)
 {
        u32 rdreg = 0;
 
-       rdreg |= f_pdata->inst_width << CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB;
-       rdreg |= f_pdata->addr_width << CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB;
-       rdreg |= f_pdata->data_width << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
+       rdreg |= CQSPI_OP_WIDTH(op->cmd) << CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB;
+       rdreg |= CQSPI_OP_WIDTH(op->addr) << CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB;
+       rdreg |= CQSPI_OP_WIDTH(op->data) << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
 
        return rdreg;
 }
 
-static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr)
+static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op)
 {
        unsigned int dummy_clk;
 
@@ -362,66 +360,12 @@ static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr)
                return 0;
 
        dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth);
-       if (dtr)
+       if (op->cmd.dtr)
                dummy_clk /= 2;
 
        return dummy_clk;
 }
 
-static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
-                             const struct spi_mem_op *op)
-{
-       /*
-        * For an op to be DTR, cmd phase along with every other non-empty
-        * phase should have dtr field set to 1. If an op phase has zero
-        * nbytes, ignore its dtr field; otherwise, check its dtr field.
-        */
-       f_pdata->dtr = op->cmd.dtr &&
-                      (!op->addr.nbytes || op->addr.dtr) &&
-                      (!op->data.nbytes || op->data.dtr);
-
-       f_pdata->inst_width = 0;
-       if (op->cmd.buswidth)
-               f_pdata->inst_width = ilog2(op->cmd.buswidth);
-
-       f_pdata->addr_width = 0;
-       if (op->addr.buswidth)
-               f_pdata->addr_width = ilog2(op->addr.buswidth);
-
-       f_pdata->data_width = 0;
-       if (op->data.buswidth)
-               f_pdata->data_width = ilog2(op->data.buswidth);
-
-       /* Right now we only support 8-8-8 DTR mode. */
-       if (f_pdata->dtr) {
-               switch (op->cmd.buswidth) {
-               case 0:
-               case 8:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-
-               switch (op->addr.buswidth) {
-               case 0:
-               case 8:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-
-               switch (op->data.buswidth) {
-               case 0:
-               case 8:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-
 static int cqspi_wait_idle(struct cqspi_st *cqspi)
 {
        const unsigned int poll_idle_retry = 3;
@@ -503,8 +447,7 @@ static int cqspi_setup_opcode_ext(struct cqspi_flash_pdata *f_pdata,
 }
 
 static int cqspi_enable_dtr(struct cqspi_flash_pdata *f_pdata,
-                           const struct spi_mem_op *op, unsigned int shift,
-                           bool enable)
+                           const struct spi_mem_op *op, unsigned int shift)
 {
        struct cqspi_st *cqspi = f_pdata->cqspi;
        void __iomem *reg_base = cqspi->iobase;
@@ -517,7 +460,7 @@ static int cqspi_enable_dtr(struct cqspi_flash_pdata *f_pdata,
         * We enable dual byte opcode here. The callers have to set up the
         * extension opcode based on which type of operation it is.
         */
-       if (enable) {
+       if (op->cmd.dtr) {
                reg |= CQSPI_REG_CONFIG_DTR_PROTO;
                reg |= CQSPI_REG_CONFIG_DUAL_OPCODE;
 
@@ -549,12 +492,7 @@ static int cqspi_command_read(struct cqspi_flash_pdata *f_pdata,
        size_t read_len;
        int status;
 
-       status = cqspi_set_protocol(f_pdata, op);
-       if (status)
-               return status;
-
-       status = cqspi_enable_dtr(f_pdata, op, CQSPI_REG_OP_EXT_STIG_LSB,
-                                 f_pdata->dtr);
+       status = cqspi_enable_dtr(f_pdata, op, CQSPI_REG_OP_EXT_STIG_LSB);
        if (status)
                return status;
 
@@ -565,17 +503,17 @@ static int cqspi_command_read(struct cqspi_flash_pdata *f_pdata,
                return -EINVAL;
        }
 
-       if (f_pdata->dtr)
+       if (op->cmd.dtr)
                opcode = op->cmd.opcode >> 8;
        else
                opcode = op->cmd.opcode;
 
        reg = opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB;
 
-       rdreg = cqspi_calc_rdreg(f_pdata);
+       rdreg = cqspi_calc_rdreg(op);
        writel(rdreg, reg_base + CQSPI_REG_RD_INSTR);
 
-       dummy_clk = cqspi_calc_dummy(op, f_pdata->dtr);
+       dummy_clk = cqspi_calc_dummy(op);
        if (dummy_clk > CQSPI_DUMMY_CLKS_MAX)
                return -EOPNOTSUPP;
 
@@ -622,12 +560,7 @@ static int cqspi_command_write(struct cqspi_flash_pdata *f_pdata,
        size_t write_len;
        int ret;
 
-       ret = cqspi_set_protocol(f_pdata, op);
-       if (ret)
-               return ret;
-
-       ret = cqspi_enable_dtr(f_pdata, op, CQSPI_REG_OP_EXT_STIG_LSB,
-                              f_pdata->dtr);
+       ret = cqspi_enable_dtr(f_pdata, op, CQSPI_REG_OP_EXT_STIG_LSB);
        if (ret)
                return ret;
 
@@ -638,10 +571,10 @@ static int cqspi_command_write(struct cqspi_flash_pdata *f_pdata,
                return -EINVAL;
        }
 
-       reg = cqspi_calc_rdreg(f_pdata);
+       reg = cqspi_calc_rdreg(op);
        writel(reg, reg_base + CQSPI_REG_RD_INSTR);
 
-       if (f_pdata->dtr)
+       if (op->cmd.dtr)
                opcode = op->cmd.opcode >> 8;
        else
                opcode = op->cmd.opcode;
@@ -688,21 +621,20 @@ static int cqspi_read_setup(struct cqspi_flash_pdata *f_pdata,
        int ret;
        u8 opcode;
 
-       ret = cqspi_enable_dtr(f_pdata, op, CQSPI_REG_OP_EXT_READ_LSB,
-                              f_pdata->dtr);
+       ret = cqspi_enable_dtr(f_pdata, op, CQSPI_REG_OP_EXT_READ_LSB);
        if (ret)
                return ret;
 
-       if (f_pdata->dtr)
+       if (op->cmd.dtr)
                opcode = op->cmd.opcode >> 8;
        else
                opcode = op->cmd.opcode;
 
        reg = opcode << CQSPI_REG_RD_INSTR_OPCODE_LSB;
-       reg |= cqspi_calc_rdreg(f_pdata);
+       reg |= cqspi_calc_rdreg(op);
 
        /* Setup dummy clock cycles */
-       dummy_clk = cqspi_calc_dummy(op, f_pdata->dtr);
+       dummy_clk = cqspi_calc_dummy(op);
 
        if (dummy_clk > CQSPI_DUMMY_CLKS_MAX)
                return -EOPNOTSUPP;
@@ -947,22 +879,21 @@ static int cqspi_write_setup(struct cqspi_flash_pdata *f_pdata,
        void __iomem *reg_base = cqspi->iobase;
        u8 opcode;
 
-       ret = cqspi_enable_dtr(f_pdata, op, CQSPI_REG_OP_EXT_WRITE_LSB,
-                              f_pdata->dtr);
+       ret = cqspi_enable_dtr(f_pdata, op, CQSPI_REG_OP_EXT_WRITE_LSB);
        if (ret)
                return ret;
 
-       if (f_pdata->dtr)
+       if (op->cmd.dtr)
                opcode = op->cmd.opcode >> 8;
        else
                opcode = op->cmd.opcode;
 
        /* Set opcode. */
        reg = opcode << CQSPI_REG_WR_INSTR_OPCODE_LSB;
-       reg |= f_pdata->data_width << CQSPI_REG_WR_INSTR_TYPE_DATA_LSB;
-       reg |= f_pdata->addr_width << CQSPI_REG_WR_INSTR_TYPE_ADDR_LSB;
+       reg |= CQSPI_OP_WIDTH(op->data) << CQSPI_REG_WR_INSTR_TYPE_DATA_LSB;
+       reg |= CQSPI_OP_WIDTH(op->addr) << CQSPI_REG_WR_INSTR_TYPE_ADDR_LSB;
        writel(reg, reg_base + CQSPI_REG_WR_INSTR);
-       reg = cqspi_calc_rdreg(f_pdata);
+       reg = cqspi_calc_rdreg(op);
        writel(reg, reg_base + CQSPI_REG_RD_INSTR);
 
        /*
@@ -1244,10 +1175,6 @@ static ssize_t cqspi_write(struct cqspi_flash_pdata *f_pdata,
        const u_char *buf = op->data.buf.out;
        int ret;
 
-       ret = cqspi_set_protocol(f_pdata, op);
-       if (ret)
-               return ret;
-
        ret = cqspi_write_setup(f_pdata, op);
        if (ret)
                return ret;
@@ -1260,7 +1187,7 @@ static ssize_t cqspi_write(struct cqspi_flash_pdata *f_pdata,
         * mode. So, we can not use direct mode when in DTR mode for writing
         * data.
         */
-       if (!f_pdata->dtr && cqspi->use_direct_mode &&
+       if (!op->cmd.dtr && cqspi->use_direct_mode &&
            ((to + len) <= cqspi->ahb_size)) {
                memcpy_toio(cqspi->ahb_base + to, buf, len);
                return cqspi_wait_idle(cqspi);
@@ -1348,9 +1275,6 @@ static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
        int ret;
 
        ddata = of_device_get_match_data(dev);
-       ret = cqspi_set_protocol(f_pdata, op);
-       if (ret)
-               return ret;
 
        ret = cqspi_read_setup(f_pdata, op);
        if (ret)
@@ -1423,13 +1347,7 @@ static bool cqspi_supports_mem_op(struct spi_mem *mem,
                        return false;
                if (op->data.nbytes && op->data.buswidth != 8)
                        return false;
-       } else if (all_false) {
-               /* Only 1-1-X ops are supported without DTR */
-               if (op->cmd.nbytes && op->cmd.buswidth > 1)
-                       return false;
-               if (op->addr.nbytes && op->addr.buswidth > 1)
-                       return false;
-       } else {
+       } else if (!all_false) {
                /* Mixed DTR modes are not supported. */
                return false;
        }
@@ -1563,6 +1481,7 @@ static int cqspi_request_mmap_dma(struct cqspi_st *cqspi)
        cqspi->rx_chan = dma_request_chan_by_mask(&mask);
        if (IS_ERR(cqspi->rx_chan)) {
                int ret = PTR_ERR(cqspi->rx_chan);
+
                cqspi->rx_chan = NULL;
                return dev_err_probe(&cqspi->pdev->dev, ret, "No Rx DMA available\n");
        }
@@ -1639,7 +1558,7 @@ static int cqspi_probe(struct platform_device *pdev)
        int ret;
        int irq;
 
-       master = spi_alloc_master(&pdev->dev, sizeof(*cqspi));
+       master = devm_spi_alloc_master(&pdev->dev, sizeof(*cqspi));
        if (!master) {
                dev_err(&pdev->dev, "spi_alloc_master failed\n");
                return -ENOMEM;
@@ -1652,6 +1571,7 @@ static int cqspi_probe(struct platform_device *pdev)
        cqspi = spi_master_get_devdata(master);
 
        cqspi->pdev = pdev;
+       cqspi->master = master;
        platform_set_drvdata(pdev, cqspi);
 
        /* Obtain configuration from OF. */
@@ -1700,11 +1620,9 @@ static int cqspi_probe(struct platform_device *pdev)
        }
 
        pm_runtime_enable(dev);
-       ret = pm_runtime_get_sync(dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(dev);
+       ret = pm_runtime_resume_and_get(dev);
+       if (ret < 0)
                goto probe_master_put;
-       }
 
        ret = clk_prepare_enable(cqspi->clk);
        if (ret) {
@@ -1784,7 +1702,7 @@ static int cqspi_probe(struct platform_device *pdev)
                        goto probe_setup_failed;
        }
 
-       ret = devm_spi_register_master(dev, master);
+       ret = spi_register_master(master);
        if (ret) {
                dev_err(&pdev->dev, "failed to register SPI ctlr %d\n", ret);
                goto probe_setup_failed;
@@ -1807,6 +1725,7 @@ static int cqspi_remove(struct platform_device *pdev)
 {
        struct cqspi_st *cqspi = platform_get_drvdata(pdev);
 
+       spi_unregister_master(cqspi->master);
        cqspi_controller_enable(cqspi, 0);
 
        if (cqspi->rx_chan)
@@ -1865,7 +1784,7 @@ static const struct cqspi_driver_platdata intel_lgm_qspi = {
 };
 
 static const struct cqspi_driver_platdata socfpga_qspi = {
-       .quirks = CQSPI_NO_SUPPORT_WR_COMPLETION,
+       .quirks = CQSPI_DISABLE_DAC_MODE | CQSPI_NO_SUPPORT_WR_COMPLETION,
 };
 
 static const struct cqspi_driver_platdata versal_ospi = {
@@ -1894,11 +1813,11 @@ static const struct of_device_id cqspi_dt_ids[] = {
        },
        {
                .compatible = "xlnx,versal-ospi-1.0",
-               .data = (void *)&versal_ospi,
+               .data = &versal_ospi,
        },
        {
                .compatible = "intel,socfpga-qspi",
-               .data = (void *)&socfpga_qspi,
+               .data = &socfpga_qspi,
        },
        { /* end of table */ }
 };
index ceb16e70d235af4014f1266dfc786a8d57dfd68c..a23d4f6329f501f0ce32bac18bf1307e7e827494 100644 (file)
@@ -342,7 +342,8 @@ static irqreturn_t cdns_spi_irq(int irq, void *dev_id)
 {
        struct spi_master *master = dev_id;
        struct cdns_spi *xspi = spi_master_get_devdata(master);
-       u32 intr_status, status;
+       irqreturn_t status;
+       u32 intr_status;
 
        status = IRQ_NONE;
        intr_status = cdns_spi_read(xspi, CDNS_SPI_ISR);
@@ -657,7 +658,7 @@ static int __maybe_unused cdns_spi_resume(struct device *dev)
  *
  * Return:     0 on success and error value on error
  */
-static int __maybe_unused cnds_runtime_resume(struct device *dev)
+static int __maybe_unused cdns_spi_runtime_resume(struct device *dev)
 {
        struct spi_master *master = dev_get_drvdata(dev);
        struct cdns_spi *xspi = spi_master_get_devdata(master);
@@ -686,7 +687,7 @@ static int __maybe_unused cnds_runtime_resume(struct device *dev)
  *
  * Return:     Always 0
  */
-static int __maybe_unused cnds_runtime_suspend(struct device *dev)
+static int __maybe_unused cdns_spi_runtime_suspend(struct device *dev)
 {
        struct spi_master *master = dev_get_drvdata(dev);
        struct cdns_spi *xspi = spi_master_get_devdata(master);
@@ -698,8 +699,8 @@ static int __maybe_unused cnds_runtime_suspend(struct device *dev)
 }
 
 static const struct dev_pm_ops cdns_spi_dev_pm_ops = {
-       SET_RUNTIME_PM_OPS(cnds_runtime_suspend,
-                          cnds_runtime_resume, NULL)
+       SET_RUNTIME_PM_OPS(cdns_spi_runtime_suspend,
+                          cdns_spi_runtime_resume, NULL)
        SET_SYSTEM_SLEEP_PM_OPS(cdns_spi_suspend, cdns_spi_resume)
 };
 
index 0bef5ce0809445516fffff99387fcdc4c2a69f37..c005ed26a3e1bf531591b047c315b0a2124e13cd 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/clk.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
@@ -89,6 +90,7 @@ static irqreturn_t spi_clps711x_isr(int irq, void *dev_id)
 
 static int spi_clps711x_probe(struct platform_device *pdev)
 {
+       struct device_node *np = pdev->dev.of_node;
        struct spi_clps711x_data *hw;
        struct spi_master *master;
        int irq, ret;
@@ -117,8 +119,7 @@ static int spi_clps711x_probe(struct platform_device *pdev)
                goto err_out;
        }
 
-       hw->syscon =
-               syscon_regmap_lookup_by_compatible("cirrus,ep7209-syscon3");
+       hw->syscon = syscon_regmap_lookup_by_phandle(np, "syscon");
        if (IS_ERR(hw->syscon)) {
                ret = PTR_ERR(hw->syscon);
                goto err_out;
index 9851551ebbe05d9381d3344469df10242f52b4dd..46ae46a944c5ce89818ddef2f2a622a96ee95036 100644 (file)
@@ -876,6 +876,10 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
                                        "QuadSPI-memory");
+       if (!res) {
+               ret = -EINVAL;
+               goto err_put_ctrl;
+       }
        q->memmap_phy = res->start;
        /* Since there are 4 cs, map size required is 4 times ahb_buf_size */
        q->ahb_addr = devm_ioremap(dev, q->memmap_phy,
index 5f05d519fbbd09188032e40f9626af7e24b5a498..71376b6df89dbb279dd7f5dda18ed41f305b9155 100644 (file)
@@ -731,7 +731,7 @@ static int img_spfi_resume(struct device *dev)
        int ret;
 
        ret = pm_runtime_get_sync(dev);
-       if (ret) {
+       if (ret < 0) {
                pm_runtime_put_noidle(dev);
                return ret;
        }
index b2dd0a4d2446298ee15bcee1cd26a56fa26c6939..bc97337fddf5f0fc444621083b833ba2a05fade7 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/spi_bitbang.h>
 #include <linux/types.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -32,6 +31,12 @@ static bool use_dma = true;
 module_param(use_dma, bool, 0644);
 MODULE_PARM_DESC(use_dma, "Enable usage of DMA when available (default)");
 
+/* define polling limits */
+static unsigned int polling_limit_us = 30;
+module_param(polling_limit_us, uint, 0664);
+MODULE_PARM_DESC(polling_limit_us,
+                "time in us to run a transfer in polling mode\n");
+
 #define MXC_RPM_TIMEOUT                2000 /* 2000ms */
 
 #define MXC_CSPIRXDATA         0x00
@@ -64,15 +69,15 @@ enum spi_imx_devtype {
 struct spi_imx_data;
 
 struct spi_imx_devtype_data {
-       void (*intctrl)(struct spi_imx_data *, int);
-       int (*prepare_message)(struct spi_imx_data *, struct spi_message *);
-       int (*prepare_transfer)(struct spi_imx_data *, struct spi_device *);
-       void (*trigger)(struct spi_imx_data *);
-       int (*rx_available)(struct spi_imx_data *);
-       void (*reset)(struct spi_imx_data *);
-       void (*setup_wml)(struct spi_imx_data *);
-       void (*disable)(struct spi_imx_data *);
-       void (*disable_dma)(struct spi_imx_data *);
+       void (*intctrl)(struct spi_imx_data *spi_imx, int enable);
+       int (*prepare_message)(struct spi_imx_data *spi_imx, struct spi_message *msg);
+       int (*prepare_transfer)(struct spi_imx_data *spi_imx, struct spi_device *spi);
+       void (*trigger)(struct spi_imx_data *spi_imx);
+       int (*rx_available)(struct spi_imx_data *spi_imx);
+       void (*reset)(struct spi_imx_data *spi_imx);
+       void (*setup_wml)(struct spi_imx_data *spi_imx);
+       void (*disable)(struct spi_imx_data *spi_imx);
+       void (*disable_dma)(struct spi_imx_data *spi_imx);
        bool has_dmamode;
        bool has_slavemode;
        unsigned int fifo_size;
@@ -86,7 +91,7 @@ struct spi_imx_devtype_data {
 };
 
 struct spi_imx_data {
-       struct spi_bitbang bitbang;
+       struct spi_controller *controller;
        struct device *dev;
 
        struct completion xfer_done;
@@ -102,12 +107,13 @@ struct spi_imx_data {
        unsigned int spi_drctl;
 
        unsigned int count, remainder;
-       void (*tx)(struct spi_imx_data *);
-       void (*rx)(struct spi_imx_data *);
+       void (*tx)(struct spi_imx_data *spi_imx);
+       void (*rx)(struct spi_imx_data *spi_imx);
        void *rx_buf;
        const void *tx_buf;
        unsigned int txfifo; /* number of words pushed in tx FIFO */
        unsigned int dynamic_burst;
+       bool rx_only;
 
        /* Slave mode */
        bool slave_mode;
@@ -225,15 +231,15 @@ static int spi_imx_bytes_per_word(const int bits_per_word)
                return 4;
 }
 
-static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi,
+static bool spi_imx_can_dma(struct spi_controller *controller, struct spi_device *spi,
                         struct spi_transfer *transfer)
 {
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(controller);
 
-       if (!use_dma || master->fallback)
+       if (!use_dma || controller->fallback)
                return false;
 
-       if (!master->dma_rx)
+       if (!controller->dma_rx)
                return false;
 
        if (spi_imx->slave_mode)
@@ -289,17 +295,16 @@ static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi,
 static void spi_imx_buf_rx_swap_u32(struct spi_imx_data *spi_imx)
 {
        unsigned int val = readl(spi_imx->base + MXC_CSPIRXDATA);
-#ifdef __LITTLE_ENDIAN
-       unsigned int bytes_per_word;
-#endif
 
        if (spi_imx->rx_buf) {
 #ifdef __LITTLE_ENDIAN
+               unsigned int bytes_per_word;
+
                bytes_per_word = spi_imx_bytes_per_word(spi_imx->bits_per_word);
                if (bytes_per_word == 1)
-                       val = cpu_to_be32(val);
+                       swab32s(&val);
                else if (bytes_per_word == 2)
-                       val = (val << 16) | (val >> 16);
+                       swahw32s(&val);
 #endif
                *(u32 *)spi_imx->rx_buf = val;
                spi_imx->rx_buf += sizeof(u32);
@@ -353,9 +358,9 @@ static void spi_imx_buf_tx_swap_u32(struct spi_imx_data *spi_imx)
        bytes_per_word = spi_imx_bytes_per_word(spi_imx->bits_per_word);
 
        if (bytes_per_word == 1)
-               val = cpu_to_be32(val);
+               swab32s(&val);
        else if (bytes_per_word == 2)
-               val = (val << 16) | (val >> 16);
+               swahw32s(&val);
 #endif
        writel(val, spi_imx->base + MXC_CSPITXDATA);
 }
@@ -469,7 +474,7 @@ static unsigned int mx51_ecspi_clkdiv(struct spi_imx_data *spi_imx,
 
 static void mx51_ecspi_intctrl(struct spi_imx_data *spi_imx, int enable)
 {
-       unsigned val = 0;
+       unsigned int val = 0;
 
        if (enable & MXC_INT_TE)
                val |= MX51_ECSPI_INT_TEEN;
@@ -515,6 +520,7 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
        u32 min_speed_hz = ~0U;
        u32 testreg, delay;
        u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG);
+       u32 current_cfg = cfg;
 
        /* set Master or Slave mode */
        if (spi_imx->slave_mode)
@@ -554,11 +560,6 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
        else
                cfg |= MX51_ECSPI_CONFIG_SBBCTRL(spi->chip_select);
 
-       if (spi->mode & SPI_CPHA)
-               cfg |= MX51_ECSPI_CONFIG_SCLKPHA(spi->chip_select);
-       else
-               cfg &= ~MX51_ECSPI_CONFIG_SCLKPHA(spi->chip_select);
-
        if (spi->mode & SPI_CPOL) {
                cfg |= MX51_ECSPI_CONFIG_SCLKPOL(spi->chip_select);
                cfg |= MX51_ECSPI_CONFIG_SCLKCTL(spi->chip_select);
@@ -572,6 +573,9 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
        else
                cfg &= ~MX51_ECSPI_CONFIG_SSBPOL(spi->chip_select);
 
+       if (cfg == current_cfg)
+               return 0;
+
        writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG);
 
        /*
@@ -585,7 +589,7 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
         * the SPI communication as the device on the other end would consider
         * the change of SCLK polarity as a clock tick already.
         *
-        * Because spi_imx->spi_bus_clk is only set in bitbang prepare_message
+        * Because spi_imx->spi_bus_clk is only set in prepare_message
         * callback, iterate over all the transfers in spi_message, find the
         * one with lowest bus frequency, and use that bus frequency for the
         * delay calculation. In case all transfers have speed_hz == 0, then
@@ -606,6 +610,24 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
        return 0;
 }
 
+static void mx51_configure_cpha(struct spi_imx_data *spi_imx,
+                               struct spi_device *spi)
+{
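+       /*
+        * SPI_RX_CPHA_FLIP requests the opposite SCLK phase on RX-only
+        * transfers, hence the CPHA flip below (see the mode flag
+        * description in include/linux/spi/spi.h).
+        */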
+       bool cpha = (spi->mode & SPI_CPHA);
+       bool flip_cpha = (spi->mode & SPI_RX_CPHA_FLIP) && spi_imx->rx_only;
+       u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG);
+
+       /* Flip cpha logical value iff flip_cpha */
+       cpha ^= flip_cpha;
+
+       if (cpha)
+               cfg |= MX51_ECSPI_CONFIG_SCLKPHA(spi->chip_select);
+       else
+               cfg &= ~MX51_ECSPI_CONFIG_SCLKPHA(spi->chip_select);
+
+       writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG);
+}
+
 static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
                                       struct spi_device *spi)
 {
@@ -627,6 +649,8 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
        ctrl |= mx51_ecspi_clkdiv(spi_imx, spi_imx->spi_bus_clk, &clk);
        spi_imx->spi_bus_clk = clk;
 
+       mx51_configure_cpha(spi_imx, spi);
+
        /*
         * ERR009165: work in XHC mode instead of SMC as PIO on the chips
         * before i.mx6ul.
@@ -1153,12 +1177,12 @@ static irqreturn_t spi_imx_isr(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static int spi_imx_dma_configure(struct spi_master *master)
+static int spi_imx_dma_configure(struct spi_controller *controller)
 {
        int ret;
        enum dma_slave_buswidth buswidth;
        struct dma_slave_config rx = {}, tx = {};
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(controller);
 
        switch (spi_imx_bytes_per_word(spi_imx->bits_per_word)) {
        case 4:
@@ -1178,7 +1202,7 @@ static int spi_imx_dma_configure(struct spi_master *master)
        tx.dst_addr = spi_imx->base_phys + MXC_CSPITXDATA;
        tx.dst_addr_width = buswidth;
        tx.dst_maxburst = spi_imx->wml;
-       ret = dmaengine_slave_config(master->dma_tx, &tx);
+       ret = dmaengine_slave_config(controller->dma_tx, &tx);
        if (ret) {
                dev_err(spi_imx->dev, "TX dma configuration failed with %d\n", ret);
                return ret;
@@ -1188,7 +1212,7 @@ static int spi_imx_dma_configure(struct spi_master *master)
        rx.src_addr = spi_imx->base_phys + MXC_CSPIRXDATA;
        rx.src_addr_width = buswidth;
        rx.src_maxburst = spi_imx->wml;
-       ret = dmaengine_slave_config(master->dma_rx, &rx);
+       ret = dmaengine_slave_config(controller->dma_rx, &rx);
        if (ret) {
                dev_err(spi_imx->dev, "RX dma configuration failed with %d\n", ret);
                return ret;
@@ -1200,7 +1224,7 @@ static int spi_imx_dma_configure(struct spi_master *master)
 static int spi_imx_setupxfer(struct spi_device *spi,
                                 struct spi_transfer *t)
 {
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
 
        if (!t)
                return 0;
@@ -1246,11 +1270,14 @@ static int spi_imx_setupxfer(struct spi_device *spi,
                spi_imx->dynamic_burst = 0;
        }
 
-       if (spi_imx_can_dma(spi_imx->bitbang.master, spi, t))
+       if (spi_imx_can_dma(spi_imx->controller, spi, t))
                spi_imx->usedma = true;
        else
                spi_imx->usedma = false;
 
+       spi_imx->rx_only = ((t->tx_buf == NULL)
+                       || (t->tx_buf == spi->controller->dummy_tx));
+
        if (is_imx53_ecspi(spi_imx) && spi_imx->slave_mode) {
                spi_imx->rx = mx53_ecspi_rx_slave;
                spi_imx->tx = mx53_ecspi_tx_slave;
@@ -1264,50 +1291,50 @@ static int spi_imx_setupxfer(struct spi_device *spi,
 
 static void spi_imx_sdma_exit(struct spi_imx_data *spi_imx)
 {
-       struct spi_master *master = spi_imx->bitbang.master;
+       struct spi_controller *controller = spi_imx->controller;
 
-       if (master->dma_rx) {
-               dma_release_channel(master->dma_rx);
-               master->dma_rx = NULL;
+       if (controller->dma_rx) {
+               dma_release_channel(controller->dma_rx);
+               controller->dma_rx = NULL;
        }
 
-       if (master->dma_tx) {
-               dma_release_channel(master->dma_tx);
-               master->dma_tx = NULL;
+       if (controller->dma_tx) {
+               dma_release_channel(controller->dma_tx);
+               controller->dma_tx = NULL;
        }
 }
 
 static int spi_imx_sdma_init(struct device *dev, struct spi_imx_data *spi_imx,
-                            struct spi_master *master)
+                            struct spi_controller *controller)
 {
        int ret;
 
        spi_imx->wml = spi_imx->devtype_data->fifo_size / 2;
 
        /* Prepare for TX DMA: */
-       master->dma_tx = dma_request_chan(dev, "tx");
-       if (IS_ERR(master->dma_tx)) {
-               ret = PTR_ERR(master->dma_tx);
+       controller->dma_tx = dma_request_chan(dev, "tx");
+       if (IS_ERR(controller->dma_tx)) {
+               ret = PTR_ERR(controller->dma_tx);
                dev_dbg(dev, "can't get the TX DMA channel, error %d!\n", ret);
-               master->dma_tx = NULL;
+               controller->dma_tx = NULL;
                goto err;
        }
 
        /* Prepare for RX : */
-       master->dma_rx = dma_request_chan(dev, "rx");
-       if (IS_ERR(master->dma_rx)) {
-               ret = PTR_ERR(master->dma_rx);
+       controller->dma_rx = dma_request_chan(dev, "rx");
+       if (IS_ERR(controller->dma_rx)) {
+               ret = PTR_ERR(controller->dma_rx);
                dev_dbg(dev, "can't get the RX DMA channel, error %d\n", ret);
-               master->dma_rx = NULL;
+               controller->dma_rx = NULL;
                goto err;
        }
 
        init_completion(&spi_imx->dma_rx_completion);
        init_completion(&spi_imx->dma_tx_completion);
-       master->can_dma = spi_imx_can_dma;
-       master->max_dma_len = MAX_SDMA_BD_BYTES;
-       spi_imx->bitbang.master->flags = SPI_MASTER_MUST_RX |
-                                        SPI_MASTER_MUST_TX;
+       controller->can_dma = spi_imx_can_dma;
+       controller->max_dma_len = MAX_SDMA_BD_BYTES;
+       spi_imx->controller->flags = SPI_CONTROLLER_MUST_RX |
+                                        SPI_CONTROLLER_MUST_TX;
 
        return 0;
 err:
@@ -1349,7 +1376,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
        struct dma_async_tx_descriptor *desc_tx, *desc_rx;
        unsigned long transfer_timeout;
        unsigned long timeout;
-       struct spi_master *master = spi_imx->bitbang.master;
+       struct spi_controller *controller = spi_imx->controller;
        struct sg_table *tx = &transfer->tx_sg, *rx = &transfer->rx_sg;
        struct scatterlist *last_sg = sg_last(rx->sgl, rx->nents);
        unsigned int bytes_per_word, i;
@@ -1367,7 +1394,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
 
        spi_imx->wml =  i;
 
-       ret = spi_imx_dma_configure(master);
+       ret = spi_imx_dma_configure(controller);
        if (ret)
                goto dma_failure_no_start;
 
@@ -1382,7 +1409,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
         * The TX DMA setup starts the transfer, so make sure RX is configured
         * before TX.
         */
-       desc_rx = dmaengine_prep_slave_sg(master->dma_rx,
+       desc_rx = dmaengine_prep_slave_sg(controller->dma_rx,
                                rx->sgl, rx->nents, DMA_DEV_TO_MEM,
                                DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!desc_rx) {
@@ -1394,14 +1421,14 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
        desc_rx->callback_param = (void *)spi_imx;
        dmaengine_submit(desc_rx);
        reinit_completion(&spi_imx->dma_rx_completion);
-       dma_async_issue_pending(master->dma_rx);
+       dma_async_issue_pending(controller->dma_rx);
 
-       desc_tx = dmaengine_prep_slave_sg(master->dma_tx,
+       desc_tx = dmaengine_prep_slave_sg(controller->dma_tx,
                                tx->sgl, tx->nents, DMA_MEM_TO_DEV,
                                DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!desc_tx) {
-               dmaengine_terminate_all(master->dma_tx);
-               dmaengine_terminate_all(master->dma_rx);
+               dmaengine_terminate_all(controller->dma_tx);
+               dmaengine_terminate_all(controller->dma_rx);
                return -EINVAL;
        }
 
@@ -1409,7 +1436,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
        desc_tx->callback_param = (void *)spi_imx;
        dmaengine_submit(desc_tx);
        reinit_completion(&spi_imx->dma_tx_completion);
-       dma_async_issue_pending(master->dma_tx);
+       dma_async_issue_pending(controller->dma_tx);
 
        transfer_timeout = spi_imx_calculate_timeout(spi_imx, transfer->len);
 
@@ -1418,21 +1445,21 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
                                                transfer_timeout);
        if (!timeout) {
                dev_err(spi_imx->dev, "I/O Error in DMA TX\n");
-               dmaengine_terminate_all(master->dma_tx);
-               dmaengine_terminate_all(master->dma_rx);
+               dmaengine_terminate_all(controller->dma_tx);
+               dmaengine_terminate_all(controller->dma_rx);
                return -ETIMEDOUT;
        }
 
        timeout = wait_for_completion_timeout(&spi_imx->dma_rx_completion,
                                              transfer_timeout);
        if (!timeout) {
-               dev_err(&master->dev, "I/O Error in DMA RX\n");
+               dev_err(&controller->dev, "I/O Error in DMA RX\n");
                spi_imx->devtype_data->reset(spi_imx);
-               dmaengine_terminate_all(master->dma_rx);
+               dmaengine_terminate_all(controller->dma_rx);
                return -ETIMEDOUT;
        }
 
-       return transfer->len;
+       return 0;
 /* fall back to PIO */
 dma_failure_no_start:
        transfer->error |= SPI_TRANS_FAIL_NO_START;
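
The return value changes above (transfer->len becoming 0) follow from moving off the bitbang txrx_bufs hook, which reports the number of bytes transferred, onto the core's transfer_one contract. A sketch of that contract, with hypothetical helpers standing in for the hardware calls:

	static int example_transfer_one(struct spi_controller *ctlr,
					struct spi_device *spi,
					struct spi_transfer *xfer)
	{
		if (hw_start_xfer(xfer))	/* hypothetical helper */
			return -EIO;		/* negative: error, message is aborted */
		if (hw_completes_in_irq())	/* hypothetical helper */
			return 1;		/* positive: still in flight; the IRQ handler
						 * must call spi_finalize_current_transfer() */
		return 0;			/* zero: finished synchronously */
	}
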
@@ -1442,7 +1469,7 @@ dma_failure_no_start:
 static int spi_imx_pio_transfer(struct spi_device *spi,
                                struct spi_transfer *transfer)
 {
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
        unsigned long transfer_timeout;
        unsigned long timeout;
 
@@ -1468,14 +1495,62 @@ static int spi_imx_pio_transfer(struct spi_device *spi,
                return -ETIMEDOUT;
        }
 
-       return transfer->len;
+       return 0;
+}
+
+static int spi_imx_poll_transfer(struct spi_device *spi,
+                                struct spi_transfer *transfer)
+{
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
+       unsigned long timeout;
+
+       spi_imx->tx_buf = transfer->tx_buf;
+       spi_imx->rx_buf = transfer->rx_buf;
+       spi_imx->count = transfer->len;
+       spi_imx->txfifo = 0;
+       spi_imx->remainder = 0;
+
+       /* Fill the FIFO before calculating the timeout: if we are
+        * interrupted here, the hardware keeps transferring the data
+        * already queued in the FIFO.
+        */
+       spi_imx_push(spi_imx);
+
+       timeout = spi_imx_calculate_timeout(spi_imx, transfer->len) + jiffies;
+       while (spi_imx->txfifo) {
+               /* RX */
+               while (spi_imx->txfifo &&
+                      spi_imx->devtype_data->rx_available(spi_imx)) {
+                       spi_imx->rx(spi_imx);
+                       spi_imx->txfifo--;
+               }
+
+               /* TX */
+               if (spi_imx->count) {
+                       spi_imx_push(spi_imx);
+                       continue;
+               }
+
+               if (spi_imx->txfifo &&
+                   time_after(jiffies, timeout)) {
+
+                       dev_err_ratelimited(&spi->dev,
+                                           "timeout period reached: jiffies: %lu - falling back to interrupt mode\n",
+                                           jiffies - timeout);
+
+                       /* fall back to interrupt mode */
+                       return spi_imx_pio_transfer(spi, transfer);
+               }
+       }
+
+       return 0;
 }
 
 static int spi_imx_pio_transfer_slave(struct spi_device *spi,
                                      struct spi_transfer *transfer)
 {
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
-       int ret = transfer->len;
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
+       int ret = 0;
 
        if (is_imx53_ecspi(spi_imx) &&
            transfer->len > MX53_MAX_TRANSFER_BYTES) {
@@ -1515,11 +1590,14 @@ static int spi_imx_pio_transfer_slave(struct spi_device *spi,
        return ret;
 }
 
-static int spi_imx_transfer(struct spi_device *spi,
+static int spi_imx_transfer_one(struct spi_controller *controller,
+                               struct spi_device *spi,
                                struct spi_transfer *transfer)
 {
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
+       unsigned long hz_per_byte, byte_limit;
 
+       spi_imx_setupxfer(spi, transfer);
        transfer->effective_speed_hz = spi_imx->spi_bus_clk;
 
        /* flush rxfifo before transfer */
@@ -1529,6 +1607,17 @@ static int spi_imx_transfer(struct spi_device *spi,
        if (spi_imx->slave_mode)
                return spi_imx_pio_transfer_slave(spi, transfer);
 
+       /*
+        * Estimate how long the transfer runs and derive the largest
+        * length, in bytes, that fits the polling limit at this speed.
+        */
+       hz_per_byte = polling_limit_us ? ((8 + 4) * USEC_PER_SEC) / polling_limit_us : 0;
+       byte_limit = hz_per_byte ? transfer->effective_speed_hz / hz_per_byte : 1;
+
+       /* run in polling mode for short transfers */
+       if (transfer->len < byte_limit)
+               return spi_imx_poll_transfer(spi, transfer);
+
        if (spi_imx->usedma)
                return spi_imx_dma_transfer(spi_imx, transfer);
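
As a worked example of the polling heuristic above, assuming polling_limit_us = 30 (the default used by comparable drivers; the 8 + 4 appears to model 8 data bits plus ~4 bits of per-byte overhead) and a 10 MHz effective bus speed:

	/*
	 * hz_per_byte = ((8 + 4) * USEC_PER_SEC) / polling_limit_us
	 *             = 12000000 / 30            = 400000
	 * byte_limit  = effective_speed_hz / hz_per_byte
	 *             = 10000000 / 400000        = 25
	 * => transfers shorter than 25 bytes run in polling mode,
	 *    longer ones use DMA or interrupt mode.
	 */
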
 
@@ -1548,14 +1637,13 @@ static void spi_imx_cleanup(struct spi_device *spi)
 }
 
 static int
-spi_imx_prepare_message(struct spi_master *master, struct spi_message *msg)
+spi_imx_prepare_message(struct spi_controller *controller, struct spi_message *msg)
 {
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(controller);
        int ret;
 
-       ret = pm_runtime_get_sync(spi_imx->dev);
+       ret = pm_runtime_resume_and_get(spi_imx->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(spi_imx->dev);
                dev_err(spi_imx->dev, "failed to enable clock\n");
                return ret;
        }
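
pm_runtime_resume_and_get() folds the old two-call error pattern into one helper: it drops the usage count itself on failure, which is why the pm_runtime_put_noidle() call disappears from the error path. It behaves roughly like:

	/* Rough equivalent of pm_runtime_resume_and_get(dev): */
	int ret = pm_runtime_get_sync(dev);

	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* undo the get on failure */
		return ret;
	}
	return 0;	/* success, even if the device was already active */
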
@@ -1570,18 +1658,18 @@ spi_imx_prepare_message(struct spi_master *master, struct spi_message *msg)
 }
 
 static int
-spi_imx_unprepare_message(struct spi_master *master, struct spi_message *msg)
+spi_imx_unprepare_message(struct spi_controller *controller, struct spi_message *msg)
 {
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(controller);
 
        pm_runtime_mark_last_busy(spi_imx->dev);
        pm_runtime_put_autosuspend(spi_imx->dev);
        return 0;
 }
 
-static int spi_imx_slave_abort(struct spi_master *master)
+static int spi_imx_slave_abort(struct spi_controller *controller)
 {
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(controller);
 
        spi_imx->slave_aborted = true;
        complete(&spi_imx->xfer_done);
@@ -1592,7 +1680,7 @@ static int spi_imx_slave_abort(struct spi_master *master)
 static int spi_imx_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
-       struct spi_master *master;
+       struct spi_controller *controller;
        struct spi_imx_data *spi_imx;
        struct resource *res;
        int ret, irq, spi_drctl;
@@ -1604,12 +1692,12 @@ static int spi_imx_probe(struct platform_device *pdev)
        slave_mode = devtype_data->has_slavemode &&
                        of_property_read_bool(np, "spi-slave");
        if (slave_mode)
-               master = spi_alloc_slave(&pdev->dev,
-                                        sizeof(struct spi_imx_data));
+               controller = spi_alloc_slave(&pdev->dev,
+                                            sizeof(struct spi_imx_data));
        else
-               master = spi_alloc_master(&pdev->dev,
-                                         sizeof(struct spi_imx_data));
-       if (!master)
+               controller = spi_alloc_master(&pdev->dev,
+                                             sizeof(struct spi_imx_data));
+       if (!controller)
                return -ENOMEM;
 
        ret = of_property_read_u32(np, "fsl,spi-rdy-drctl", &spi_drctl);
@@ -1618,14 +1706,14 @@ static int spi_imx_probe(struct platform_device *pdev)
                spi_drctl = 0;
        }
 
-       platform_set_drvdata(pdev, master);
+       platform_set_drvdata(pdev, controller);
 
-       master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
-       master->bus_num = np ? -1 : pdev->id;
-       master->use_gpio_descriptors = true;
+       controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
+       controller->bus_num = np ? -1 : pdev->id;
+       controller->use_gpio_descriptors = true;
 
-       spi_imx = spi_master_get_devdata(master);
-       spi_imx->bitbang.master = master;
+       spi_imx = spi_controller_get_devdata(controller);
+       spi_imx->controller = controller;
        spi_imx->dev = &pdev->dev;
        spi_imx->slave_mode = slave_mode;
 
@@ -1638,22 +1726,24 @@ static int spi_imx_probe(struct platform_device *pdev)
         * board files have <= 3 chip selects.
         */
        if (!device_property_read_u32(&pdev->dev, "num-cs", &val))
-               master->num_chipselect = val;
+               controller->num_chipselect = val;
        else
-               master->num_chipselect = 3;
-
-       spi_imx->bitbang.setup_transfer = spi_imx_setupxfer;
-       spi_imx->bitbang.txrx_bufs = spi_imx_transfer;
-       spi_imx->bitbang.master->setup = spi_imx_setup;
-       spi_imx->bitbang.master->cleanup = spi_imx_cleanup;
-       spi_imx->bitbang.master->prepare_message = spi_imx_prepare_message;
-       spi_imx->bitbang.master->unprepare_message = spi_imx_unprepare_message;
-       spi_imx->bitbang.master->slave_abort = spi_imx_slave_abort;
-       spi_imx->bitbang.master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
-                                            | SPI_NO_CS;
+               controller->num_chipselect = 3;
+
+       spi_imx->controller->transfer_one = spi_imx_transfer_one;
+       spi_imx->controller->setup = spi_imx_setup;
+       spi_imx->controller->cleanup = spi_imx_cleanup;
+       spi_imx->controller->prepare_message = spi_imx_prepare_message;
+       spi_imx->controller->unprepare_message = spi_imx_unprepare_message;
+       spi_imx->controller->slave_abort = spi_imx_slave_abort;
+       spi_imx->controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_NO_CS;
+
        if (is_imx35_cspi(spi_imx) || is_imx51_ecspi(spi_imx) ||
            is_imx53_ecspi(spi_imx))
-               spi_imx->bitbang.master->mode_bits |= SPI_LOOP | SPI_READY;
+               spi_imx->controller->mode_bits |= SPI_LOOP | SPI_READY;
+
+       if (is_imx51_ecspi(spi_imx) || is_imx53_ecspi(spi_imx))
+               spi_imx->controller->mode_bits |= SPI_RX_CPHA_FLIP;
 
        if (is_imx51_ecspi(spi_imx) &&
            device_property_read_u32(&pdev->dev, "cs-gpios", NULL))
@@ -1662,7 +1752,7 @@ static int spi_imx_probe(struct platform_device *pdev)
                 * setting the burst length to the word size. This is
                 * considerably faster than manually controlling the CS.
                 */
-               spi_imx->bitbang.master->mode_bits |= SPI_CS_WORD;
+               spi_imx->controller->mode_bits |= SPI_CS_WORD;
 
        spi_imx->spi_drctl = spi_drctl;
 
@@ -1672,38 +1762,38 @@ static int spi_imx_probe(struct platform_device *pdev)
        spi_imx->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(spi_imx->base)) {
                ret = PTR_ERR(spi_imx->base);
-               goto out_master_put;
+               goto out_controller_put;
        }
        spi_imx->base_phys = res->start;
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0) {
                ret = irq;
-               goto out_master_put;
+               goto out_controller_put;
        }
 
        ret = devm_request_irq(&pdev->dev, irq, spi_imx_isr, 0,
                               dev_name(&pdev->dev), spi_imx);
        if (ret) {
                dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret);
-               goto out_master_put;
+               goto out_controller_put;
        }
 
        spi_imx->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
        if (IS_ERR(spi_imx->clk_ipg)) {
                ret = PTR_ERR(spi_imx->clk_ipg);
-               goto out_master_put;
+               goto out_controller_put;
        }
 
        spi_imx->clk_per = devm_clk_get(&pdev->dev, "per");
        if (IS_ERR(spi_imx->clk_per)) {
                ret = PTR_ERR(spi_imx->clk_per);
-               goto out_master_put;
+               goto out_controller_put;
        }
 
        ret = clk_prepare_enable(spi_imx->clk_per);
        if (ret)
-               goto out_master_put;
+               goto out_controller_put;
 
        ret = clk_prepare_enable(spi_imx->clk_ipg);
        if (ret)
@@ -1721,7 +1811,7 @@ static int spi_imx_probe(struct platform_device *pdev)
         * if validated on other chips.
         */
        if (spi_imx->devtype_data->has_dmamode) {
-               ret = spi_imx_sdma_init(&pdev->dev, spi_imx, master);
+               ret = spi_imx_sdma_init(&pdev->dev, spi_imx, controller);
                if (ret == -EPROBE_DEFER)
                        goto out_runtime_pm_put;
 
@@ -1734,11 +1824,11 @@ static int spi_imx_probe(struct platform_device *pdev)
 
        spi_imx->devtype_data->intctrl(spi_imx, 0);
 
-       master->dev.of_node = pdev->dev.of_node;
-       ret = spi_bitbang_start(&spi_imx->bitbang);
+       controller->dev.of_node = pdev->dev.of_node;
+       ret = spi_register_controller(controller);
        if (ret) {
-               dev_err_probe(&pdev->dev, ret, "bitbang start failed\n");
-               goto out_bitbang_start;
+               dev_err_probe(&pdev->dev, ret, "register controller failed\n");
+               goto out_register_controller;
        }
 
        pm_runtime_mark_last_busy(spi_imx->dev);
@@ -1746,7 +1836,7 @@ static int spi_imx_probe(struct platform_device *pdev)
 
        return ret;
 
-out_bitbang_start:
+out_register_controller:
        if (spi_imx->devtype_data->has_dmamode)
                spi_imx_sdma_exit(spi_imx);
 out_runtime_pm_put:
@@ -1757,23 +1847,22 @@ out_runtime_pm_put:
        clk_disable_unprepare(spi_imx->clk_ipg);
 out_put_per:
        clk_disable_unprepare(spi_imx->clk_per);
-out_master_put:
-       spi_master_put(master);
+out_controller_put:
+       spi_controller_put(controller);
 
        return ret;
 }
 
 static int spi_imx_remove(struct platform_device *pdev)
 {
-       struct spi_master *master = platform_get_drvdata(pdev);
-       struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
+       struct spi_controller *controller = platform_get_drvdata(pdev);
+       struct spi_imx_data *spi_imx = spi_controller_get_devdata(controller);
        int ret;
 
-       spi_bitbang_stop(&spi_imx->bitbang);
+       spi_unregister_controller(controller);
 
-       ret = pm_runtime_get_sync(spi_imx->dev);
+       ret = pm_runtime_resume_and_get(spi_imx->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(spi_imx->dev);
                dev_err(spi_imx->dev, "failed to enable clock\n");
                return ret;
        }
@@ -1785,18 +1874,17 @@ static int spi_imx_remove(struct platform_device *pdev)
        pm_runtime_disable(spi_imx->dev);
 
        spi_imx_sdma_exit(spi_imx);
-       spi_master_put(master);
 
        return 0;
 }
 
 static int __maybe_unused spi_imx_runtime_resume(struct device *dev)
 {
-       struct spi_master *master = dev_get_drvdata(dev);
+       struct spi_controller *controller = dev_get_drvdata(dev);
        struct spi_imx_data *spi_imx;
        int ret;
 
-       spi_imx = spi_master_get_devdata(master);
+       spi_imx = spi_controller_get_devdata(controller);
 
        ret = clk_prepare_enable(spi_imx->clk_per);
        if (ret)
@@ -1813,10 +1901,10 @@ static int __maybe_unused spi_imx_runtime_resume(struct device *dev)
 
 static int __maybe_unused spi_imx_runtime_suspend(struct device *dev)
 {
-       struct spi_master *master = dev_get_drvdata(dev);
+       struct spi_controller *controller = dev_get_drvdata(dev);
        struct spi_imx_data *spi_imx;
 
-       spi_imx = spi_master_get_devdata(master);
+       spi_imx = spi_controller_get_devdata(controller);
 
        clk_disable_unprepare(spi_imx->clk_per);
        clk_disable_unprepare(spi_imx->clk_ipg);
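
For reference, runtime PM callbacks like these are wired up through a dev_pm_ops table referenced from the platform driver; a sketch (the table name here is illustrative, the actual one lives outside this hunk):

	static const struct dev_pm_ops example_spi_imx_pm_ops = {
		SET_RUNTIME_PM_OPS(spi_imx_runtime_suspend,
				   spi_imx_runtime_resume, NULL)
	};
	/* ... .driver = { .pm = &example_spi_imx_pm_ops, ... } */
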
index 03077a7e11c85979a5193da102c470e07840fc67..713a238bee63a5b2c007874acf77748a64b594ca 100644 (file)
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * SPI bus driver for the Ingenic JZ47xx SoCs
+ * SPI bus driver for the Ingenic SoCs
  * Copyright (c) 2017-2021 Artur Rojek <contact@artur-rojek.eu>
  * Copyright (c) 2017-2021 Paul Cercueil <paul@crapouillou.net>
+ * Copyright (c) 2022 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
  */
 
 #include <linux/clk.h>
@@ -52,6 +53,9 @@ struct jz_soc_info {
        u32 bits_per_word_mask;
        struct reg_field flen_field;
        bool has_trendian;
+
+       unsigned int max_speed_hz;
+       unsigned int max_native_cs;
 };
 
 struct ingenic_spi {
@@ -380,7 +384,7 @@ static int spi_ingenic_probe(struct platform_device *pdev)
        struct spi_controller *ctlr;
        struct ingenic_spi *priv;
        void __iomem *base;
-       int ret;
+       int num_cs, ret;
 
        pdata = of_device_get_match_data(dev);
        if (!pdata) {
@@ -416,6 +420,9 @@ static int spi_ingenic_probe(struct platform_device *pdev)
        if (IS_ERR(priv->flen_field))
                return PTR_ERR(priv->flen_field);
 
+       if (device_property_read_u32(dev, "num-cs", &num_cs))
+               num_cs = pdata->max_native_cs;
+
        platform_set_drvdata(pdev, ctlr);
 
        ctlr->prepare_transfer_hardware = spi_ingenic_prepare_hardware;
@@ -428,8 +435,10 @@ static int spi_ingenic_probe(struct platform_device *pdev)
        ctlr->max_dma_len = SPI_INGENIC_FIFO_SIZE;
        ctlr->bits_per_word_mask = pdata->bits_per_word_mask;
        ctlr->min_speed_hz = 7200;
-       ctlr->max_speed_hz = 54000000;
-       ctlr->num_chipselect = 2;
+       ctlr->max_speed_hz = pdata->max_speed_hz;
+       ctlr->use_gpio_descriptors = true;
+       ctlr->max_native_cs = pdata->max_native_cs;
+       ctlr->num_chipselect = num_cs;
        ctlr->dev.of_node = pdev->dev.of_node;
 
        if (spi_ingenic_request_dma(ctlr, dev))
@@ -452,17 +461,44 @@ static const struct jz_soc_info jz4750_soc_info = {
        .bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 17),
        .flen_field = REG_FIELD(REG_SSICR1, 4, 7),
        .has_trendian = false,
+
+       .max_speed_hz = 54000000,
+       .max_native_cs = 2,
 };
 
 static const struct jz_soc_info jz4780_soc_info = {
        .bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 32),
        .flen_field = REG_FIELD(REG_SSICR1, 3, 7),
        .has_trendian = true,
+
+       .max_speed_hz = 54000000,
+       .max_native_cs = 2,
+};
+
+static const struct jz_soc_info x1000_soc_info = {
+       .bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 32),
+       .flen_field = REG_FIELD(REG_SSICR1, 3, 7),
+       .has_trendian = true,
+
+       .max_speed_hz = 50000000,
+       .max_native_cs = 2,
+};
+
+static const struct jz_soc_info x2000_soc_info = {
+       .bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 32),
+       .flen_field = REG_FIELD(REG_SSICR1, 3, 7),
+       .has_trendian = true,
+
+       .max_speed_hz = 50000000,
+       .max_native_cs = 1,
 };
 
 static const struct of_device_id spi_ingenic_of_match[] = {
        { .compatible = "ingenic,jz4750-spi", .data = &jz4750_soc_info },
+       { .compatible = "ingenic,jz4775-spi", .data = &jz4780_soc_info },
        { .compatible = "ingenic,jz4780-spi", .data = &jz4780_soc_info },
+       { .compatible = "ingenic,x1000-spi", .data = &x1000_soc_info },
+       { .compatible = "ingenic,x2000-spi", .data = &x2000_soc_info },
        {}
 };
 MODULE_DEVICE_TABLE(of, spi_ingenic_of_match);
@@ -476,7 +512,8 @@ static struct platform_driver spi_ingenic_driver = {
 };
 
 module_platform_driver(spi_ingenic_driver);
-MODULE_DESCRIPTION("SPI bus driver for the Ingenic JZ47xx SoCs");
+MODULE_DESCRIPTION("SPI bus driver for the Ingenic SoCs");
 MODULE_AUTHOR("Artur Rojek <contact@artur-rojek.eu>");
 MODULE_AUTHOR("Paul Cercueil <paul@crapouillou.net>");
+MODULE_AUTHOR("周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>");
 MODULE_LICENSE("GPL");
index e937cfe85559b192738b1522dc55ab5d388ad112..50f42983b9502c838a57c5b345dbfc26deabdefc 100644 (file)
@@ -779,10 +779,59 @@ static const char *intel_spi_get_name(struct spi_mem *mem)
        return dev_name(ispi->dev);
 }
 
+static int intel_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
+{
+       struct intel_spi *ispi = spi_master_get_devdata(desc->mem->spi->master);
+       const struct intel_spi_mem_op *iop;
+
+       iop = intel_spi_match_mem_op(ispi, &desc->info.op_tmpl);
+       if (!iop)
+               return -EOPNOTSUPP;
+
+       desc->priv = (void *)iop;
+       return 0;
+}
+
+static ssize_t intel_spi_dirmap_read(struct spi_mem_dirmap_desc *desc, u64 offs,
+                                    size_t len, void *buf)
+{
+       struct intel_spi *ispi = spi_master_get_devdata(desc->mem->spi->master);
+       const struct intel_spi_mem_op *iop = desc->priv;
+       struct spi_mem_op op = desc->info.op_tmpl;
+       int ret;
+
+       /* Fill in the gaps */
+       op.addr.val = offs;
+       op.data.nbytes = len;
+       op.data.buf.in = buf;
+
+       ret = iop->exec_op(ispi, iop, &op);
+       return ret ? ret : len;
+}
+
+static ssize_t intel_spi_dirmap_write(struct spi_mem_dirmap_desc *desc, u64 offs,
+                                     size_t len, const void *buf)
+{
+       struct intel_spi *ispi = spi_master_get_devdata(desc->mem->spi->master);
+       const struct intel_spi_mem_op *iop = desc->priv;
+       struct spi_mem_op op = desc->info.op_tmpl;
+       int ret;
+
+       op.addr.val = offs;
+       op.data.nbytes = len;
+       op.data.buf.out = buf;
+
+       ret = iop->exec_op(ispi, iop, &op);
+       return ret ? ret : len;
+}
+
 static const struct spi_controller_mem_ops intel_spi_mem_ops = {
        .supports_op = intel_spi_supports_mem_op,
        .exec_op = intel_spi_exec_mem_op,
        .get_name = intel_spi_get_name,
+       .dirmap_create = intel_spi_dirmap_create,
+       .dirmap_read = intel_spi_dirmap_read,
+       .dirmap_write = intel_spi_dirmap_write,
 };
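
The new dirmap hooks plug into the generic SPI-MEM direct-mapping API: a consumer creates a descriptor once (which lands in intel_spi_dirmap_create() above) and then streams reads or writes through it. A minimal usage sketch with illustrative template values:

	struct spi_mem_dirmap_info info = {
		.op_tmpl = SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1),	/* READ */
				      SPI_MEM_OP_ADDR(3, 0, 1),
				      SPI_MEM_OP_NO_DUMMY,
				      SPI_MEM_OP_DATA_IN(0, NULL, 1)),
		.offset = 0,
		.length = SZ_16M,
	};
	struct spi_mem_dirmap_desc *desc;

	desc = spi_mem_dirmap_create(mem, &info);
	if (!IS_ERR(desc)) {
		spi_mem_dirmap_read(desc, 0x1000, 256, buf);
		spi_mem_dirmap_destroy(desc);
	}
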
 
 #define INTEL_SPI_OP_ADDR(__nbytes)                                    \
@@ -1205,7 +1254,7 @@ static int intel_spi_populate_chip(struct intel_spi *ispi)
  * intel_spi_probe() - Probe the Intel SPI flash controller
  * @dev: Pointer to the parent device
  * @mem: MMIO resource
- * @info: Platform spefific information
+ * @info: Platform specific information
  *
  * Probes Intel SPI flash controller and creates the flash chip device.
  * Returns %0 on success and negative errno in case of failure.
index 0e8dafc62d94ff886f92e52c61287c8f3041ff37..e8de4f5017cdc5cb2566a3479970f3d9e5f73c7b 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
+#include <linux/sched/task_stack.h>
 
 #include "internals.h"
 
@@ -211,6 +212,15 @@ static int spi_mem_check_op(const struct spi_mem_op *op)
            !spi_mem_buswidth_is_valid(op->data.buswidth))
                return -EINVAL;
 
+       /* Buffers must be DMA-able. */
+       if (WARN_ON_ONCE(op->data.dir == SPI_MEM_DATA_IN &&
+                        object_is_on_stack(op->data.buf.in)))
+               return -EINVAL;
+
+       if (WARN_ON_ONCE(op->data.dir == SPI_MEM_DATA_OUT &&
+                        object_is_on_stack(op->data.buf.out)))
+               return -EINVAL;
+
        return 0;
 }
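
These checks catch a pattern that used to fail in subtle ways: handing an on-stack buffer to a controller that DMAs it. Illustration of a call site (0x9f is the JEDEC read-ID opcode, used here purely as an example):

	u8 id[3];	/* on the stack: now rejected by spi_mem_check_op() */
	struct spi_mem_op op =
		SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1), SPI_MEM_OP_NO_ADDR,
			   SPI_MEM_OP_NO_DUMMY, SPI_MEM_OP_DATA_IN(3, id, 1));

	/* OK instead: heap memory is DMA-able. */
	u8 *idbuf = kmalloc(3, GFP_KERNEL);
	/* ... build the op with idbuf, spi_mem_exec_op(mem, &op), kfree(idbuf) ... */
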
 
@@ -262,9 +272,8 @@ static int spi_mem_access_start(struct spi_mem *mem)
        if (ctlr->auto_runtime_pm) {
                int ret;
 
-               ret = pm_runtime_get_sync(ctlr->dev.parent);
+               ret = pm_runtime_resume_and_get(ctlr->dev.parent);
                if (ret < 0) {
-                       pm_runtime_put_noidle(ctlr->dev.parent);
                        dev_err(&ctlr->dev, "Failed to power device: %d\n",
                                ret);
                        return ret;
index 21ef5d481faf4b5b9a3e8decf7de041e8da49cd7..7654736c2c0e4173e5f80f5e39ad03bf9402098c 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/spi/spi.h>
 #include <linux/fsl_devices.h>
 #include <linux/slab.h>
+#include <linux/of_irq.h>
 
 #include <asm/mpc52xx.h>
 #include <asm/mpc52xx_psc.h>
index 51041526546dd2f3c264bc8d08b0ed653a13c53f..3ebdce804b908c2d39710a46c913b6c15181aead 100644 (file)
@@ -19,6 +19,9 @@
 #include <linux/io.h>
 #include <linux/of_gpio.h>
 #include <linux/slab.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
 #include <asm/time.h>
 #include <asm/mpc52xx.h>
 
index 1a0b3208dfcaf719a07d0f16e6faea24ae82f2e1..0a3b9f7eed30f1edf0dfc8868b453698908061c3 100644 (file)
 #include <linux/platform_data/spi-mt65xx.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
 #include <linux/dma-mapping.h>
 
-#define SPI_CFG0_REG                      0x0000
-#define SPI_CFG1_REG                      0x0004
-#define SPI_TX_SRC_REG                    0x0008
-#define SPI_RX_DST_REG                    0x000c
-#define SPI_TX_DATA_REG                   0x0010
-#define SPI_RX_DATA_REG                   0x0014
-#define SPI_CMD_REG                       0x0018
-#define SPI_STATUS0_REG                   0x001c
-#define SPI_PAD_SEL_REG                   0x0024
-#define SPI_CFG2_REG                      0x0028
-#define SPI_TX_SRC_REG_64                 0x002c
-#define SPI_RX_DST_REG_64                 0x0030
-#define SPI_CFG3_IPM_REG                  0x0040
-
-#define SPI_CFG0_SCK_HIGH_OFFSET          0
-#define SPI_CFG0_SCK_LOW_OFFSET           8
-#define SPI_CFG0_CS_HOLD_OFFSET           16
-#define SPI_CFG0_CS_SETUP_OFFSET          24
-#define SPI_ADJUST_CFG0_CS_HOLD_OFFSET    0
-#define SPI_ADJUST_CFG0_CS_SETUP_OFFSET   16
-
-#define SPI_CFG1_CS_IDLE_OFFSET           0
-#define SPI_CFG1_PACKET_LOOP_OFFSET       8
-#define SPI_CFG1_PACKET_LENGTH_OFFSET     16
-#define SPI_CFG1_GET_TICK_DLY_OFFSET      29
-#define SPI_CFG1_GET_TICK_DLY_OFFSET_V1   30
-
-#define SPI_CFG1_GET_TICK_DLY_MASK        0xe0000000
-#define SPI_CFG1_GET_TICK_DLY_MASK_V1     0xc0000000
-
-#define SPI_CFG1_CS_IDLE_MASK             0xff
-#define SPI_CFG1_PACKET_LOOP_MASK         0xff00
-#define SPI_CFG1_PACKET_LENGTH_MASK       0x3ff0000
-#define SPI_CFG1_IPM_PACKET_LENGTH_MASK   GENMASK(31, 16)
-#define SPI_CFG2_SCK_HIGH_OFFSET          0
-#define SPI_CFG2_SCK_LOW_OFFSET           16
-
-#define SPI_CMD_ACT                  BIT(0)
-#define SPI_CMD_RESUME               BIT(1)
-#define SPI_CMD_RST                  BIT(2)
-#define SPI_CMD_PAUSE_EN             BIT(4)
-#define SPI_CMD_DEASSERT             BIT(5)
-#define SPI_CMD_SAMPLE_SEL           BIT(6)
-#define SPI_CMD_CS_POL               BIT(7)
-#define SPI_CMD_CPHA                 BIT(8)
-#define SPI_CMD_CPOL                 BIT(9)
-#define SPI_CMD_RX_DMA               BIT(10)
-#define SPI_CMD_TX_DMA               BIT(11)
-#define SPI_CMD_TXMSBF               BIT(12)
-#define SPI_CMD_RXMSBF               BIT(13)
-#define SPI_CMD_RX_ENDIAN            BIT(14)
-#define SPI_CMD_TX_ENDIAN            BIT(15)
-#define SPI_CMD_FINISH_IE            BIT(16)
-#define SPI_CMD_PAUSE_IE             BIT(17)
-#define SPI_CMD_IPM_NONIDLE_MODE     BIT(19)
-#define SPI_CMD_IPM_SPIM_LOOP        BIT(21)
-#define SPI_CMD_IPM_GET_TICKDLY_OFFSET    22
+#define SPI_CFG0_REG                   0x0000
+#define SPI_CFG1_REG                   0x0004
+#define SPI_TX_SRC_REG                 0x0008
+#define SPI_RX_DST_REG                 0x000c
+#define SPI_TX_DATA_REG                        0x0010
+#define SPI_RX_DATA_REG                        0x0014
+#define SPI_CMD_REG                    0x0018
+#define SPI_STATUS0_REG                        0x001c
+#define SPI_PAD_SEL_REG                        0x0024
+#define SPI_CFG2_REG                   0x0028
+#define SPI_TX_SRC_REG_64              0x002c
+#define SPI_RX_DST_REG_64              0x0030
+#define SPI_CFG3_IPM_REG               0x0040
+
+#define SPI_CFG0_SCK_HIGH_OFFSET       0
+#define SPI_CFG0_SCK_LOW_OFFSET                8
+#define SPI_CFG0_CS_HOLD_OFFSET                16
+#define SPI_CFG0_CS_SETUP_OFFSET       24
+#define SPI_ADJUST_CFG0_CS_HOLD_OFFSET 0
+#define SPI_ADJUST_CFG0_CS_SETUP_OFFSET        16
+
+#define SPI_CFG1_CS_IDLE_OFFSET                0
+#define SPI_CFG1_PACKET_LOOP_OFFSET    8
+#define SPI_CFG1_PACKET_LENGTH_OFFSET  16
+#define SPI_CFG1_GET_TICK_DLY_OFFSET   29
+#define SPI_CFG1_GET_TICK_DLY_OFFSET_V1        30
+
+#define SPI_CFG1_GET_TICK_DLY_MASK     0xe0000000
+#define SPI_CFG1_GET_TICK_DLY_MASK_V1  0xc0000000
+
+#define SPI_CFG1_CS_IDLE_MASK          0xff
+#define SPI_CFG1_PACKET_LOOP_MASK      0xff00
+#define SPI_CFG1_PACKET_LENGTH_MASK    0x3ff0000
+#define SPI_CFG1_IPM_PACKET_LENGTH_MASK        GENMASK(31, 16)
+#define SPI_CFG2_SCK_HIGH_OFFSET       0
+#define SPI_CFG2_SCK_LOW_OFFSET                16
+
+#define SPI_CMD_ACT                    BIT(0)
+#define SPI_CMD_RESUME                 BIT(1)
+#define SPI_CMD_RST                    BIT(2)
+#define SPI_CMD_PAUSE_EN               BIT(4)
+#define SPI_CMD_DEASSERT               BIT(5)
+#define SPI_CMD_SAMPLE_SEL             BIT(6)
+#define SPI_CMD_CS_POL                 BIT(7)
+#define SPI_CMD_CPHA                   BIT(8)
+#define SPI_CMD_CPOL                   BIT(9)
+#define SPI_CMD_RX_DMA                 BIT(10)
+#define SPI_CMD_TX_DMA                 BIT(11)
+#define SPI_CMD_TXMSBF                 BIT(12)
+#define SPI_CMD_RXMSBF                 BIT(13)
+#define SPI_CMD_RX_ENDIAN              BIT(14)
+#define SPI_CMD_TX_ENDIAN              BIT(15)
+#define SPI_CMD_FINISH_IE              BIT(16)
+#define SPI_CMD_PAUSE_IE               BIT(17)
+#define SPI_CMD_IPM_NONIDLE_MODE       BIT(19)
+#define SPI_CMD_IPM_SPIM_LOOP          BIT(21)
+#define SPI_CMD_IPM_GET_TICKDLY_OFFSET 22
 
 #define SPI_CMD_IPM_GET_TICKDLY_MASK   GENMASK(24, 22)
-#define SPI_CFG3_IPM_HALF_DUPLEX_DIR           BIT(2)
-#define SPI_CFG3_IPM_HALF_DUPLEX_EN            BIT(3)
-#define MT8173_SPI_MAX_PAD_SEL 3
 
-#define MTK_SPI_PAUSE_INT_STATUS 0x2
+#define PIN_MODE_CFG(x)        ((x) / 2)
 
-#define MTK_SPI_IDLE 0
-#define MTK_SPI_PAUSED 1
+#define SPI_CFG3_IPM_HALF_DUPLEX_DIR   BIT(2)
+#define SPI_CFG3_IPM_HALF_DUPLEX_EN    BIT(3)
+#define SPI_CFG3_IPM_XMODE_EN          BIT(4)
+#define SPI_CFG3_IPM_NODATA_FLAG       BIT(5)
+#define SPI_CFG3_IPM_CMD_BYTELEN_OFFSET        8
+#define SPI_CFG3_IPM_ADDR_BYTELEN_OFFSET 12
 
-#define MTK_SPI_MAX_FIFO_SIZE 32U
-#define MTK_SPI_PACKET_SIZE 1024
-#define MTK_SPI_IPM_PACKET_SIZE SZ_64K
-#define MTK_SPI_32BITS_MASK  (0xffffffff)
+#define SPI_CFG3_IPM_CMD_PIN_MODE_MASK GENMASK(1, 0)
+#define SPI_CFG3_IPM_CMD_BYTELEN_MASK  GENMASK(11, 8)
+#define SPI_CFG3_IPM_ADDR_BYTELEN_MASK GENMASK(15, 12)
 
-#define DMA_ADDR_EXT_BITS (36)
-#define DMA_ADDR_DEF_BITS (32)
+#define MT8173_SPI_MAX_PAD_SEL         3
 
+#define MTK_SPI_PAUSE_INT_STATUS       0x2
+
+#define MTK_SPI_MAX_FIFO_SIZE          32U
+#define MTK_SPI_PACKET_SIZE            1024
+#define MTK_SPI_IPM_PACKET_SIZE                SZ_64K
+#define MTK_SPI_IPM_PACKET_LOOP                SZ_256
+
+#define MTK_SPI_IDLE                   0
+#define MTK_SPI_PAUSED                 1
+
+#define MTK_SPI_32BITS_MASK            (0xffffffff)
+
+#define DMA_ADDR_EXT_BITS              (36)
+#define DMA_ADDR_DEF_BITS              (32)
+
+/**
+ * struct mtk_spi_compatible - device data structure
+ * @need_pad_sel:      Enable pad (pins) selection in SPI controller
+ * @must_tx:           Must explicitly send dummy TX bytes to do an RX-only transfer
+ * @enhance_timing:    Enable adjusting the cfg register to enhance timing accuracy
+ * @dma_ext:           DMA address extension supported
+ * @no_need_unprepare: Don't unprepare the SPI clock during runtime suspend
+ * @ipm_design:                Adjust/extend registers to support IPM design IP features
+ */
 struct mtk_spi_compatible {
        bool need_pad_sel;
-       /* Must explicitly send dummy Tx bytes to do Rx only transfer */
        bool must_tx;
-       /* some IC design adjust cfg register to enhance time accuracy */
        bool enhance_timing;
-       /* some IC support DMA addr extension */
        bool dma_ext;
-       /* some IC no need unprepare SPI clk */
        bool no_need_unprepare;
-       /* IPM design adjust and extend register to support more features */
        bool ipm_design;
-
 };
 
+/**
+ * struct mtk_spi - SPI driver instance
+ * @base:              Start address of the SPI controller registers
+ * @state:             SPI controller state
+ * @pad_num:           Number of pad_sel entries
+ * @pad_sel:           Groups of pins to select
+ * @parent_clk:                Parent of sel_clk
+ * @sel_clk:           SPI master mux clock
+ * @spi_clk:           Peripheral clock
+ * @spi_hclk:          AHB bus clock
+ * @cur_transfer:      Currently processed SPI transfer
+ * @xfer_len:          Number of bytes to transfer
+ * @num_xfered:                Number of transferred bytes
+ * @tx_sgl:            TX transfer scatterlist
+ * @rx_sgl:            RX transfer scatterlist
+ * @tx_sgl_len:                Size of TX DMA transfer
+ * @rx_sgl_len:                Size of RX DMA transfer
+ * @dev_comp:          Device data structure
+ * @spi_clk_hz:                Current SPI clock in Hz
+ * @spimem_done:       SPI-MEM operation completion
+ * @use_spimem:                True while an SPI-MEM operation is in progress
+ * @dev:               Device pointer
+ * @tx_dma:            DMA address of the SPI-MEM TX buffer
+ * @rx_dma:            DMA address of the SPI-MEM RX buffer
+ */
 struct mtk_spi {
        void __iomem *base;
        u32 state;
        int pad_num;
        u32 *pad_sel;
-       struct clk *parent_clk, *sel_clk, *spi_clk;
+       struct clk *parent_clk, *sel_clk, *spi_clk, *spi_hclk;
        struct spi_transfer *cur_transfer;
        u32 xfer_len;
        u32 num_xfered;
@@ -123,6 +166,11 @@ struct mtk_spi {
        u32 tx_sgl_len, rx_sgl_len;
        const struct mtk_spi_compatible *dev_comp;
        u32 spi_clk_hz;
+       struct completion spimem_done;
+       bool use_spimem;
+       struct device *dev;
+       dma_addr_t tx_dma;
+       dma_addr_t rx_dma;
 };
 
 static const struct mtk_spi_compatible mtk_common_compat;
@@ -704,6 +752,12 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id)
        else
                mdata->state = MTK_SPI_IDLE;
 
+       /* SPI-MEM ops */
+       if (mdata->use_spimem) {
+               complete(&mdata->spimem_done);
+               return IRQ_HANDLED;
+       }
+
        if (!master->can_dma(master, NULL, trans)) {
                if (trans->rx_buf) {
                        cnt = mdata->xfer_len / 4;
@@ -787,21 +841,287 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static int mtk_spi_mem_adjust_op_size(struct spi_mem *mem,
+                                     struct spi_mem_op *op)
+{
+       int opcode_len;
+
+       if (op->data.dir != SPI_MEM_NO_DATA) {
+               opcode_len = 1 + op->addr.nbytes + op->dummy.nbytes;
+               if (opcode_len + op->data.nbytes > MTK_SPI_IPM_PACKET_SIZE) {
+                       op->data.nbytes = MTK_SPI_IPM_PACKET_SIZE - opcode_len;
+                       /* force data buffer dma-aligned. */
+                       op->data.nbytes -= op->data.nbytes % 4;
+               }
+       }
+
+       return 0;
+}
+
+static bool mtk_spi_mem_supports_op(struct spi_mem *mem,
+                                   const struct spi_mem_op *op)
+{
+       if (!spi_mem_default_supports_op(mem, op))
+               return false;
+
+       if (op->addr.nbytes && op->dummy.nbytes &&
+           op->addr.buswidth != op->dummy.buswidth)
+               return false;
+
+       if (op->addr.nbytes + op->dummy.nbytes > 16)
+               return false;
+
+       if (op->data.nbytes > MTK_SPI_IPM_PACKET_SIZE) {
+               if (op->data.nbytes / MTK_SPI_IPM_PACKET_SIZE >
+                   MTK_SPI_IPM_PACKET_LOOP ||
+                   op->data.nbytes % MTK_SPI_IPM_PACKET_SIZE != 0)
+                       return false;
+       }
+
+       return true;
+}
+
+static void mtk_spi_mem_setup_dma_xfer(struct spi_master *master,
+                                      const struct spi_mem_op *op)
+{
+       struct mtk_spi *mdata = spi_master_get_devdata(master);
+
+       writel((u32)(mdata->tx_dma & MTK_SPI_32BITS_MASK),
+              mdata->base + SPI_TX_SRC_REG);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+       if (mdata->dev_comp->dma_ext)
+               writel((u32)(mdata->tx_dma >> 32),
+                      mdata->base + SPI_TX_SRC_REG_64);
+#endif
+
+       if (op->data.dir == SPI_MEM_DATA_IN) {
+               writel((u32)(mdata->rx_dma & MTK_SPI_32BITS_MASK),
+                      mdata->base + SPI_RX_DST_REG);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+               if (mdata->dev_comp->dma_ext)
+                       writel((u32)(mdata->rx_dma >> 32),
+                              mdata->base + SPI_RX_DST_REG_64);
+#endif
+       }
+}
+
+static int mtk_spi_transfer_wait(struct spi_mem *mem,
+                                const struct spi_mem_op *op)
+{
+       struct mtk_spi *mdata = spi_master_get_devdata(mem->spi->master);
+       /*
+        * Each byte takes 8 cycles of the SPI clock. The clock rate
+        * is given in Hz while the timeout is in milliseconds, hence
+        * the factor of 8 * 1000.
+        */
+       u64 ms = 8000LL;
+
+       if (op->data.dir == SPI_MEM_NO_DATA)
+               ms *= 32; /* avoid a zero timeout for very short transfers. */
+       else
+               ms *= op->data.nbytes;
+       ms = div_u64(ms, mem->spi->max_speed_hz);
+       ms += ms + 1000; /* double the estimate and add 1s of tolerance */
+
+       if (ms > UINT_MAX)
+               ms = UINT_MAX;
+
+       if (!wait_for_completion_timeout(&mdata->spimem_done,
+                                        msecs_to_jiffies(ms))) {
+               dev_err(mdata->dev, "spi-mem transfer timeout\n");
+               return -ETIMEDOUT;
+       }
+
+       return 0;
+}
+
+static int mtk_spi_mem_exec_op(struct spi_mem *mem,
+                              const struct spi_mem_op *op)
+{
+       struct mtk_spi *mdata = spi_master_get_devdata(mem->spi->master);
+       u32 reg_val, nio, tx_size;
+       char *tx_tmp_buf, *rx_tmp_buf;
+       int ret = 0;
+
+       mdata->use_spimem = true;
+       reinit_completion(&mdata->spimem_done);
+
+       mtk_spi_reset(mdata);
+       mtk_spi_hw_init(mem->spi->master, mem->spi);
+       mtk_spi_prepare_transfer(mem->spi->master, mem->spi->max_speed_hz);
+
+       reg_val = readl(mdata->base + SPI_CFG3_IPM_REG);
+       /* opcode byte len */
+       reg_val &= ~SPI_CFG3_IPM_CMD_BYTELEN_MASK;
+       reg_val |= 1 << SPI_CFG3_IPM_CMD_BYTELEN_OFFSET;
+
+       /* addr & dummy byte len */
+       reg_val &= ~SPI_CFG3_IPM_ADDR_BYTELEN_MASK;
+       if (op->addr.nbytes || op->dummy.nbytes)
+               reg_val |= (op->addr.nbytes + op->dummy.nbytes) <<
+                           SPI_CFG3_IPM_ADDR_BYTELEN_OFFSET;
+
+       /* data byte len */
+       if (op->data.dir == SPI_MEM_NO_DATA) {
+               reg_val |= SPI_CFG3_IPM_NODATA_FLAG;
+               writel(0, mdata->base + SPI_CFG1_REG);
+       } else {
+               reg_val &= ~SPI_CFG3_IPM_NODATA_FLAG;
+               mdata->xfer_len = op->data.nbytes;
+               mtk_spi_setup_packet(mem->spi->master);
+       }
+
+       if (op->addr.nbytes || op->dummy.nbytes) {
+               if (op->addr.buswidth == 1 || op->dummy.buswidth == 1)
+                       reg_val |= SPI_CFG3_IPM_XMODE_EN;
+               else
+                       reg_val &= ~SPI_CFG3_IPM_XMODE_EN;
+       }
+
+       if (op->addr.buswidth == 2 ||
+           op->dummy.buswidth == 2 ||
+           op->data.buswidth == 2)
+               nio = 2;
+       else if (op->addr.buswidth == 4 ||
+                op->dummy.buswidth == 4 ||
+                op->data.buswidth == 4)
+               nio = 4;
+       else
+               nio = 1;
+
+       reg_val &= ~SPI_CFG3_IPM_CMD_PIN_MODE_MASK;
+       reg_val |= PIN_MODE_CFG(nio);
+
+       reg_val |= SPI_CFG3_IPM_HALF_DUPLEX_EN;
+       if (op->data.dir == SPI_MEM_DATA_IN)
+               reg_val |= SPI_CFG3_IPM_HALF_DUPLEX_DIR;
+       else
+               reg_val &= ~SPI_CFG3_IPM_HALF_DUPLEX_DIR;
+       writel(reg_val, mdata->base + SPI_CFG3_IPM_REG);
+
+       tx_size = 1 + op->addr.nbytes + op->dummy.nbytes;
+       if (op->data.dir == SPI_MEM_DATA_OUT)
+               tx_size += op->data.nbytes;
+
+       tx_size = max_t(u32, tx_size, 32);
+
+       tx_tmp_buf = kzalloc(tx_size, GFP_KERNEL | GFP_DMA);
+       if (!tx_tmp_buf) {
+               mdata->use_spimem = false;
+               return -ENOMEM;
+       }
+
+       tx_tmp_buf[0] = op->cmd.opcode;
+
+       if (op->addr.nbytes) {
+               int i;
+
+               for (i = 0; i < op->addr.nbytes; i++)
+                       tx_tmp_buf[i + 1] = op->addr.val >>
+                                       (8 * (op->addr.nbytes - i - 1));
+       }
+
+       if (op->dummy.nbytes)
+               memset(tx_tmp_buf + op->addr.nbytes + 1,
+                      0xff,
+                      op->dummy.nbytes);
+
+       if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
+               memcpy(tx_tmp_buf + op->dummy.nbytes + op->addr.nbytes + 1,
+                      op->data.buf.out,
+                      op->data.nbytes);
+
+       mdata->tx_dma = dma_map_single(mdata->dev, tx_tmp_buf,
+                                      tx_size, DMA_TO_DEVICE);
+       if (dma_mapping_error(mdata->dev, mdata->tx_dma)) {
+               ret = -ENOMEM;
+               goto err_exit;
+       }
+
+       if (op->data.dir == SPI_MEM_DATA_IN) {
+               if (!IS_ALIGNED((size_t)op->data.buf.in, 4)) {
+                       rx_tmp_buf = kzalloc(op->data.nbytes,
+                                            GFP_KERNEL | GFP_DMA);
+                       if (!rx_tmp_buf) {
+                               ret = -ENOMEM;
+                               goto unmap_tx_dma;
+                       }
+               } else {
+                       rx_tmp_buf = op->data.buf.in;
+               }
+
+               mdata->rx_dma = dma_map_single(mdata->dev,
+                                              rx_tmp_buf,
+                                              op->data.nbytes,
+                                              DMA_FROM_DEVICE);
+               if (dma_mapping_error(mdata->dev, mdata->rx_dma)) {
+                       ret = -ENOMEM;
+                       goto kfree_rx_tmp_buf;
+               }
+       }
+
+       reg_val = readl(mdata->base + SPI_CMD_REG);
+       reg_val |= SPI_CMD_TX_DMA;
+       if (op->data.dir == SPI_MEM_DATA_IN)
+               reg_val |= SPI_CMD_RX_DMA;
+       writel(reg_val, mdata->base + SPI_CMD_REG);
+
+       mtk_spi_mem_setup_dma_xfer(mem->spi->master, op);
+
+       mtk_spi_enable_transfer(mem->spi->master);
+
+       /* Wait for the interrupt. */
+       ret = mtk_spi_transfer_wait(mem, op);
+       if (ret)
+               goto unmap_rx_dma;
+
+       /* disable SPI DMA again */
+       reg_val = readl(mdata->base + SPI_CMD_REG);
+       reg_val &= ~SPI_CMD_TX_DMA;
+       if (op->data.dir == SPI_MEM_DATA_IN)
+               reg_val &= ~SPI_CMD_RX_DMA;
+       writel(reg_val, mdata->base + SPI_CMD_REG);
+
+unmap_rx_dma:
+       if (op->data.dir == SPI_MEM_DATA_IN) {
+               dma_unmap_single(mdata->dev, mdata->rx_dma,
+                                op->data.nbytes, DMA_FROM_DEVICE);
+               if (!IS_ALIGNED((size_t)op->data.buf.in, 4))
+                       memcpy(op->data.buf.in, rx_tmp_buf, op->data.nbytes);
+       }
+kfree_rx_tmp_buf:
+       if (op->data.dir == SPI_MEM_DATA_IN &&
+           !IS_ALIGNED((size_t)op->data.buf.in, 4))
+               kfree(rx_tmp_buf);
+unmap_tx_dma:
+       dma_unmap_single(mdata->dev, mdata->tx_dma,
+                        tx_size, DMA_TO_DEVICE);
+err_exit:
+       kfree(tx_tmp_buf);
+       mdata->use_spimem = false;
+
+       return ret;
+}
+
+static const struct spi_controller_mem_ops mtk_spi_mem_ops = {
+       .adjust_op_size = mtk_spi_mem_adjust_op_size,
+       .supports_op = mtk_spi_mem_supports_op,
+       .exec_op = mtk_spi_mem_exec_op,
+};
+
 static int mtk_spi_probe(struct platform_device *pdev)
 {
+       struct device *dev = &pdev->dev;
        struct spi_master *master;
        struct mtk_spi *mdata;
-       const struct of_device_id *of_id;
        int i, irq, ret, addr_bits;
 
-       master = spi_alloc_master(&pdev->dev, sizeof(*mdata));
-       if (!master) {
-               dev_err(&pdev->dev, "failed to alloc spi master\n");
-               return -ENOMEM;
-       }
+       master = devm_spi_alloc_master(dev, sizeof(*mdata));
+       if (!master)
+               return dev_err_probe(dev, -ENOMEM, "failed to alloc spi master\n");
 
        master->auto_runtime_pm = true;
-       master->dev.of_node = pdev->dev.of_node;
+       master->dev.of_node = dev->of_node;
        master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
 
        master->set_cs = mtk_spi_set_cs;
@@ -812,15 +1132,8 @@ static int mtk_spi_probe(struct platform_device *pdev)
        master->set_cs_timing = mtk_spi_set_hw_cs_timing;
        master->use_gpio_descriptors = true;
 
-       of_id = of_match_node(mtk_spi_of_match, pdev->dev.of_node);
-       if (!of_id) {
-               dev_err(&pdev->dev, "failed to probe of_node\n");
-               ret = -EINVAL;
-               goto err_put_master;
-       }
-
        mdata = spi_master_get_devdata(master);
-       mdata->dev_comp = of_id->data;
+       mdata->dev_comp = device_get_match_data(dev);
 
        if (mdata->dev_comp->enhance_timing)
                master->mode_bits |= SPI_CS_HIGH;
@@ -830,143 +1143,122 @@ static int mtk_spi_probe(struct platform_device *pdev)
        if (mdata->dev_comp->ipm_design)
                master->mode_bits |= SPI_LOOP;
 
+       if (mdata->dev_comp->ipm_design) {
+               mdata->dev = dev;
+               master->mem_ops = &mtk_spi_mem_ops;
+               init_completion(&mdata->spimem_done);
+       }
+
        if (mdata->dev_comp->need_pad_sel) {
-               mdata->pad_num = of_property_count_u32_elems(
-                       pdev->dev.of_node,
+               mdata->pad_num = of_property_count_u32_elems(dev->of_node,
                        "mediatek,pad-select");
-               if (mdata->pad_num < 0) {
-                       dev_err(&pdev->dev,
+               if (mdata->pad_num < 0)
+                       return dev_err_probe(dev, -EINVAL,
                                "No 'mediatek,pad-select' property\n");
-                       ret = -EINVAL;
-                       goto err_put_master;
-               }
 
-               mdata->pad_sel = devm_kmalloc_array(&pdev->dev, mdata->pad_num,
+               mdata->pad_sel = devm_kmalloc_array(dev, mdata->pad_num,
                                                    sizeof(u32), GFP_KERNEL);
-               if (!mdata->pad_sel) {
-                       ret = -ENOMEM;
-                       goto err_put_master;
-               }
+               if (!mdata->pad_sel)
+                       return -ENOMEM;
 
                for (i = 0; i < mdata->pad_num; i++) {
-                       of_property_read_u32_index(pdev->dev.of_node,
+                       of_property_read_u32_index(dev->of_node,
                                                   "mediatek,pad-select",
                                                   i, &mdata->pad_sel[i]);
-                       if (mdata->pad_sel[i] > MT8173_SPI_MAX_PAD_SEL) {
-                               dev_err(&pdev->dev, "wrong pad-sel[%d]: %u\n",
-                                       i, mdata->pad_sel[i]);
-                               ret = -EINVAL;
-                               goto err_put_master;
-                       }
+                       if (mdata->pad_sel[i] > MT8173_SPI_MAX_PAD_SEL)
+                               return dev_err_probe(dev, -EINVAL,
+                                                    "wrong pad-sel[%d]: %u\n",
+                                                    i, mdata->pad_sel[i]);
                }
        }
 
        platform_set_drvdata(pdev, master);
        mdata->base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(mdata->base)) {
-               ret = PTR_ERR(mdata->base);
-               goto err_put_master;
-       }
+       if (IS_ERR(mdata->base))
+               return PTR_ERR(mdata->base);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq < 0) {
-               ret = irq;
-               goto err_put_master;
-       }
+       if (irq < 0)
+               return irq;
 
-       if (!pdev->dev.dma_mask)
-               pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
+       if (!dev->dma_mask)
+               dev->dma_mask = &dev->coherent_dma_mask;
 
-       ret = devm_request_irq(&pdev->dev, irq, mtk_spi_interrupt,
-                              IRQF_TRIGGER_NONE, dev_name(&pdev->dev), master);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to register irq (%d)\n", ret);
-               goto err_put_master;
-       }
+       ret = devm_request_irq(dev, irq, mtk_spi_interrupt,
+                              IRQF_TRIGGER_NONE, dev_name(dev), master);
+       if (ret)
+               return dev_err_probe(dev, ret, "failed to register irq\n");
 
-       mdata->parent_clk = devm_clk_get(&pdev->dev, "parent-clk");
-       if (IS_ERR(mdata->parent_clk)) {
-               ret = PTR_ERR(mdata->parent_clk);
-               dev_err(&pdev->dev, "failed to get parent-clk: %d\n", ret);
-               goto err_put_master;
-       }
+       mdata->parent_clk = devm_clk_get(dev, "parent-clk");
+       if (IS_ERR(mdata->parent_clk))
+               return dev_err_probe(dev, PTR_ERR(mdata->parent_clk),
+                                    "failed to get parent-clk\n");
 
-       mdata->sel_clk = devm_clk_get(&pdev->dev, "sel-clk");
-       if (IS_ERR(mdata->sel_clk)) {
-               ret = PTR_ERR(mdata->sel_clk);
-               dev_err(&pdev->dev, "failed to get sel-clk: %d\n", ret);
-               goto err_put_master;
-       }
+       mdata->sel_clk = devm_clk_get(dev, "sel-clk");
+       if (IS_ERR(mdata->sel_clk))
+               return dev_err_probe(dev, PTR_ERR(mdata->sel_clk), "failed to get sel-clk\n");
 
-       mdata->spi_clk = devm_clk_get(&pdev->dev, "spi-clk");
-       if (IS_ERR(mdata->spi_clk)) {
-               ret = PTR_ERR(mdata->spi_clk);
-               dev_err(&pdev->dev, "failed to get spi-clk: %d\n", ret);
-               goto err_put_master;
-       }
+       mdata->spi_clk = devm_clk_get(dev, "spi-clk");
+       if (IS_ERR(mdata->spi_clk))
+               return dev_err_probe(dev, PTR_ERR(mdata->spi_clk), "failed to get spi-clk\n");
 
-       ret = clk_prepare_enable(mdata->spi_clk);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "failed to enable spi_clk (%d)\n", ret);
-               goto err_put_master;
-       }
+       mdata->spi_hclk = devm_clk_get_optional(dev, "hclk");
+       if (IS_ERR(mdata->spi_hclk))
+               return dev_err_probe(dev, PTR_ERR(mdata->spi_hclk), "failed to get hclk\n");
 
        ret = clk_set_parent(mdata->sel_clk, mdata->parent_clk);
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "failed to clk_set_parent\n");
+
+       ret = clk_prepare_enable(mdata->spi_hclk);
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "failed to enable hclk\n");
+
+       ret = clk_prepare_enable(mdata->spi_clk);
        if (ret < 0) {
-               dev_err(&pdev->dev, "failed to clk_set_parent (%d)\n", ret);
-               clk_disable_unprepare(mdata->spi_clk);
-               goto err_put_master;
+               clk_disable_unprepare(mdata->spi_hclk);
+               return dev_err_probe(dev, ret, "failed to enable spi_clk\n");
        }
 
        mdata->spi_clk_hz = clk_get_rate(mdata->spi_clk);
 
-       if (mdata->dev_comp->no_need_unprepare)
+       if (mdata->dev_comp->no_need_unprepare) {
                clk_disable(mdata->spi_clk);
-       else
+               clk_disable(mdata->spi_hclk);
+       } else {
                clk_disable_unprepare(mdata->spi_clk);
-
-       pm_runtime_enable(&pdev->dev);
+               clk_disable_unprepare(mdata->spi_hclk);
+       }
 
        if (mdata->dev_comp->need_pad_sel) {
-               if (mdata->pad_num != master->num_chipselect) {
-                       dev_err(&pdev->dev,
+               if (mdata->pad_num != master->num_chipselect)
+                       return dev_err_probe(dev, -EINVAL,
                                "pad_num does not match num_chipselect(%d != %d)\n",
                                mdata->pad_num, master->num_chipselect);
-                       ret = -EINVAL;
-                       goto err_disable_runtime_pm;
-               }
 
-               if (!master->cs_gpiods && master->num_chipselect > 1) {
-                       dev_err(&pdev->dev,
+               if (!master->cs_gpiods && master->num_chipselect > 1)
+                       return dev_err_probe(dev, -EINVAL,
                                "cs_gpios not specified and num_chipselect > 1\n");
-                       ret = -EINVAL;
-                       goto err_disable_runtime_pm;
-               }
        }
 
        if (mdata->dev_comp->dma_ext)
                addr_bits = DMA_ADDR_EXT_BITS;
        else
                addr_bits = DMA_ADDR_DEF_BITS;
-       ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(addr_bits));
+       ret = dma_set_mask(dev, DMA_BIT_MASK(addr_bits));
        if (ret)
-               dev_notice(&pdev->dev, "SPI dma_set_mask(%d) failed, ret:%d\n",
+               dev_notice(dev, "SPI dma_set_mask(%d) failed, ret:%d\n",
                           addr_bits, ret);
 
-       ret = devm_spi_register_master(&pdev->dev, master);
+       pm_runtime_enable(dev);
+
+       ret = devm_spi_register_master(dev, master);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register master (%d)\n", ret);
-               goto err_disable_runtime_pm;
+               pm_runtime_disable(dev);
+               return dev_err_probe(dev, ret, "failed to register master\n");
        }
 
        return 0;
-
-err_disable_runtime_pm:
-       pm_runtime_disable(&pdev->dev);
-err_put_master:
-       spi_master_put(master);
-
-       return ret;
 }
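
Most of the error unwinding above collapses because allocations are now devm-managed and dev_err_probe() logs and returns the error in one statement (staying quiet for -EPROBE_DEFER while recording the reason for the deferred-probe debugfs list). Typical shape:

	ret = clk_prepare_enable(clk);
	if (ret)
		return dev_err_probe(dev, ret, "failed to enable clock\n");
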
 
 static int mtk_spi_remove(struct platform_device *pdev)
@@ -978,8 +1270,10 @@ static int mtk_spi_remove(struct platform_device *pdev)
 
        mtk_spi_reset(mdata);
 
-       if (mdata->dev_comp->no_need_unprepare)
+       if (mdata->dev_comp->no_need_unprepare) {
                clk_unprepare(mdata->spi_clk);
+               clk_unprepare(mdata->spi_hclk);
+       }
 
        return 0;
 }
@@ -995,8 +1289,10 @@ static int mtk_spi_suspend(struct device *dev)
        if (ret)
                return ret;
 
-       if (!pm_runtime_suspended(dev))
+       if (!pm_runtime_suspended(dev)) {
                clk_disable_unprepare(mdata->spi_clk);
+               clk_disable_unprepare(mdata->spi_hclk);
+       }
 
        return ret;
 }
@@ -1013,11 +1309,20 @@ static int mtk_spi_resume(struct device *dev)
                        dev_err(dev, "failed to enable spi_clk (%d)\n", ret);
                        return ret;
                }
+
+               ret = clk_prepare_enable(mdata->spi_hclk);
+               if (ret < 0) {
+                       dev_err(dev, "failed to enable spi_hclk (%d)\n", ret);
+                       clk_disable_unprepare(mdata->spi_clk);
+                       return ret;
+               }
        }
 
        ret = spi_master_resume(master);
-       if (ret < 0)
+       if (ret < 0) {
                clk_disable_unprepare(mdata->spi_clk);
+               clk_disable_unprepare(mdata->spi_hclk);
+       }
 
        return ret;
 }
@@ -1029,10 +1334,13 @@ static int mtk_spi_runtime_suspend(struct device *dev)
        struct spi_master *master = dev_get_drvdata(dev);
        struct mtk_spi *mdata = spi_master_get_devdata(master);
 
-       if (mdata->dev_comp->no_need_unprepare)
+       if (mdata->dev_comp->no_need_unprepare) {
                clk_disable(mdata->spi_clk);
-       else
+               clk_disable(mdata->spi_hclk);
+       } else {
                clk_disable_unprepare(mdata->spi_clk);
+               clk_disable_unprepare(mdata->spi_hclk);
+       }
 
        return 0;
 }
@@ -1043,13 +1351,31 @@ static int mtk_spi_runtime_resume(struct device *dev)
        struct mtk_spi *mdata = spi_master_get_devdata(master);
        int ret;
 
-       if (mdata->dev_comp->no_need_unprepare)
+       if (mdata->dev_comp->no_need_unprepare) {
                ret = clk_enable(mdata->spi_clk);
-       else
+               if (ret < 0) {
+                       dev_err(dev, "failed to enable spi_clk (%d)\n", ret);
+                       return ret;
+               }
+               ret = clk_enable(mdata->spi_hclk);
+               if (ret < 0) {
+                       dev_err(dev, "failed to enable spi_hclk (%d)\n", ret);
+                       clk_disable(mdata->spi_clk);
+                       return ret;
+               }
+       } else {
                ret = clk_prepare_enable(mdata->spi_clk);
-       if (ret < 0) {
-               dev_err(dev, "failed to enable spi_clk (%d)\n", ret);
-               return ret;
+               if (ret < 0) {
+                       dev_err(dev, "failed to prepare_enable spi_clk (%d)\n", ret);
+                       return ret;
+               }
+
+               ret = clk_prepare_enable(mdata->spi_hclk);
+               if (ret < 0) {
+                       dev_err(dev, "failed to prepare_enable spi_hclk (%d)\n", ret);
+                       clk_disable_unprepare(mdata->spi_clk);
+                       return ret;
+               }
        }
 
        return 0;
diff --git a/drivers/spi/spi-mtk-snfi.c b/drivers/spi/spi-mtk-snfi.c
new file mode 100644 (file)
index 0000000..d66bf97
--- /dev/null
@@ -0,0 +1,1472 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Driver for the SPI-NAND mode of MediaTek NAND Flash Interface
+//
+// Copyright (c) 2022 Chuanhong Guo <gch981213@gmail.com>
+//
+// This driver is based on the SPI-NAND mtd driver from Mediatek SDK:
+//
+// Copyright (C) 2020 MediaTek Inc.
+// Author: Weijie Gao <weijie.gao@mediatek.com>
+//
+// This controller organizes the page data as several interleaved sectors
+// like the following: (sizeof(FDM + ECC) = snf->nfi_cfg.spare_size)
+// +---------+------+------+---------+------+------+-----+
+// | Sector1 | FDM1 | ECC1 | Sector2 | FDM2 | ECC2 | ... |
+// +---------+------+------+---------+------+------+-----+
+// With auto-format turned on, DMA only returns this part:
+// +---------+---------+-----+
+// | Sector1 | Sector2 | ... |
+// +---------+---------+-----+
+// The FDM data will be filled into the registers, and ECC parity data isn't
+// accessible.
+// With auto-format off, all ((Sector+FDM+ECC)*nsectors) will be read over DMA
+// in its original order shown in the first table. ECC can't be turned on when
+// auto-format is off.
+//
+// However, Linux SPI-NAND driver expects the data returned as:
+// +------+-----+
+// | Page | OOB |
+// +------+-----+
+// where the page data is continuously stored instead of interleaved.
+// So we assume all instructions matching the page_op template between ECC
+// prepare_io_req and finish_io_req are for page cache r/w.
+// Here's how this spi-mem driver operates when reading:
+//  1. Always set snf->autofmt = true in prepare_io_req (even when ECC is off).
+//  2. Perform page ops and let the controller fill the DMA bounce buffer with
+//     de-interleaved sector data and set FDM registers.
+//  3. Return the data as:
+//     +---------+---------+-----+------+------+-----+
+//     | Sector1 | Sector2 | ... | FDM1 | FDM2 | ... |
+//     +---------+---------+-----+------+------+-----+
+//  4. For other matching spi_mem ops outside a prepare/finish_io_req pair,
+//     read the data with auto-format off into the bounce buffer and copy
+//     needed data to the buffer specified in the request.
+//
+// Write requests operate in a similar manner.
+// As a limitation of this strategy, we won't be able to access any ECC parity
+// data at all in Linux.
+//
+// Here's the bad block mark situation on MTK chips:
+// In older chips like mt7622, MTK uses the first FDM byte in the first sector
+// as the bad block mark. After de-interleaving, this byte appears at [pagesize]
+// in the returned data, which is the BBM position expected by the kernel. However,
+// the conventional bad block mark is the first byte of the OOB, which is part
+// of the last sector data in the interleaved layout. Instead of fixing their
+// hardware, MTK decided to address this inconsistency in software. On newer
+// chips, the BootROM expects the following:
+// 1. The [pagesize] byte on a nand page is used as BBM, which will appear at
+//    (page_size - (nsectors - 1) * spare_size) in the DMA buffer.
+// 2. The original byte stored at that position in the DMA buffer will be stored
+//    as the first byte of the FDM section in the last sector.
+// We can't disagree with the BootROM, so after de-interleaving, we need to
+// perform the following swaps in read:
+// 1. Store the BBM at [page_size - (nsectors - 1) * spare_size] to [page_size],
+//    which is the BBM position expected by the kernel.
+// 2. Store the page data byte at [page_size + (nsectors - 1) * fdm_size] back
+//    to [page_size - (nsectors - 1) * spare_size].
+// Similarly, when writing, we need to perform swaps in the other direction.
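+//
+// A worked example with illustrative numbers only (page_size = 2048,
+// spare_size = 16, fdm_size = 8, nsectors = 4): the BBM lands at
+// 2048 - 3 * 16 = 2000 in the DMA buffer, and the displaced page data byte
+// sits at 2048 + 3 * 8 = 2072, i.e. the first FDM byte of the last sector.
+// The two read swaps above move the BBM to [2048] and the data byte back
+// to [2000].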
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/iopoll.h>
+#include <linux/of_platform.h>
+#include <linux/mtd/nand-ecc-mtk.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+#include <linux/mtd/nand.h>
+
+// NFI registers
+#define NFI_CNFG 0x000
+#define CNFG_OP_MODE_S 12
+#define CNFG_OP_MODE_CUST 6
+#define CNFG_OP_MODE_PROGRAM 3
+#define CNFG_AUTO_FMT_EN BIT(9)
+#define CNFG_HW_ECC_EN BIT(8)
+#define CNFG_DMA_BURST_EN BIT(2)
+#define CNFG_READ_MODE BIT(1)
+#define CNFG_DMA_MODE BIT(0)
+
+#define NFI_PAGEFMT 0x0004
+#define NFI_SPARE_SIZE_LS_S 16
+#define NFI_FDM_ECC_NUM_S 12
+#define NFI_FDM_NUM_S 8
+#define NFI_SPARE_SIZE_S 4
+#define NFI_SEC_SEL_512 BIT(2)
+#define NFI_PAGE_SIZE_S 0
+#define NFI_PAGE_SIZE_512_2K 0
+#define NFI_PAGE_SIZE_2K_4K 1
+#define NFI_PAGE_SIZE_4K_8K 2
+#define NFI_PAGE_SIZE_8K_16K 3
+
+#define NFI_CON 0x008
+#define CON_SEC_NUM_S 12
+#define CON_BWR BIT(9)
+#define CON_BRD BIT(8)
+#define CON_NFI_RST BIT(1)
+#define CON_FIFO_FLUSH BIT(0)
+
+#define NFI_INTR_EN 0x010
+#define NFI_INTR_STA 0x014
+#define NFI_IRQ_INTR_EN BIT(31)
+#define NFI_IRQ_CUS_READ BIT(8)
+#define NFI_IRQ_CUS_PG BIT(7)
+
+#define NFI_CMD 0x020
+#define NFI_CMD_DUMMY_READ 0x00
+#define NFI_CMD_DUMMY_WRITE 0x80
+
+#define NFI_STRDATA 0x040
+#define STR_DATA BIT(0)
+
+#define NFI_STA 0x060
+#define NFI_NAND_FSM GENMASK(28, 24)
+#define NFI_FSM GENMASK(19, 16)
+#define READ_EMPTY BIT(12)
+
+#define NFI_FIFOSTA 0x064
+#define FIFO_WR_REMAIN_S 8
+#define FIFO_RD_REMAIN_S 0
+
+#define NFI_ADDRCNTR 0x070
+#define SEC_CNTR GENMASK(16, 12)
+#define SEC_CNTR_S 12
+#define NFI_SEC_CNTR(val) (((val)&SEC_CNTR) >> SEC_CNTR_S)
+
+#define NFI_STRADDR 0x080
+
+#define NFI_BYTELEN 0x084
+#define BUS_SEC_CNTR(val) (((val)&SEC_CNTR) >> SEC_CNTR_S)
+
+#define NFI_FDM0L 0x0a0
+#define NFI_FDM0M 0x0a4
+#define NFI_FDML(n) (NFI_FDM0L + (n)*8)
+#define NFI_FDMM(n) (NFI_FDM0M + (n)*8)
+
+#define NFI_DEBUG_CON1 0x220
+#define WBUF_EN BIT(2)
+
+#define NFI_MASTERSTA 0x224
+#define MAS_ADDR GENMASK(11, 9)
+#define MAS_RD GENMASK(8, 6)
+#define MAS_WR GENMASK(5, 3)
+#define MAS_RDDLY GENMASK(2, 0)
+#define NFI_MASTERSTA_MASK_7622 (MAS_ADDR | MAS_RD | MAS_WR | MAS_RDDLY)
+
+// SNFI registers
+#define SNF_MAC_CTL 0x500
+#define MAC_XIO_SEL BIT(4)
+#define SF_MAC_EN BIT(3)
+#define SF_TRIG BIT(2)
+#define WIP_READY BIT(1)
+#define WIP BIT(0)
+
+#define SNF_MAC_OUTL 0x504
+#define SNF_MAC_INL 0x508
+
+#define SNF_RD_CTL2 0x510
+#define DATA_READ_DUMMY_S 8
+#define DATA_READ_MAX_DUMMY 0xf
+#define DATA_READ_CMD_S 0
+
+#define SNF_RD_CTL3 0x514
+
+#define SNF_PG_CTL1 0x524
+#define PG_LOAD_CMD_S 8
+
+#define SNF_PG_CTL2 0x528
+
+#define SNF_MISC_CTL 0x538
+#define SW_RST BIT(28)
+#define FIFO_RD_LTC_S 25
+#define PG_LOAD_X4_EN BIT(20)
+#define DATA_READ_MODE_S 16
+#define DATA_READ_MODE GENMASK(18, 16)
+#define DATA_READ_MODE_X1 0
+#define DATA_READ_MODE_X2 1
+#define DATA_READ_MODE_X4 2
+#define DATA_READ_MODE_DUAL 5
+#define DATA_READ_MODE_QUAD 6
+#define PG_LOAD_CUSTOM_EN BIT(7)
+#define DATARD_CUSTOM_EN BIT(6)
+#define CS_DESELECT_CYC_S 0
+
+#define SNF_MISC_CTL2 0x53c
+#define PROGRAM_LOAD_BYTE_NUM_S 16
+#define READ_DATA_BYTE_NUM_S 11
+
+#define SNF_DLY_CTL3 0x548
+#define SFCK_SAM_DLY_S 0
+
+#define SNF_STA_CTL1 0x550
+#define CUS_PG_DONE BIT(28)
+#define CUS_READ_DONE BIT(27)
+#define SPI_STATE_S 0
+#define SPI_STATE GENMASK(3, 0)
+
+#define SNF_CFG 0x55c
+#define SPI_MODE BIT(0)
+
+#define SNF_GPRAM 0x800
+#define SNF_GPRAM_SIZE 0xa0
+
+#define SNFI_POLL_INTERVAL 1000000
+
+static const u8 mt7622_spare_sizes[] = { 16, 26, 27, 28 };
+
+struct mtk_snand_caps {
+       u16 sector_size;
+       u16 max_sectors;
+       u16 fdm_size;
+       u16 fdm_ecc_size;
+       u16 fifo_size;
+
+       bool bbm_swap;
+       bool empty_page_check;
+       u32 mastersta_mask;
+
+       const u8 *spare_sizes;
+       u32 num_spare_size;
+};
+
+static const struct mtk_snand_caps mt7622_snand_caps = {
+       .sector_size = 512,
+       .max_sectors = 8,
+       .fdm_size = 8,
+       .fdm_ecc_size = 1,
+       .fifo_size = 32,
+       .bbm_swap = false,
+       .empty_page_check = false,
+       .mastersta_mask = NFI_MASTERSTA_MASK_7622,
+       .spare_sizes = mt7622_spare_sizes,
+       .num_spare_size = ARRAY_SIZE(mt7622_spare_sizes)
+};
+
+static const struct mtk_snand_caps mt7629_snand_caps = {
+       .sector_size = 512,
+       .max_sectors = 8,
+       .fdm_size = 8,
+       .fdm_ecc_size = 1,
+       .fifo_size = 32,
+       .bbm_swap = true,
+       .empty_page_check = false,
+       .mastersta_mask = NFI_MASTERSTA_MASK_7622,
+       .spare_sizes = mt7622_spare_sizes,
+       .num_spare_size = ARRAY_SIZE(mt7622_spare_sizes)
+};
+
+struct mtk_snand_conf {
+       size_t page_size;
+       size_t oob_size;
+       u8 nsectors;
+       u8 spare_size;
+};
+
+struct mtk_snand {
+       struct spi_controller *ctlr;
+       struct device *dev;
+       struct clk *nfi_clk;
+       struct clk *pad_clk;
+       void __iomem *nfi_base;
+       int irq;
+       struct completion op_done;
+       const struct mtk_snand_caps *caps;
+       struct mtk_ecc_config *ecc_cfg;
+       struct mtk_ecc *ecc;
+       struct mtk_snand_conf nfi_cfg;
+       struct mtk_ecc_stats ecc_stats;
+       struct nand_ecc_engine ecc_eng;
+       bool autofmt;
+       u8 *buf;
+       size_t buf_len;
+};
+
+static struct mtk_snand *nand_to_mtk_snand(struct nand_device *nand)
+{
+       struct nand_ecc_engine *eng = nand->ecc.engine;
+
+       return container_of(eng, struct mtk_snand, ecc_eng);
+}
+
+static inline int snand_prepare_bouncebuf(struct mtk_snand *snf, size_t size)
+{
+       if (snf->buf_len >= size)
+               return 0;
+       kfree(snf->buf);
+       snf->buf = kmalloc(size, GFP_KERNEL);
+       if (!snf->buf)
+               return -ENOMEM;
+       snf->buf_len = size;
+       memset(snf->buf, 0xff, snf->buf_len);
+       return 0;
+}
+
+static inline u32 nfi_read32(struct mtk_snand *snf, u32 reg)
+{
+       return readl(snf->nfi_base + reg);
+}
+
+static inline void nfi_write32(struct mtk_snand *snf, u32 reg, u32 val)
+{
+       writel(val, snf->nfi_base + reg);
+}
+
+static inline void nfi_write16(struct mtk_snand *snf, u32 reg, u16 val)
+{
+       writew(val, snf->nfi_base + reg);
+}
+
+static inline void nfi_rmw32(struct mtk_snand *snf, u32 reg, u32 clr, u32 set)
+{
+       u32 val;
+
+       val = readl(snf->nfi_base + reg);
+       val &= ~clr;
+       val |= set;
+       writel(val, snf->nfi_base + reg);
+}
+
+static void nfi_read_data(struct mtk_snand *snf, u32 reg, u8 *data, u32 len)
+{
+       u32 i, val = 0, es = sizeof(u32);
+
+       for (i = reg; i < reg + len; i++) {
+               if (i == reg || i % es == 0)
+                       val = nfi_read32(snf, i & ~(es - 1));
+
+               *data++ = (u8)(val >> (8 * (i % es)));
+       }
+}
+
+static int mtk_nfi_reset(struct mtk_snand *snf)
+{
+       u32 val, fifo_mask;
+       int ret;
+
+       nfi_write32(snf, NFI_CON, CON_FIFO_FLUSH | CON_NFI_RST);
+
+       ret = readw_poll_timeout(snf->nfi_base + NFI_MASTERSTA, val,
+                                !(val & snf->caps->mastersta_mask), 0,
+                                SNFI_POLL_INTERVAL);
+       if (ret) {
+               dev_err(snf->dev, "NFI master is still busy after reset\n");
+               return ret;
+       }
+
+       ret = readl_poll_timeout(snf->nfi_base + NFI_STA, val,
+                                !(val & (NFI_FSM | NFI_NAND_FSM)), 0,
+                                SNFI_POLL_INTERVAL);
+       if (ret) {
+               dev_err(snf->dev, "Failed to reset NFI\n");
+               return ret;
+       }
+
+       fifo_mask = ((snf->caps->fifo_size - 1) << FIFO_RD_REMAIN_S) |
+                   ((snf->caps->fifo_size - 1) << FIFO_WR_REMAIN_S);
+       ret = readw_poll_timeout(snf->nfi_base + NFI_FIFOSTA, val,
+                                !(val & fifo_mask), 0, SNFI_POLL_INTERVAL);
+       if (ret) {
+               dev_err(snf->dev, "NFI FIFOs are not empty\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static int mtk_snand_mac_reset(struct mtk_snand *snf)
+{
+       int ret;
+       u32 val;
+
+       nfi_rmw32(snf, SNF_MISC_CTL, 0, SW_RST);
+
+       ret = readl_poll_timeout(snf->nfi_base + SNF_STA_CTL1, val,
+                                !(val & SPI_STATE), 0, SNFI_POLL_INTERVAL);
+       if (ret)
+               dev_err(snf->dev, "Failed to reset SNFI MAC\n");
+
+       nfi_write32(snf, SNF_MISC_CTL,
+                   (2 << FIFO_RD_LTC_S) | (10 << CS_DESELECT_CYC_S));
+
+       return ret;
+}
+
+static int mtk_snand_mac_trigger(struct mtk_snand *snf, u32 outlen, u32 inlen)
+{
+       int ret;
+       u32 val;
+
+       nfi_write32(snf, SNF_MAC_CTL, SF_MAC_EN);
+       nfi_write32(snf, SNF_MAC_OUTL, outlen);
+       nfi_write32(snf, SNF_MAC_INL, inlen);
+
+       nfi_write32(snf, SNF_MAC_CTL, SF_MAC_EN | SF_TRIG);
+
+       ret = readl_poll_timeout(snf->nfi_base + SNF_MAC_CTL, val,
+                                val & WIP_READY, 0, SNFI_POLL_INTERVAL);
+       if (ret) {
+               dev_err(snf->dev, "Timed out waiting for WIP_READY\n");
+               goto cleanup;
+       }
+
+       ret = readl_poll_timeout(snf->nfi_base + SNF_MAC_CTL, val, !(val & WIP),
+                                0, SNFI_POLL_INTERVAL);
+       if (ret)
+               dev_err(snf->dev, "Timed out waiting for WIP cleared\n");
+
+cleanup:
+       nfi_write32(snf, SNF_MAC_CTL, 0);
+
+       return ret;
+}
+
+static int mtk_snand_mac_io(struct mtk_snand *snf, const struct spi_mem_op *op)
+{
+       u32 rx_len = 0;
+       u32 reg_offs = 0;
+       u32 val = 0;
+       const u8 *tx_buf = NULL;
+       u8 *rx_buf = NULL;
+       int i, ret;
+       u8 b;
+
+       if (op->data.dir == SPI_MEM_DATA_IN) {
+               rx_len = op->data.nbytes;
+               rx_buf = op->data.buf.in;
+       } else {
+               tx_buf = op->data.buf.out;
+       }
+
+       mtk_snand_mac_reset(snf);
+
+       for (i = 0; i < op->cmd.nbytes; i++, reg_offs++) {
+               b = (op->cmd.opcode >> ((op->cmd.nbytes - i - 1) * 8)) & 0xff;
+               val |= b << (8 * (reg_offs % 4));
+               if (reg_offs % 4 == 3) {
+                       nfi_write32(snf, SNF_GPRAM + reg_offs - 3, val);
+                       val = 0;
+               }
+       }
+
+       for (i = 0; i < op->addr.nbytes; i++, reg_offs++) {
+               b = (op->addr.val >> ((op->addr.nbytes - i - 1) * 8)) & 0xff;
+               val |= b << (8 * (reg_offs % 4));
+               if (reg_offs % 4 == 3) {
+                       nfi_write32(snf, SNF_GPRAM + reg_offs - 3, val);
+                       val = 0;
+               }
+       }
+
+       for (i = 0; i < op->dummy.nbytes; i++, reg_offs++) {
+               if (reg_offs % 4 == 3) {
+                       nfi_write32(snf, SNF_GPRAM + reg_offs - 3, val);
+                       val = 0;
+               }
+       }
+
+       if (op->data.dir == SPI_MEM_DATA_OUT) {
+               for (i = 0; i < op->data.nbytes; i++, reg_offs++) {
+                       val |= tx_buf[i] << (8 * (reg_offs % 4));
+                       if (reg_offs % 4 == 3) {
+                               nfi_write32(snf, SNF_GPRAM + reg_offs - 3, val);
+                               val = 0;
+                       }
+               }
+       }
+
+       if (reg_offs % 4)
+               nfi_write32(snf, SNF_GPRAM + (reg_offs & ~3), val);
+
+       for (i = 0; i < reg_offs; i += 4)
+               dev_dbg(snf->dev, "%d: %08X", i,
+                       nfi_read32(snf, SNF_GPRAM + i));
+
+       dev_dbg(snf->dev, "SNF TX: %u RX: %u", reg_offs, rx_len);
+
+       ret = mtk_snand_mac_trigger(snf, reg_offs, rx_len);
+       if (ret)
+               return ret;
+
+       if (!rx_len)
+               return 0;
+
+       nfi_read_data(snf, SNF_GPRAM + reg_offs, rx_buf, rx_len);
+       return 0;
+}
+
+static int mtk_snand_setup_pagefmt(struct mtk_snand *snf, u32 page_size,
+                                  u32 oob_size)
+{
+       int spare_idx = -1;
+       u32 spare_size, spare_size_shift, pagesize_idx;
+       u32 sector_size_512;
+       u8 nsectors;
+       int i;
+
+       // skip if it's already configured as required.
+       if (snf->nfi_cfg.page_size == page_size &&
+           snf->nfi_cfg.oob_size == oob_size)
+               return 0;
+
+       nsectors = page_size / snf->caps->sector_size;
+       if (nsectors > snf->caps->max_sectors) {
+               dev_err(snf->dev, "too many sectors required.\n");
+               goto err;
+       }
+
+       if (snf->caps->sector_size == 512) {
+               sector_size_512 = NFI_SEC_SEL_512;
+               spare_size_shift = NFI_SPARE_SIZE_S;
+       } else {
+               sector_size_512 = 0;
+               spare_size_shift = NFI_SPARE_SIZE_LS_S;
+       }
+
+       switch (page_size) {
+       case SZ_512:
+               pagesize_idx = NFI_PAGE_SIZE_512_2K;
+               break;
+       case SZ_2K:
+               if (snf->caps->sector_size == 512)
+                       pagesize_idx = NFI_PAGE_SIZE_2K_4K;
+               else
+                       pagesize_idx = NFI_PAGE_SIZE_512_2K;
+               break;
+       case SZ_4K:
+               if (snf->caps->sector_size == 512)
+                       pagesize_idx = NFI_PAGE_SIZE_4K_8K;
+               else
+                       pagesize_idx = NFI_PAGE_SIZE_2K_4K;
+               break;
+       case SZ_8K:
+               if (snf->caps->sector_size == 512)
+                       pagesize_idx = NFI_PAGE_SIZE_8K_16K;
+               else
+                       pagesize_idx = NFI_PAGE_SIZE_4K_8K;
+               break;
+       case SZ_16K:
+               pagesize_idx = NFI_PAGE_SIZE_8K_16K;
+               break;
+       default:
+               dev_err(snf->dev, "unsupported page size.\n");
+               goto err;
+       }
+
+       spare_size = oob_size / nsectors;
+       // If we're using the 1KB sector size, HW will automatically double the
+       // spare size. We should only use half of the value in this case.
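+       // (e.g. with 1024-byte sectors, oob_size = 128 and nsectors = 2,
+       // spare_size starts at 64, is halved to 32 for the lookup below, and
+       // the matched table entry is doubled back -- illustrative numbers.)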
+       if (snf->caps->sector_size == 1024)
+               spare_size /= 2;
+
+       for (i = snf->caps->num_spare_size - 1; i >= 0; i--) {
+               if (snf->caps->spare_sizes[i] <= spare_size) {
+                       spare_size = snf->caps->spare_sizes[i];
+                       if (snf->caps->sector_size == 1024)
+                               spare_size *= 2;
+                       spare_idx = i;
+                       break;
+               }
+       }
+
+       if (spare_idx < 0) {
+               dev_err(snf->dev, "unsupported spare size: %u\n", spare_size);
+               goto err;
+       }
+
+       nfi_write32(snf, NFI_PAGEFMT,
+                   (snf->caps->fdm_ecc_size << NFI_FDM_ECC_NUM_S) |
+                           (snf->caps->fdm_size << NFI_FDM_NUM_S) |
+                           (spare_idx << spare_size_shift) |
+                           (pagesize_idx << NFI_PAGE_SIZE_S) |
+                           sector_size_512);
+
+       snf->nfi_cfg.page_size = page_size;
+       snf->nfi_cfg.oob_size = oob_size;
+       snf->nfi_cfg.nsectors = nsectors;
+       snf->nfi_cfg.spare_size = spare_size;
+
+       dev_dbg(snf->dev, "page format: (%u + %u) * %u\n",
+               snf->caps->sector_size, spare_size, nsectors);
+       return snand_prepare_bouncebuf(snf, page_size + oob_size);
+err:
+       dev_err(snf->dev, "page size %u + %u is not supported\n", page_size,
+               oob_size);
+       return -EOPNOTSUPP;
+}
+
+static int mtk_snand_ooblayout_ecc(struct mtd_info *mtd, int section,
+                                  struct mtd_oob_region *oobecc)
+{
+       // ECC area is not accessible
+       return -ERANGE;
+}
+
+static int mtk_snand_ooblayout_free(struct mtd_info *mtd, int section,
+                                   struct mtd_oob_region *oobfree)
+{
+       struct nand_device *nand = mtd_to_nanddev(mtd);
+       struct mtk_snand *ms = nand_to_mtk_snand(nand);
+
+       if (section >= ms->nfi_cfg.nsectors)
+               return -ERANGE;
+
+       oobfree->length = ms->caps->fdm_size - 1;
+       oobfree->offset = section * ms->caps->fdm_size + 1;
+       return 0;
+}
+
+static const struct mtd_ooblayout_ops mtk_snand_ooblayout = {
+       .ecc = mtk_snand_ooblayout_ecc,
+       .free = mtk_snand_ooblayout_free,
+};
+
+static int mtk_snand_ecc_init_ctx(struct nand_device *nand)
+{
+       struct mtk_snand *snf = nand_to_mtk_snand(nand);
+       struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
+       struct nand_ecc_props *reqs = &nand->ecc.requirements;
+       struct nand_ecc_props *user = &nand->ecc.user_conf;
+       struct mtd_info *mtd = nanddev_to_mtd(nand);
+       int step_size = 0, strength = 0, desired_correction = 0, steps;
+       bool ecc_user = false;
+       int ret;
+       u32 parity_bits, max_ecc_bytes;
+       struct mtk_ecc_config *ecc_cfg;
+
+       ret = mtk_snand_setup_pagefmt(snf, nand->memorg.pagesize,
+                                     nand->memorg.oobsize);
+       if (ret)
+               return ret;
+
+       ecc_cfg = kzalloc(sizeof(*ecc_cfg), GFP_KERNEL);
+       if (!ecc_cfg)
+               return -ENOMEM;
+
+       nand->ecc.ctx.priv = ecc_cfg;
+
+       if (user->step_size && user->strength) {
+               step_size = user->step_size;
+               strength = user->strength;
+               ecc_user = true;
+       } else if (reqs->step_size && reqs->strength) {
+               step_size = reqs->step_size;
+               strength = reqs->strength;
+       }
+
+       if (step_size && strength) {
+               steps = mtd->writesize / step_size;
+               desired_correction = steps * strength;
+               strength = desired_correction / snf->nfi_cfg.nsectors;
+       }
+
+       ecc_cfg->mode = ECC_NFI_MODE;
+       ecc_cfg->sectors = snf->nfi_cfg.nsectors;
+       ecc_cfg->len = snf->caps->sector_size + snf->caps->fdm_ecc_size;
+
+       // calculate the max possible strength under current page format
+       parity_bits = mtk_ecc_get_parity_bits(snf->ecc);
+       max_ecc_bytes = snf->nfi_cfg.spare_size - snf->caps->fdm_size;
+       ecc_cfg->strength = max_ecc_bytes * 8 / parity_bits;
+       mtk_ecc_adjust_strength(snf->ecc, &ecc_cfg->strength);
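+       // (e.g. with spare_size = 28, fdm_size = 8 and 13 parity bits per
+       // step -- illustrative numbers -- max_ecc_bytes = 20 and the raw
+       // maximum is 20 * 8 / 13 = 12 bits before adjustment.)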
+
+       // If there's a user-requested strength, find the minimum strength that
+       // meets the requirement. Otherwise use the maximum strength, which is
+       // what the BootROM expects.
+       if (ecc_user && strength) {
+               u32 s_next = ecc_cfg->strength - 1;
+
+               while (1) {
+                       mtk_ecc_adjust_strength(snf->ecc, &s_next);
+                       if (s_next >= ecc_cfg->strength)
+                               break;
+                       if (s_next < strength)
+                               break;
+                       s_next = ecc_cfg->strength - 1;
+               }
+       }
+
+       mtd_set_ooblayout(mtd, &mtk_snand_ooblayout);
+
+       conf->step_size = snf->caps->sector_size;
+       conf->strength = ecc_cfg->strength;
+
+       if (ecc_cfg->strength < strength)
+               dev_warn(snf->dev, "unable to fulfill ECC of %u bits.\n",
+                        strength);
+       dev_info(snf->dev, "ECC strength: %u bits per %u bytes\n",
+                ecc_cfg->strength, snf->caps->sector_size);
+
+       return 0;
+}
+
+static void mtk_snand_ecc_cleanup_ctx(struct nand_device *nand)
+{
+       struct mtk_ecc_config *ecc_cfg = nand_to_ecc_ctx(nand);
+
+       kfree(ecc_cfg);
+}
+
+static int mtk_snand_ecc_prepare_io_req(struct nand_device *nand,
+                                       struct nand_page_io_req *req)
+{
+       struct mtk_snand *snf = nand_to_mtk_snand(nand);
+       struct mtk_ecc_config *ecc_cfg = nand_to_ecc_ctx(nand);
+       int ret;
+
+       ret = mtk_snand_setup_pagefmt(snf, nand->memorg.pagesize,
+                                     nand->memorg.oobsize);
+       if (ret)
+               return ret;
+       snf->autofmt = true;
+       snf->ecc_cfg = ecc_cfg;
+       return 0;
+}
+
+static int mtk_snand_ecc_finish_io_req(struct nand_device *nand,
+                                      struct nand_page_io_req *req)
+{
+       struct mtk_snand *snf = nand_to_mtk_snand(nand);
+       struct mtd_info *mtd = nanddev_to_mtd(nand);
+
+       snf->ecc_cfg = NULL;
+       snf->autofmt = false;
+       if ((req->mode == MTD_OPS_RAW) || (req->type != NAND_PAGE_READ))
+               return 0;
+
+       if (snf->ecc_stats.failed)
+               mtd->ecc_stats.failed += snf->ecc_stats.failed;
+       mtd->ecc_stats.corrected += snf->ecc_stats.corrected;
+       return snf->ecc_stats.failed ? -EBADMSG : snf->ecc_stats.bitflips;
+}
+
+static struct nand_ecc_engine_ops mtk_snfi_ecc_engine_ops = {
+       .init_ctx = mtk_snand_ecc_init_ctx,
+       .cleanup_ctx = mtk_snand_ecc_cleanup_ctx,
+       .prepare_io_req = mtk_snand_ecc_prepare_io_req,
+       .finish_io_req = mtk_snand_ecc_finish_io_req,
+};
+
+static void mtk_snand_read_fdm(struct mtk_snand *snf, u8 *buf)
+{
+       u32 vall, valm;
+       u8 *oobptr = buf;
+       int i, j;
+
+       for (i = 0; i < snf->nfi_cfg.nsectors; i++) {
+               vall = nfi_read32(snf, NFI_FDML(i));
+               valm = nfi_read32(snf, NFI_FDMM(i));
+
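+               // bytes 0-3 of each sector's FDM come from the L register and
+               // bytes 4-7 from the M register, least-significant byte first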
+               for (j = 0; j < snf->caps->fdm_size; j++)
+                       oobptr[j] = (j >= 4 ? valm : vall) >> ((j % 4) * 8);
+
+               oobptr += snf->caps->fdm_size;
+       }
+}
+
+static void mtk_snand_write_fdm(struct mtk_snand *snf, const u8 *buf)
+{
+       u32 fdm_size = snf->caps->fdm_size;
+       const u8 *oobptr = buf;
+       u32 vall, valm;
+       int i, j;
+
+       for (i = 0; i < snf->nfi_cfg.nsectors; i++) {
+               vall = 0;
+               valm = 0;
+
+               for (j = 0; j < 8; j++) {
+                       if (j < 4)
+                               vall |= (j < fdm_size ? oobptr[j] : 0xff)
+                                       << (j * 8);
+                       else
+                               valm |= (j < fdm_size ? oobptr[j] : 0xff)
+                                       << ((j - 4) * 8);
+               }
+
+               nfi_write32(snf, NFI_FDML(i), vall);
+               nfi_write32(snf, NFI_FDMM(i), valm);
+
+               oobptr += fdm_size;
+       }
+}
+
+static void mtk_snand_bm_swap(struct mtk_snand *snf, u8 *buf)
+{
+       u32 buf_bbm_pos, fdm_bbm_pos;
+
+       if (!snf->caps->bbm_swap || snf->nfi_cfg.nsectors == 1)
+               return;
+
+       // swap the byte at [page_size] on the nand page with the first FDM
+       // byte in the last sector.
+       buf_bbm_pos = snf->nfi_cfg.page_size -
+                     (snf->nfi_cfg.nsectors - 1) * snf->nfi_cfg.spare_size;
+       fdm_bbm_pos = snf->nfi_cfg.page_size +
+                     (snf->nfi_cfg.nsectors - 1) * snf->caps->fdm_size;
+
+       swap(snf->buf[fdm_bbm_pos], buf[buf_bbm_pos]);
+}
+
+static void mtk_snand_fdm_bm_swap(struct mtk_snand *snf)
+{
+       u32 fdm_bbm_pos1, fdm_bbm_pos2;
+
+       if (!snf->caps->bbm_swap || snf->nfi_cfg.nsectors == 1)
+               return;
+
+       // swap the first fdm byte in the first and the last sector.
+       fdm_bbm_pos1 = snf->nfi_cfg.page_size;
+       fdm_bbm_pos2 = snf->nfi_cfg.page_size +
+                      (snf->nfi_cfg.nsectors - 1) * snf->caps->fdm_size;
+       swap(snf->buf[fdm_bbm_pos1], snf->buf[fdm_bbm_pos2]);
+}
+
+static int mtk_snand_read_page_cache(struct mtk_snand *snf,
+                                    const struct spi_mem_op *op)
+{
+       u8 *buf = snf->buf;
+       u8 *buf_fdm = buf + snf->nfi_cfg.page_size;
+       // the address part to be sent by the controller
+       u32 op_addr = op->addr.val;
+       // where to start copying data from bounce buffer
+       u32 rd_offset = 0;
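+       // dummy cycles = dummy bytes * 8 / bus width (e.g. one dummy byte on
+       // a quad bus is 8 / 4 = 2 cycles)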
+       u32 dummy_clk = (op->dummy.nbytes * BITS_PER_BYTE / op->dummy.buswidth);
+       u32 op_mode = 0;
+       u32 dma_len = snf->buf_len;
+       int ret = 0;
+       u32 rd_mode, rd_bytes, val;
+       dma_addr_t buf_dma;
+
+       if (snf->autofmt) {
+               u32 last_bit;
+               u32 mask;
+
+               dma_len = snf->nfi_cfg.page_size;
+               op_mode = CNFG_AUTO_FMT_EN;
+               if (op->data.ecc)
+                       op_mode |= CNFG_HW_ECC_EN;
+               // extract the plane bit:
+               // Find the highest bit set in (pagesize+oobsize).
+               // Bits higher than that in op->addr are kept and sent over SPI;
+               // lower bits are used as an offset for copying data from the
+               // DMA bounce buffer.
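+               // (e.g. page_size = 2048 and oob_size = 64 give fls(2112) = 12,
+               // so mask = 0xfff: bits 0-11 of op->addr become rd_offset and
+               // bit 12 and up, including the plane bit, stay in op_addr --
+               // illustrative numbers.)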
+               last_bit = fls(snf->nfi_cfg.page_size + snf->nfi_cfg.oob_size);
+               mask = (1 << last_bit) - 1;
+               rd_offset = op_addr & mask;
+               op_addr &= ~mask;
+
+               // check if we can dma to the caller memory
+               if (rd_offset == 0 && op->data.nbytes >= snf->nfi_cfg.page_size)
+                       buf = op->data.buf.in;
+       }
+       mtk_snand_mac_reset(snf);
+       mtk_nfi_reset(snf);
+
+       // command and dummy cycles
+       nfi_write32(snf, SNF_RD_CTL2,
+                   (dummy_clk << DATA_READ_DUMMY_S) |
+                           (op->cmd.opcode << DATA_READ_CMD_S));
+
+       // read address
+       nfi_write32(snf, SNF_RD_CTL3, op_addr);
+
+       // Set read op_mode
+       if (op->data.buswidth == 4)
+               rd_mode = op->addr.buswidth == 4 ? DATA_READ_MODE_QUAD :
+                                                  DATA_READ_MODE_X4;
+       else if (op->data.buswidth == 2)
+               rd_mode = op->addr.buswidth == 2 ? DATA_READ_MODE_DUAL :
+                                                  DATA_READ_MODE_X2;
+       else
+               rd_mode = DATA_READ_MODE_X1;
+       rd_mode <<= DATA_READ_MODE_S;
+       nfi_rmw32(snf, SNF_MISC_CTL, DATA_READ_MODE,
+                 rd_mode | DATARD_CUSTOM_EN);
+
+       // Set bytes to read
+       rd_bytes = (snf->nfi_cfg.spare_size + snf->caps->sector_size) *
+                  snf->nfi_cfg.nsectors;
+       nfi_write32(snf, SNF_MISC_CTL2,
+                   (rd_bytes << PROGRAM_LOAD_BYTE_NUM_S) | rd_bytes);
+
+       // NFI read prepare
+       nfi_write16(snf, NFI_CNFG,
+                   (CNFG_OP_MODE_CUST << CNFG_OP_MODE_S) | CNFG_DMA_BURST_EN |
+                           CNFG_READ_MODE | CNFG_DMA_MODE | op_mode);
+
+       nfi_write32(snf, NFI_CON, (snf->nfi_cfg.nsectors << CON_SEC_NUM_S));
+
+       buf_dma = dma_map_single(snf->dev, buf, dma_len, DMA_FROM_DEVICE);
+       ret = dma_mapping_error(snf->dev, buf_dma);
+       if (ret) {
+               dev_err(snf->dev, "DMA mapping failed.\n");
+               goto cleanup;
+       }
+       nfi_write32(snf, NFI_STRADDR, buf_dma);
+       if (op->data.ecc) {
+               snf->ecc_cfg->op = ECC_DECODE;
+               ret = mtk_ecc_enable(snf->ecc, snf->ecc_cfg);
+               if (ret)
+                       goto cleanup_dma;
+       }
+       // Prepare for custom read interrupt
+       nfi_write32(snf, NFI_INTR_EN, NFI_IRQ_INTR_EN | NFI_IRQ_CUS_READ);
+       reinit_completion(&snf->op_done);
+
+       // Trigger NFI into custom mode
+       nfi_write16(snf, NFI_CMD, NFI_CMD_DUMMY_READ);
+
+       // Start DMA read
+       nfi_rmw32(snf, NFI_CON, 0, CON_BRD);
+       nfi_write16(snf, NFI_STRDATA, STR_DATA);
+
+       if (!wait_for_completion_timeout(
+                   &snf->op_done, usecs_to_jiffies(SNFI_POLL_INTERVAL))) {
+               dev_err(snf->dev, "DMA timed out for reading from cache.\n");
+               ret = -ETIMEDOUT;
+               goto cleanup;
+       }
+
+       // Wait for BUS_SEC_CNTR to reach the expected value
+       ret = readl_poll_timeout(snf->nfi_base + NFI_BYTELEN, val,
+                                BUS_SEC_CNTR(val) >= snf->nfi_cfg.nsectors, 0,
+                                SNFI_POLL_INTERVAL);
+       if (ret) {
+               dev_err(snf->dev, "Timed out waiting for BUS_SEC_CNTR\n");
+               goto cleanup2;
+       }
+
+       // Wait for the bus to become idle
+       ret = readl_poll_timeout(snf->nfi_base + NFI_MASTERSTA, val,
+                                !(val & snf->caps->mastersta_mask), 0,
+                                SNFI_POLL_INTERVAL);
+       if (ret) {
+               dev_err(snf->dev, "Timed out waiting for bus becoming idle\n");
+               goto cleanup2;
+       }
+
+       if (op->data.ecc) {
+               ret = mtk_ecc_wait_done(snf->ecc, ECC_DECODE);
+               if (ret) {
+                       dev_err(snf->dev, "wait ecc done timeout\n");
+                       goto cleanup2;
+               }
+               // save status before disabling ecc
+               mtk_ecc_get_stats(snf->ecc, &snf->ecc_stats,
+                                 snf->nfi_cfg.nsectors);
+       }
+
+       dma_unmap_single(snf->dev, buf_dma, dma_len, DMA_FROM_DEVICE);
+
+       if (snf->autofmt) {
+               mtk_snand_read_fdm(snf, buf_fdm);
+               if (snf->caps->bbm_swap) {
+                       mtk_snand_bm_swap(snf, buf);
+                       mtk_snand_fdm_bm_swap(snf);
+               }
+       }
+
+       // copy data back
+       if (nfi_read32(snf, NFI_STA) & READ_EMPTY) {
+               memset(op->data.buf.in, 0xff, op->data.nbytes);
+               snf->ecc_stats.bitflips = 0;
+               snf->ecc_stats.failed = 0;
+               snf->ecc_stats.corrected = 0;
+       } else {
+               if (buf == op->data.buf.in) {
+                       u32 cap_len = snf->buf_len - snf->nfi_cfg.page_size;
+                       u32 req_left = op->data.nbytes - snf->nfi_cfg.page_size;
+
+                       if (req_left)
+                               memcpy(op->data.buf.in + snf->nfi_cfg.page_size,
+                                      buf_fdm,
+                                      cap_len < req_left ? cap_len : req_left);
+               } else if (rd_offset < snf->buf_len) {
+                       u32 cap_len = snf->buf_len - rd_offset;
+
+                       if (op->data.nbytes < cap_len)
+                               cap_len = op->data.nbytes;
+                       memcpy(op->data.buf.in, snf->buf + rd_offset, cap_len);
+               }
+       }
+cleanup2:
+       if (op->data.ecc)
+               mtk_ecc_disable(snf->ecc);
+cleanup_dma:
+       // unmap DMA only if an error happened (otherwise it was already done
+       // before copying the data out)
+       if (ret)
+               dma_unmap_single(snf->dev, buf_dma, dma_len, DMA_FROM_DEVICE);
+cleanup:
+       // Stop read
+       nfi_write32(snf, NFI_CON, 0);
+       nfi_write16(snf, NFI_CNFG, 0);
+
+       // Clear SNF done flag
+       nfi_rmw32(snf, SNF_STA_CTL1, 0, CUS_READ_DONE);
+       nfi_write32(snf, SNF_STA_CTL1, 0);
+
+       // Disable interrupt
+       nfi_read32(snf, NFI_INTR_STA);
+       nfi_write32(snf, NFI_INTR_EN, 0);
+
+       nfi_rmw32(snf, SNF_MISC_CTL, DATARD_CUSTOM_EN, 0);
+       return ret;
+}
+
+static int mtk_snand_write_page_cache(struct mtk_snand *snf,
+                                     const struct spi_mem_op *op)
+{
+       // the address part to be sent by the controller
+       u32 op_addr = op->addr.val;
+       // where to start copying data from bounce buffer
+       u32 wr_offset = 0;
+       u32 op_mode = 0;
+       int ret = 0;
+       u32 wr_mode = 0;
+       u32 dma_len = snf->buf_len;
+       u32 wr_bytes, val;
+       size_t cap_len;
+       dma_addr_t buf_dma;
+
+       if (snf->autofmt) {
+               u32 last_bit;
+               u32 mask;
+
+               dma_len = snf->nfi_cfg.page_size;
+               op_mode = CNFG_AUTO_FMT_EN;
+               if (op->data.ecc)
+                       op_mode |= CNFG_HW_ECC_EN;
+
+               last_bit = fls(snf->nfi_cfg.page_size + snf->nfi_cfg.oob_size);
+               mask = (1 << last_bit) - 1;
+               wr_offset = op_addr & mask;
+               op_addr &= ~mask;
+       }
+       mtk_snand_mac_reset(snf);
+       mtk_nfi_reset(snf);
+
+       if (wr_offset)
+               memset(snf->buf, 0xff, wr_offset);
+
+       cap_len = snf->buf_len - wr_offset;
+       if (op->data.nbytes < cap_len)
+               cap_len = op->data.nbytes;
+       memcpy(snf->buf + wr_offset, op->data.buf.out, cap_len);
+       if (snf->autofmt) {
+               if (snf->caps->bbm_swap) {
+                       mtk_snand_fdm_bm_swap(snf);
+                       mtk_snand_bm_swap(snf, snf->buf);
+               }
+               mtk_snand_write_fdm(snf, snf->buf + snf->nfi_cfg.page_size);
+       }
+
+       // Command
+       nfi_write32(snf, SNF_PG_CTL1, (op->cmd.opcode << PG_LOAD_CMD_S));
+
+       // write address
+       nfi_write32(snf, SNF_PG_CTL2, op_addr);
+
+       // Set write op_mode
+       if (op->data.buswidth == 4)
+               wr_mode = PG_LOAD_X4_EN;
+
+       nfi_rmw32(snf, SNF_MISC_CTL, PG_LOAD_X4_EN,
+                 wr_mode | PG_LOAD_CUSTOM_EN);
+
+       // Set bytes to write
+       wr_bytes = (snf->nfi_cfg.spare_size + snf->caps->sector_size) *
+                  snf->nfi_cfg.nsectors;
+       nfi_write32(snf, SNF_MISC_CTL2,
+                   (wr_bytes << PROGRAM_LOAD_BYTE_NUM_S) | wr_bytes);
+
+       // NFI write prepare
+       nfi_write16(snf, NFI_CNFG,
+                   (CNFG_OP_MODE_PROGRAM << CNFG_OP_MODE_S) |
+                           CNFG_DMA_BURST_EN | CNFG_DMA_MODE | op_mode);
+
+       nfi_write32(snf, NFI_CON, (snf->nfi_cfg.nsectors << CON_SEC_NUM_S));
+       buf_dma = dma_map_single(snf->dev, snf->buf, dma_len, DMA_TO_DEVICE);
+       ret = dma_mapping_error(snf->dev, buf_dma);
+       if (ret) {
+               dev_err(snf->dev, "DMA mapping failed.\n");
+               goto cleanup;
+       }
+       nfi_write32(snf, NFI_STRADDR, buf_dma);
+       if (op->data.ecc) {
+               snf->ecc_cfg->op = ECC_ENCODE;
+               ret = mtk_ecc_enable(snf->ecc, snf->ecc_cfg);
+               if (ret)
+                       goto cleanup_dma;
+       }
+       // Prepare for custom write interrupt
+       nfi_write32(snf, NFI_INTR_EN, NFI_IRQ_INTR_EN | NFI_IRQ_CUS_PG);
+       reinit_completion(&snf->op_done);
+
+       // Trigger NFI into custom mode
+       nfi_write16(snf, NFI_CMD, NFI_CMD_DUMMY_WRITE);
+
+       // Start DMA write
+       nfi_rmw32(snf, NFI_CON, 0, CON_BWR);
+       nfi_write16(snf, NFI_STRDATA, STR_DATA);
+
+       if (!wait_for_completion_timeout(
+                   &snf->op_done, usecs_to_jiffies(SNFI_POLL_INTERVAL))) {
+               dev_err(snf->dev, "DMA timed out for program load.\n");
+               ret = -ETIMEDOUT;
+               goto cleanup_ecc;
+       }
+
+       // Wait for NFI_SEC_CNTR to reach the expected value
+       ret = readl_poll_timeout(snf->nfi_base + NFI_ADDRCNTR, val,
+                                NFI_SEC_CNTR(val) >= snf->nfi_cfg.nsectors, 0,
+                                SNFI_POLL_INTERVAL);
+       if (ret)
+               dev_err(snf->dev, "Timed out waiting for NFI_SEC_CNTR\n");
+
+cleanup_ecc:
+       if (op->data.ecc)
+               mtk_ecc_disable(snf->ecc);
+cleanup_dma:
+       dma_unmap_single(snf->dev, buf_dma, dma_len, DMA_TO_DEVICE);
+cleanup:
+       // Stop write
+       nfi_write32(snf, NFI_CON, 0);
+       nfi_write16(snf, NFI_CNFG, 0);
+
+       // Clear SNF done flag
+       nfi_rmw32(snf, SNF_STA_CTL1, 0, CUS_PG_DONE);
+       nfi_write32(snf, SNF_STA_CTL1, 0);
+
+       // Disable interrupt
+       nfi_read32(snf, NFI_INTR_STA);
+       nfi_write32(snf, NFI_INTR_EN, 0);
+
+       nfi_rmw32(snf, SNF_MISC_CTL, PG_LOAD_CUSTOM_EN, 0);
+
+       return ret;
+}
+
+/**
+ * mtk_snand_is_page_ops() - check if the op is a controller-supported page op.
+ * @op: spi-mem op to check
+ *
+ * Check whether op can be executed with read_from_cache or program_load
+ * mode in the controller.
+ * This controller can execute typical Read From Cache and Program Load
+ * instructions found on SPI-NAND with 2-byte address.
+ * DTR and cmd buswidth & nbytes should be checked before calling this.
+ *
+ * Return: true if the op matches the instruction template
+ */
+static bool mtk_snand_is_page_ops(const struct spi_mem_op *op)
+{
+       if (op->addr.nbytes != 2)
+               return false;
+
+       if (op->addr.buswidth != 1 && op->addr.buswidth != 2 &&
+           op->addr.buswidth != 4)
+               return false;
+
+       // match read from page instructions
+       if (op->data.dir == SPI_MEM_DATA_IN) {
+               // check dummy cycle first
+               if (op->dummy.nbytes * BITS_PER_BYTE / op->dummy.buswidth >
+                   DATA_READ_MAX_DUMMY)
+                       return false;
+               // quad io / quad out
+               if ((op->addr.buswidth == 4 || op->addr.buswidth == 1) &&
+                   op->data.buswidth == 4)
+                       return true;
+
+               // dual io / dual out
+               if ((op->addr.buswidth == 2 || op->addr.buswidth == 1) &&
+                   op->data.buswidth == 2)
+                       return true;
+
+               // standard spi
+               if (op->addr.buswidth == 1 && op->data.buswidth == 1)
+                       return true;
+       } else if (op->data.dir == SPI_MEM_DATA_OUT) {
+               // check dummy cycle first
+               if (op->dummy.nbytes)
+                       return false;
+               // program load quad out
+               if (op->addr.buswidth == 1 && op->data.buswidth == 4)
+                       return true;
+               // standard spi
+               if (op->addr.buswidth == 1 && op->data.buswidth == 1)
+                       return true;
+       }
+       return false;
+}
+
+static bool mtk_snand_supports_op(struct spi_mem *mem,
+                                 const struct spi_mem_op *op)
+{
+       if (!spi_mem_default_supports_op(mem, op))
+               return false;
+       if (op->cmd.nbytes != 1 || op->cmd.buswidth != 1)
+               return false;
+       if (mtk_snand_is_page_ops(op))
+               return true;
+       return ((op->addr.nbytes == 0 || op->addr.buswidth == 1) &&
+               (op->dummy.nbytes == 0 || op->dummy.buswidth == 1) &&
+               (op->data.nbytes == 0 || op->data.buswidth == 1));
+}
+
+static int mtk_snand_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+       struct mtk_snand *ms = spi_controller_get_devdata(mem->spi->master);
+       // page ops transfer size must be exactly ((sector_size + spare_size) *
+       // nsectors). Limit the op size if the caller requests more than that.
+       // exec_op will read more than needed and discard the leftover if the
+       // caller requests less data.
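+       // (e.g. 512-byte sectors with a 16-byte spare area and 4 sectors fix
+       // the transfer at (512 + 16) * 4 = 2112 bytes -- illustrative numbers.)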
+       if (mtk_snand_is_page_ops(op)) {
+               size_t l;
+               // skip adjust_op_size for page ops
+               if (ms->autofmt)
+                       return 0;
+               l = ms->caps->sector_size + ms->nfi_cfg.spare_size;
+               l *= ms->nfi_cfg.nsectors;
+               if (op->data.nbytes > l)
+                       op->data.nbytes = l;
+       } else {
+               size_t hl = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
+
+               if (hl >= SNF_GPRAM_SIZE)
+                       return -EOPNOTSUPP;
+               if (op->data.nbytes > SNF_GPRAM_SIZE - hl)
+                       op->data.nbytes = SNF_GPRAM_SIZE - hl;
+       }
+       return 0;
+}
+
+static int mtk_snand_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+       struct mtk_snand *ms = spi_controller_get_devdata(mem->spi->master);
+
+       dev_dbg(ms->dev, "OP %02x ADDR %08llX@%d:%u DATA %d:%u", op->cmd.opcode,
+               op->addr.val, op->addr.buswidth, op->addr.nbytes,
+               op->data.buswidth, op->data.nbytes);
+       if (mtk_snand_is_page_ops(op)) {
+               if (op->data.dir == SPI_MEM_DATA_IN)
+                       return mtk_snand_read_page_cache(ms, op);
+               else
+                       return mtk_snand_write_page_cache(ms, op);
+       } else {
+               return mtk_snand_mac_io(ms, op);
+       }
+}
+
+static const struct spi_controller_mem_ops mtk_snand_mem_ops = {
+       .adjust_op_size = mtk_snand_adjust_op_size,
+       .supports_op = mtk_snand_supports_op,
+       .exec_op = mtk_snand_exec_op,
+};
+
+static const struct spi_controller_mem_caps mtk_snand_mem_caps = {
+       .ecc = true,
+};
+
+static irqreturn_t mtk_snand_irq(int irq, void *id)
+{
+       struct mtk_snand *snf = id;
+       u32 sta, ien;
+
+       sta = nfi_read32(snf, NFI_INTR_STA);
+       ien = nfi_read32(snf, NFI_INTR_EN);
+
+       if (!(sta & ien))
+               return IRQ_NONE;
+
+       nfi_write32(snf, NFI_INTR_EN, 0);
+       complete(&snf->op_done);
+       return IRQ_HANDLED;
+}
+
+static const struct of_device_id mtk_snand_ids[] = {
+       { .compatible = "mediatek,mt7622-snand", .data = &mt7622_snand_caps },
+       { .compatible = "mediatek,mt7629-snand", .data = &mt7629_snand_caps },
+       {},
+};
+
+MODULE_DEVICE_TABLE(of, mtk_snand_ids);
+
+static int mtk_snand_enable_clk(struct mtk_snand *ms)
+{
+       int ret;
+
+       ret = clk_prepare_enable(ms->nfi_clk);
+       if (ret) {
+               dev_err(ms->dev, "unable to enable nfi clk\n");
+               return ret;
+       }
+       ret = clk_prepare_enable(ms->pad_clk);
+       if (ret) {
+               dev_err(ms->dev, "unable to enable pad clk\n");
+               goto err1;
+       }
+       return 0;
+err1:
+       clk_disable_unprepare(ms->nfi_clk);
+       return ret;
+}
+
+static void mtk_snand_disable_clk(struct mtk_snand *ms)
+{
+       clk_disable_unprepare(ms->pad_clk);
+       clk_disable_unprepare(ms->nfi_clk);
+}
+
+static int mtk_snand_probe(struct platform_device *pdev)
+{
+       struct device_node *np = pdev->dev.of_node;
+       const struct of_device_id *dev_id;
+       struct spi_controller *ctlr;
+       struct mtk_snand *ms;
+       int ret;
+
+       dev_id = of_match_node(mtk_snand_ids, np);
+       if (!dev_id)
+               return -EINVAL;
+
+       ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*ms));
+       if (!ctlr)
+               return -ENOMEM;
+       platform_set_drvdata(pdev, ctlr);
+
+       ms = spi_controller_get_devdata(ctlr);
+
+       ms->ctlr = ctlr;
+       ms->caps = dev_id->data;
+
+       ms->ecc = of_mtk_ecc_get(np);
+       if (IS_ERR(ms->ecc))
+               return PTR_ERR(ms->ecc);
+       else if (!ms->ecc)
+               return -ENODEV;
+
+       ms->nfi_base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(ms->nfi_base)) {
+               ret = PTR_ERR(ms->nfi_base);
+               goto release_ecc;
+       }
+
+       ms->dev = &pdev->dev;
+
+       ms->nfi_clk = devm_clk_get(&pdev->dev, "nfi_clk");
+       if (IS_ERR(ms->nfi_clk)) {
+               ret = PTR_ERR(ms->nfi_clk);
+               dev_err(&pdev->dev, "unable to get nfi_clk, err = %d\n", ret);
+               goto release_ecc;
+       }
+
+       ms->pad_clk = devm_clk_get(&pdev->dev, "pad_clk");
+       if (IS_ERR(ms->pad_clk)) {
+               ret = PTR_ERR(ms->pad_clk);
+               dev_err(&pdev->dev, "unable to get pad_clk, err = %d\n", ret);
+               goto release_ecc;
+       }
+
+       ret = mtk_snand_enable_clk(ms);
+       if (ret)
+               goto release_ecc;
+
+       init_completion(&ms->op_done);
+
+       ms->irq = platform_get_irq(pdev, 0);
+       if (ms->irq < 0) {
+               ret = ms->irq;
+               goto disable_clk;
+       }
+       ret = devm_request_irq(ms->dev, ms->irq, mtk_snand_irq, 0x0,
+                              "mtk-snand", ms);
+       if (ret) {
+               dev_err(ms->dev, "failed to request snfi irq\n");
+               goto disable_clk;
+       }
+
+       ret = dma_set_mask(ms->dev, DMA_BIT_MASK(32));
+       if (ret) {
+               dev_err(ms->dev, "failed to set dma mask\n");
+               goto disable_clk;
+       }
+
+       // switch to SNFI mode
+       nfi_write32(ms, SNF_CFG, SPI_MODE);
+
+       // setup an initial page format for ops matching page_cache_op template
+       // before ECC is called.
+       ret = mtk_snand_setup_pagefmt(ms, ms->caps->sector_size,
+                                     ms->caps->spare_sizes[0]);
+       if (ret) {
+               dev_err(ms->dev, "failed to set initial page format\n");
+               goto disable_clk;
+       }
+
+       // setup ECC engine
+       ms->ecc_eng.dev = &pdev->dev;
+       ms->ecc_eng.integration = NAND_ECC_ENGINE_INTEGRATION_PIPELINED;
+       ms->ecc_eng.ops = &mtk_snfi_ecc_engine_ops;
+       ms->ecc_eng.priv = ms;
+
+       ret = nand_ecc_register_on_host_hw_engine(&ms->ecc_eng);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to register ecc engine.\n");
+               goto disable_clk;
+       }
+
+       ctlr->num_chipselect = 1;
+       ctlr->mem_ops = &mtk_snand_mem_ops;
+       ctlr->mem_caps = &mtk_snand_mem_caps;
+       ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+       ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD | SPI_TX_DUAL | SPI_TX_QUAD;
+       ctlr->dev.of_node = pdev->dev.of_node;
+       ret = spi_register_controller(ctlr);
+       if (ret) {
+               dev_err(&pdev->dev, "spi_register_controller failed.\n");
+               goto disable_clk;
+       }
+
+       return 0;
+disable_clk:
+       mtk_snand_disable_clk(ms);
+release_ecc:
+       mtk_ecc_release(ms->ecc);
+       return ret;
+}
+
+static int mtk_snand_remove(struct platform_device *pdev)
+{
+       struct spi_controller *ctlr = platform_get_drvdata(pdev);
+       struct mtk_snand *ms = spi_controller_get_devdata(ctlr);
+
+       spi_unregister_controller(ctlr);
+       mtk_snand_disable_clk(ms);
+       mtk_ecc_release(ms->ecc);
+       kfree(ms->buf);
+       return 0;
+}
+
+static struct platform_driver mtk_snand_driver = {
+       .probe = mtk_snand_probe,
+       .remove = mtk_snand_remove,
+       .driver = {
+               .name = "mtk-snand",
+               .of_match_table = mtk_snand_ids,
+       },
+};
+
+module_platform_driver(mtk_snand_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Chuanhong Guo <gch981213@gmail.com>");
+MODULE_DESCRIPTION("MeidaTek SPI-NAND Flash Controller Driver");
index 435309b09227ea3297651a51531ab429538b48e3..55178579f3c6fb73eaa0fd896d687f9398bf7ad6 100644 (file)
@@ -605,9 +605,8 @@ static int mxs_spi_probe(struct platform_device *pdev)
                }
        }
 
-       ret = pm_runtime_get_sync(ssp->dev);
+       ret = pm_runtime_resume_and_get(ssp->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(ssp->dev);
                dev_err(ssp->dev, "runtime_get_sync failed\n");
                goto out_pm_runtime_disable;
        }
index 60c9cdf1c94b455023567226cf190cd1a4862444..c42e59df38fedf3af18bf866382a8d4bb1499276 100644 (file)
@@ -246,9 +246,8 @@ static void omap2_mcspi_set_cs(struct spi_device *spi, bool enable)
                enable = !enable;
 
        if (spi->controller_state) {
-               int err = pm_runtime_get_sync(mcspi->dev);
+               int err = pm_runtime_resume_and_get(mcspi->dev);
                if (err < 0) {
-                       pm_runtime_put_noidle(mcspi->dev);
                        dev_err(mcspi->dev, "failed to get sync: %d\n", err);
                        return;
                }
@@ -758,6 +757,8 @@ omap2_mcspi_txrx_pio(struct spi_device *spi, struct spi_transfer *xfer)
                                dev_vdbg(&spi->dev, "read-%d %02x\n",
                                                word_len, *(rx - 1));
                        }
+                       /* Add word delay between each word */
+                       spi_delay_exec(&xfer->word_delay, xfer);
                } while (c);
        } else if (word_len <= 16) {
                u16             *rx;
@@ -805,6 +806,8 @@ omap2_mcspi_txrx_pio(struct spi_device *spi, struct spi_transfer *xfer)
                                dev_vdbg(&spi->dev, "read-%d %04x\n",
                                                word_len, *(rx - 1));
                        }
+                       /* Add word delay between each word */
+                       spi_delay_exec(&xfer->word_delay, xfer);
                } while (c >= 2);
        } else if (word_len <= 32) {
                u32             *rx;
@@ -852,6 +855,8 @@ omap2_mcspi_txrx_pio(struct spi_device *spi, struct spi_transfer *xfer)
                                dev_vdbg(&spi->dev, "read-%d %08x\n",
                                                word_len, *(rx - 1));
                        }
+                       /* Add word delay between each word */
+                       spi_delay_exec(&xfer->word_delay, xfer);
                } while (c >= 4);
        }
 
@@ -1068,9 +1073,8 @@ static int omap2_mcspi_setup(struct spi_device *spi)
                initial_setup = true;
        }
 
-       ret = pm_runtime_get_sync(mcspi->dev);
+       ret = pm_runtime_resume_and_get(mcspi->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(mcspi->dev);
                if (initial_setup)
                        omap2_mcspi_cleanup(spi);
 
@@ -1317,12 +1321,9 @@ static int omap2_mcspi_controller_setup(struct omap2_mcspi *mcspi)
        struct omap2_mcspi_regs *ctx = &mcspi->ctx;
        int                     ret = 0;
 
-       ret = pm_runtime_get_sync(mcspi->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(mcspi->dev);
-
+       ret = pm_runtime_resume_and_get(mcspi->dev);
+       if (ret < 0)
                return ret;
-       }
 
        mcspi_write_reg(master, OMAP2_MCSPI_WAKEUPENABLE,
                        OMAP2_MCSPI_WAKEUPENABLE_WKEN);
index cdc16eecaf6b581ec11735d33bd5de6a6f11b3b8..a08215eb9e1486c84aad623767c3a1c2f3ebfd73 100644 (file)
@@ -196,6 +196,8 @@ struct rockchip_spi {
 
        bool slave_abort;
       bool cs_inactive; /* SPI slave transmission stops when CS is inactive */
+       bool cs_high_supported; /* native CS supports active-high polarity */
+
        struct spi_transfer *xfer; /* Store xfer temporarily */
 };
 
@@ -719,6 +721,11 @@ static int rockchip_spi_setup(struct spi_device *spi)
        struct rockchip_spi *rs = spi_controller_get_devdata(spi->controller);
        u32 cr0;
 
+       if (!spi->cs_gpiod && (spi->mode & SPI_CS_HIGH) && !rs->cs_high_supported) {
+               dev_warn(&spi->dev, "setup: non GPIO CS can't be active-high\n");
+               return -EINVAL;
+       }
+
        pm_runtime_get_sync(rs->dev);
 
        cr0 = readl_relaxed(rs->regs + ROCKCHIP_SPI_CTRLR0);
@@ -899,6 +906,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 
        switch (readl_relaxed(rs->regs + ROCKCHIP_SPI_VERSION)) {
        case ROCKCHIP_SPI_VER2_TYPE2:
+               rs->cs_high_supported = true;
                ctlr->mode_bits |= SPI_CS_HIGH;
                if (ctlr->can_dma && slave_mode)
                        rs->cs_inactive = true;
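
Only the VER2_TYPE2 revision of the Rockchip controller can drive a native
chip select active-high, so the new cs_high_supported flag lets setup()
reject SPI_CS_HIGH on older IP instead of silently driving the line with the
wrong polarity. From the client side the request looks like this (sketch;
the error handling is illustrative):

	spi->mode |= SPI_CS_HIGH;	/* ask for an active-high native CS */
	ret = spi_setup(spi);
	if (ret)			/* now -EINVAL on unsupported IP */
		dev_err(&spi->dev, "active-high CS rejected: %d\n", ret);
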
index bd5708d7e5a15b7bbc079d9b9416b48fe8f7bcc3..7a014eeec2d0d9a2072a4190042d14ffe14bf454 100644 (file)
@@ -1108,14 +1108,11 @@ static struct dma_chan *rspi_request_dma_chan(struct device *dev,
        }
 
        memset(&cfg, 0, sizeof(cfg));
+       cfg.dst_addr = port_addr + RSPI_SPDR;
+       cfg.src_addr = port_addr + RSPI_SPDR;
+       cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+       cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
        cfg.direction = dir;
-       if (dir == DMA_MEM_TO_DEV) {
-               cfg.dst_addr = port_addr;
-               cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
-       } else {
-               cfg.src_addr = port_addr;
-               cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
-       }
 
        ret = dmaengine_slave_config(chan, &cfg);
        if (ret) {
@@ -1146,12 +1143,12 @@ static int rspi_request_dma(struct device *dev, struct spi_controller *ctlr,
        }
 
        ctlr->dma_tx = rspi_request_dma_chan(dev, DMA_MEM_TO_DEV, dma_tx_id,
-                                            res->start + RSPI_SPDR);
+                                            res->start);
        if (!ctlr->dma_tx)
                return -ENODEV;
 
        ctlr->dma_rx = rspi_request_dma_chan(dev, DMA_DEV_TO_MEM, dma_rx_id,
-                                            res->start + RSPI_SPDR);
+                                            res->start);
        if (!ctlr->dma_rx) {
                dma_release_channel(ctlr->dma_tx);
                ctlr->dma_tx = NULL;
index 28e70db9bbba852d40ab53d211aada4353cf93a2..65b8075da4eb50a2a1a57a8f474a9f4c1ee4eb5e 100644 (file)
@@ -1008,9 +1008,8 @@ static int sprd_spi_remove(struct platform_device *pdev)
        struct sprd_spi *ss = spi_controller_get_devdata(sctlr);
        int ret;
 
-       ret = pm_runtime_get_sync(ss->dev);
+       ret = pm_runtime_resume_and_get(ss->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(ss->dev);
                dev_err(ss->dev, "failed to resume SPI controller\n");
                return ret;
        }
index ffdc55f87e8210bb36da18e9658214eba006b6c5..c0239e405c392218e174b08fabbde8c9199a69a9 100644 (file)
@@ -305,10 +305,8 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
        u32 cr, sr;
        int err = 0;
 
-       if (!op->data.nbytes)
-               goto wait_nobusy;
-
-       if (readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF)
+       if ((readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF) ||
+           qspi->fmode == CCR_FMODE_APM)
                goto out;
 
        reinit_completion(&qspi->data_completion);
@@ -327,7 +325,6 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
 out:
        /* clear flags */
        writel_relaxed(FCR_CTCF | FCR_CTEF, qspi->io_base + QSPI_FCR);
-wait_nobusy:
        if (!err)
                err = stm32_qspi_wait_nobusy(qspi);
 
@@ -372,10 +369,6 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
                op->dummy.buswidth, op->data.buswidth,
                op->addr.val, op->data.nbytes);
 
-       err = stm32_qspi_wait_nobusy(qspi);
-       if (err)
-               goto abort;
-
        cr = readl_relaxed(qspi->io_base + QSPI_CR);
        cr &= ~CR_PRESC_MASK & ~CR_FSEL;
        cr |= FIELD_PREP(CR_PRESC_MASK, flash->presc);
@@ -463,11 +456,9 @@ static int stm32_qspi_poll_status(struct spi_mem *mem, const struct spi_mem_op *
        if (!spi_mem_supports_op(mem, op))
                return -EOPNOTSUPP;
 
-       ret = pm_runtime_get_sync(qspi->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(qspi->dev);
+       ret = pm_runtime_resume_and_get(qspi->dev);
+       if (ret < 0)
                return ret;
-       }
 
        mutex_lock(&qspi->lock);
 
@@ -490,11 +481,9 @@ static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
        struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
        int ret;
 
-       ret = pm_runtime_get_sync(qspi->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(qspi->dev);
+       ret = pm_runtime_resume_and_get(qspi->dev);
+       if (ret < 0)
                return ret;
-       }
 
        mutex_lock(&qspi->lock);
        if (op->data.dir == SPI_MEM_DATA_IN && op->data.nbytes)
@@ -536,11 +525,9 @@ static ssize_t stm32_qspi_dirmap_read(struct spi_mem_dirmap_desc *desc,
        u32 addr_max;
        int ret;
 
-       ret = pm_runtime_get_sync(qspi->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(qspi->dev);
+       ret = pm_runtime_resume_and_get(qspi->dev);
+       if (ret < 0)
                return ret;
-       }
 
        mutex_lock(&qspi->lock);
        /* make a local copy of desc op_tmpl and complete dirmap rdesc
@@ -583,11 +570,9 @@ static int stm32_qspi_setup(struct spi_device *spi)
        if (!spi->max_speed_hz)
                return -EINVAL;
 
-       ret = pm_runtime_get_sync(qspi->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(qspi->dev);
+       ret = pm_runtime_resume_and_get(qspi->dev);
+       if (ret < 0)
                return ret;
-       }
 
        presc = DIV_ROUND_UP(qspi->clk_rate, spi->max_speed_hz) - 1;
 
@@ -851,11 +836,9 @@ static int __maybe_unused stm32_qspi_resume(struct device *dev)
 
        pinctrl_pm_select_default_state(dev);
 
-       ret = pm_runtime_get_sync(dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(dev);
+       ret = pm_runtime_resume_and_get(dev);
+       if (ret < 0)
                return ret;
-       }
 
        writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);
        writel_relaxed(qspi->dcr_reg, qspi->io_base + QSPI_DCR);
index a6adc20f686263a2091199195f1605805e3de762..6fe617b445a595ec06312936406eb60f2644cc1b 100644 (file)
@@ -2000,9 +2000,8 @@ static int __maybe_unused stm32_spi_resume(struct device *dev)
                return ret;
        }
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(dev);
                dev_err(dev, "Unable to power device:%d\n", ret);
                return ret;
        }
index f989f7b99296175a0b4380b0f67338d31007e173..f1fa88777575dfd670dbc1d3458a31df10a19e45 100644 (file)
@@ -85,8 +85,6 @@ struct sp7021_spi_ctlr {
        int s_irq;
        struct clk *spi_clk;
        struct reset_control *rstc;
-       // irq spin lock
-       spinlock_t lock;
        // data xfer lock
        struct mutex buf_lock;
        struct completion isr_done;
@@ -199,8 +197,6 @@ static irqreturn_t sp7021_spi_master_irq(int irq, void *dev)
        if (tx_len == 0 && total_len == 0)
                return IRQ_NONE;
 
-       spin_lock_irq(&pspim->lock);
-
        rx_cnt = FIELD_GET(SP7021_RX_CNT_MASK, fd_status);
        if (fd_status & SP7021_RX_FULL_FLAG)
                rx_cnt = pspim->data_unit;
@@ -239,7 +235,6 @@ static irqreturn_t sp7021_spi_master_irq(int irq, void *dev)
 
        if (isrdone)
                complete(&pspim->isr_done);
-       spin_unlock_irq(&pspim->lock);
        return IRQ_HANDLED;
 }
 
@@ -446,7 +441,6 @@ static int sp7021_spi_controller_probe(struct platform_device *pdev)
        pspim->mode = mode;
        pspim->ctlr = ctlr;
        pspim->dev = dev;
-       spin_lock_init(&pspim->lock);
        mutex_init(&pspim->buf_lock);
        init_completion(&pspim->isr_done);
        init_completion(&pspim->slave_isr);
index 8f345247a8c3221417c2f9df498b4d39a2727a40..d9be80e3e1bcb52581af65c090b8de0b9ccc509c 100644 (file)
@@ -964,9 +964,8 @@ static int tegra_spi_setup(struct spi_device *spi)
                spi->controller_data = cdata;
        }
 
-       ret = pm_runtime_get_sync(tspi->dev);
+       ret = pm_runtime_resume_and_get(tspi->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(tspi->dev);
                dev_err(tspi->dev, "pm runtime failed, e = %d\n", ret);
                if (cdata)
                        tegra_spi_cleanup(spi);
@@ -1394,10 +1393,9 @@ static int tegra_spi_probe(struct platform_device *pdev)
                        goto exit_pm_disable;
        }
 
-       ret = pm_runtime_get_sync(&pdev->dev);
+       ret = pm_runtime_resume_and_get(&pdev->dev);
        if (ret < 0) {
                dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
-               pm_runtime_put_noidle(&pdev->dev);
                goto exit_pm_disable;
        }
 
@@ -1476,9 +1474,8 @@ static int tegra_spi_resume(struct device *dev)
        struct tegra_spi_data *tspi = spi_master_get_devdata(master);
        int ret;
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(dev);
                dev_err(dev, "pm runtime failed, e = %d\n", ret);
                return ret;
        }
index 2888d8a8dc6d5cb5f9ede2ae84be14ac03aec0ee..220ee08c4a06cc93eff57303d51c591aeec69aab 100644 (file)
@@ -486,10 +486,9 @@ static int tegra_sflash_probe(struct platform_device *pdev)
                        goto exit_pm_disable;
        }
 
-       ret = pm_runtime_get_sync(&pdev->dev);
+       ret = pm_runtime_resume_and_get(&pdev->dev);
        if (ret < 0) {
                dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
-               pm_runtime_put_noidle(&pdev->dev);
                goto exit_pm_disable;
        }
 
@@ -549,9 +548,8 @@ static int tegra_sflash_resume(struct device *dev)
        struct tegra_sflash_data *tsd = spi_master_get_devdata(master);
        int ret;
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(dev);
                dev_err(dev, "pm runtime failed, e = %d\n", ret);
                return ret;
        }
index 80c3787deea9de60bcd3be5d6aa3369313aa9511..38360434d6e9e06e7821da53b52eb8b2eba01e4e 100644 (file)
@@ -749,9 +749,8 @@ static int tegra_slink_setup(struct spi_device *spi)
                spi->mode & SPI_CPHA ? "" : "~",
                spi->max_speed_hz);
 
-       ret = pm_runtime_get_sync(tspi->dev);
+       ret = pm_runtime_resume_and_get(tspi->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(tspi->dev);
                dev_err(tspi->dev, "pm runtime failed, e = %d\n", ret);
                return ret;
        }
@@ -1169,9 +1168,8 @@ static int tegra_slink_resume(struct device *dev)
        struct tegra_slink_data *tspi = spi_master_get_devdata(master);
        int ret;
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(dev);
                dev_err(dev, "pm runtime failed, e = %d\n", ret);
                return ret;
        }
index e06aafe169e0cb5333eb800329154c28196253f1..b5b65d882d7adf77b3e283189c51d0205759e34b 100644 (file)
@@ -172,9 +172,8 @@ static int ti_qspi_setup(struct spi_device *spi)
        dev_dbg(qspi->dev, "hz: %d, clock divider %d\n",
                        qspi->spi_max_frequency, clk_div);
 
-       ret = pm_runtime_get_sync(qspi->dev);
+       ret = pm_runtime_resume_and_get(qspi->dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(qspi->dev);
                dev_err(qspi->dev, "pm_runtime_get_sync() failed\n");
                return ret;
        }
@@ -448,6 +447,7 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst,
        enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
        struct dma_async_tx_descriptor *tx;
        int ret;
+       unsigned long time_left;
 
        tx = dmaengine_prep_dma_memcpy(chan, dma_dst, dma_src, len, flags);
        if (!tx) {
@@ -467,9 +467,9 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst,
        }
 
        dma_async_issue_pending(chan);
-       ret = wait_for_completion_timeout(&qspi->transfer_complete,
+       time_left = wait_for_completion_timeout(&qspi->transfer_complete,
                                          msecs_to_jiffies(len));
-       if (ret <= 0) {
+       if (time_left == 0) {
                dmaengine_terminate_sync(chan);
                dev_err(qspi->dev, "DMA wait_for_completion_timeout\n");
                return -ETIMEDOUT;
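
wait_for_completion_timeout() returns an unsigned long (0 on timeout,
otherwise the remaining jiffies), so storing the result in the signed `ret'
and testing `ret <= 0' was subtly wrong. The corrected idiom, as a sketch
(`done' and the 100 ms budget are illustrative):

	unsigned long time_left;

	time_left = wait_for_completion_timeout(&done, msecs_to_jiffies(100));
	if (!time_left)
		return -ETIMEDOUT;	/* 0 means the wait timed out */
	/* non-zero: completion arrived with time_left jiffies to spare */
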
index 2e6d6bbeb7842e5def216a589e670c813e5caf42..fe252a8075a7d5e88cd6168bd8d22ee0171d3adc 100644 (file)
@@ -1611,9 +1611,8 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
        mutex_lock(&ctlr->io_mutex);
 
        if (!was_busy && ctlr->auto_runtime_pm) {
-               ret = pm_runtime_get_sync(ctlr->dev.parent);
+               ret = pm_runtime_resume_and_get(ctlr->dev.parent);
                if (ret < 0) {
-                       pm_runtime_put_noidle(ctlr->dev.parent);
                        dev_err(&ctlr->dev, "Failed to power device: %d\n",
                                ret);
                        mutex_unlock(&ctlr->io_mutex);
@@ -3475,7 +3474,7 @@ static int __spi_validate_bits_per_word(struct spi_controller *ctlr,
 int spi_setup(struct spi_device *spi)
 {
        unsigned        bad_bits, ugly_bits;
-       int             status;
+       int             status = 0;
 
        /*
         * Check mode to prevent that any two of DUAL, QUAD and NO_MOSI/MISO
@@ -3518,13 +3517,18 @@ int spi_setup(struct spi_device *spi)
                return -EINVAL;
        }
 
-       if (!spi->bits_per_word)
+       if (!spi->bits_per_word) {
                spi->bits_per_word = 8;
-
-       status = __spi_validate_bits_per_word(spi->controller,
-                                             spi->bits_per_word);
-       if (status)
-               return status;
+       } else {
+               /*
+                * Some controllers may not support the default 8 bits-per-word
+                * so only perform the check when this is explicitly provided.
+                */
+               status = __spi_validate_bits_per_word(spi->controller,
+                                                     spi->bits_per_word);
+               if (status)
+                       return status;
+       }
 
        if (spi->controller->max_speed_hz &&
            (!spi->max_speed_hz ||
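
With this change spi_setup() applies the default of 8 bits per word without
re-validating it against the controller, and only checks an explicitly
provided value; `status' therefore needs the 0 initialiser because not every
path assigns it before use. The client-visible behaviour, sketched (values
illustrative):

	spi->bits_per_word = 0;		/* take the default of 8: accepted even
					 * if bits_per_word_mask omits 8 */
	ret = spi_setup(spi);

	spi->bits_per_word = 12;	/* explicit: validated against
					 * ctlr->bits_per_word_mask, may fail */
	ret = spi_setup(spi);
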
@@ -3544,10 +3548,9 @@ int spi_setup(struct spi_device *spi)
        }
 
        if (spi->controller->auto_runtime_pm && spi->controller->set_cs) {
-               status = pm_runtime_get_sync(spi->controller->dev.parent);
+               status = pm_runtime_resume_and_get(spi->controller->dev.parent);
                if (status < 0) {
                        mutex_unlock(&spi->controller->io_mutex);
-                       pm_runtime_put_noidle(spi->controller->dev.parent);
                        dev_err(&spi->controller->dev, "Failed to power device: %d\n",
                                status);
                        return status;
index 53a55171426530cbada4894a4d083c5b49b2fe29..b2775d82d2d7b9af0da870093f5ff7adb74ee0be 100644 (file)
@@ -8,19 +8,18 @@
  */
 
 #include <linux/init.h>
-#include <linux/module.h>
 #include <linux/ioctl.h>
 #include <linux/fs.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/list.h>
 #include <linux/errno.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/property.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/acpi.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/spidev.h>
@@ -46,6 +45,7 @@
 
 static DECLARE_BITMAP(minors, N_SPI_MINORS);
 
+static_assert(N_SPI_MINORS > 0 && N_SPI_MINORS <= 256);
 
 /* Bit masks for spi_device.mode management.  Note that incorrect
  * values for some settings can cause *lots* of trouble for other
@@ -63,7 +63,8 @@ static DECLARE_BITMAP(minors, N_SPI_MINORS);
                                | SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP \
                                | SPI_NO_CS | SPI_READY | SPI_TX_DUAL \
                                | SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL \
-                               | SPI_RX_QUAD | SPI_RX_OCTAL)
+                               | SPI_RX_QUAD | SPI_RX_OCTAL \
+                               | SPI_RX_CPHA_FLIP)
 
 struct spidev_data {
        dev_t                   devt;
@@ -568,19 +569,20 @@ spidev_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 static int spidev_open(struct inode *inode, struct file *filp)
 {
-       struct spidev_data      *spidev;
+       struct spidev_data      *spidev = NULL, *iter;
        int                     status = -ENXIO;
 
        mutex_lock(&device_list_lock);
 
-       list_for_each_entry(spidev, &device_list, device_entry) {
-               if (spidev->devt == inode->i_rdev) {
+       list_for_each_entry(iter, &device_list, device_entry) {
+               if (iter->devt == inode->i_rdev) {
                        status = 0;
+                       spidev = iter;
                        break;
                }
        }
 
-       if (status) {
+       if (!spidev) {
                pr_debug("spidev: nothing for minor %d\n", iminor(inode));
                goto err_find_dev;
        }
@@ -693,25 +695,38 @@ static const struct spi_device_id spidev_spi_ids[] = {
 };
 MODULE_DEVICE_TABLE(spi, spidev_spi_ids);
 
-#ifdef CONFIG_OF
+/*
+ * spidev should never be referenced in DT without a specific compatible string;
+ * it is a Linux implementation detail rather than a description of the hardware.
+ */
+static int spidev_of_check(struct device *dev)
+{
+       if (device_property_match_string(dev, "compatible", "spidev") < 0)
+               return 0;
+
+       dev_err(dev, "spidev listed directly in DT is not supported\n");
+       return -EINVAL;
+}
+
 static const struct of_device_id spidev_dt_ids[] = {
-       { .compatible = "rohm,dh2228fv" },
-       { .compatible = "lineartechnology,ltc2488" },
-       { .compatible = "semtech,sx1301" },
-       { .compatible = "lwn,bk4" },
-       { .compatible = "dh,dhcom-board" },
-       { .compatible = "menlo,m53cpld" },
-       { .compatible = "cisco,spi-petra" },
-       { .compatible = "micron,spi-authenta" },
+       { .compatible = "rohm,dh2228fv", .data = &spidev_of_check },
+       { .compatible = "lineartechnology,ltc2488", .data = &spidev_of_check },
+       { .compatible = "semtech,sx1301", .data = &spidev_of_check },
+       { .compatible = "lwn,bk4", .data = &spidev_of_check },
+       { .compatible = "dh,dhcom-board", .data = &spidev_of_check },
+       { .compatible = "menlo,m53cpld", .data = &spidev_of_check },
+       { .compatible = "cisco,spi-petra", .data = &spidev_of_check },
+       { .compatible = "micron,spi-authenta", .data = &spidev_of_check },
        {},
 };
 MODULE_DEVICE_TABLE(of, spidev_dt_ids);
-#endif
-
-#ifdef CONFIG_ACPI
 
 /* Dummy SPI devices not to be used in production systems */
-#define SPIDEV_ACPI_DUMMY      1
+static int spidev_acpi_check(struct device *dev)
+{
+       dev_warn(dev, "do not use this driver in production systems!\n");
+       return 0;
+}
 
 static const struct acpi_device_id spidev_acpi_ids[] = {
        /*
@@ -720,51 +735,29 @@ static const struct acpi_device_id spidev_acpi_ids[] = {
         * description of the connected peripheral and they should also use
         * a proper driver instead of poking directly to the SPI bus.
         */
-       { "SPT0001", SPIDEV_ACPI_DUMMY },
-       { "SPT0002", SPIDEV_ACPI_DUMMY },
-       { "SPT0003", SPIDEV_ACPI_DUMMY },
+       { "SPT0001", (kernel_ulong_t)&spidev_acpi_check },
+       { "SPT0002", (kernel_ulong_t)&spidev_acpi_check },
+       { "SPT0003", (kernel_ulong_t)&spidev_acpi_check },
        {},
 };
 MODULE_DEVICE_TABLE(acpi, spidev_acpi_ids);
 
-static void spidev_probe_acpi(struct spi_device *spi)
-{
-       const struct acpi_device_id *id;
-
-       if (!has_acpi_companion(&spi->dev))
-               return;
-
-       id = acpi_match_device(spidev_acpi_ids, &spi->dev);
-       if (WARN_ON(!id))
-               return;
-
-       if (id->driver_data == SPIDEV_ACPI_DUMMY)
-               dev_warn(&spi->dev, "do not use this driver in production systems!\n");
-}
-#else
-static inline void spidev_probe_acpi(struct spi_device *spi) {}
-#endif
-
 /*-------------------------------------------------------------------------*/
 
 static int spidev_probe(struct spi_device *spi)
 {
+       int (*match)(struct device *dev);
        struct spidev_data      *spidev;
        int                     status;
        unsigned long           minor;
 
-       /*
-        * spidev should never be referenced in DT without a specific
-        * compatible string, it is a Linux implementation thing
-        * rather than a description of the hardware.
-        */
-       if (spi->dev.of_node && of_device_is_compatible(spi->dev.of_node, "spidev")) {
-               dev_err(&spi->dev, "spidev listed directly in DT is not supported\n");
-               return -EINVAL;
+       match = device_get_match_data(&spi->dev);
+       if (match) {
+               status = match(&spi->dev);
+               if (status)
+                       return status;
        }
 
-       spidev_probe_acpi(spi);
-
        /* Allocate driver data */
        spidev = kzalloc(sizeof(*spidev), GFP_KERNEL);
        if (!spidev)
@@ -832,8 +825,8 @@ static void spidev_remove(struct spi_device *spi)
 static struct spi_driver spidev_spi_driver = {
        .driver = {
                .name =         "spidev",
-               .of_match_table = of_match_ptr(spidev_dt_ids),
-               .acpi_match_table = ACPI_PTR(spidev_acpi_ids),
+               .of_match_table = spidev_dt_ids,
+               .acpi_match_table = spidev_acpi_ids,
        },
        .probe =        spidev_probe,
        .remove =       spidev_remove,
@@ -856,7 +849,6 @@ static int __init spidev_init(void)
         * that will key udev/mdev to add/remove /dev nodes.  Last, register
         * the driver which manages those device numbers.
         */
-       BUILD_BUG_ON(N_SPI_MINORS > 256);
        status = register_chrdev(SPIDEV_MAJOR, "spi", &spidev_fops);
        if (status < 0)
                return status;
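
The spidev rework above replaces the #ifdef'ed OF and ACPI probing helpers
with one firmware-agnostic scheme: each match-table entry carries a callback
(as .data for OF, as driver_data cast to kernel_ulong_t for ACPI), and probe
fetches whichever one matched via device_get_match_data(). The dispatch
pattern in isolation, as a sketch (the example_ names are illustrative):

	static int example_probe(struct spi_device *spi)
	{
		int (*match)(struct device *dev);

		/* Returns the matched entry's .data/driver_data, or NULL. */
		match = device_get_match_data(&spi->dev);
		if (match) {
			int status = match(&spi->dev);

			if (status)
				return status;
		}
		/* common probe path continues here */
		return 0;
	}
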
index 6fe6a6bab3f465703a82c578ae8365f8d5931be0..ddf6c2a7212bc5993551b9a5bbd07964843e857c 100644 (file)
@@ -3596,10 +3596,7 @@ static int iscsit_send_reject(
 void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
 {
        int ord, cpu;
-       cpumask_t conn_allowed_cpumask;
-
-       cpumask_and(&conn_allowed_cpumask, iscsit_global->allowed_cpumask,
-                   cpu_online_mask);
+       cpumask_var_t conn_allowed_cpumask;
 
        /*
         * bitmap_id is assigned from iscsit_global->ts_bitmap from
@@ -3609,13 +3606,28 @@ void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
         * iSCSI connection's RX/TX threads will be scheduled to
         * execute upon.
         */
-       cpumask_clear(conn->conn_cpumask);
-       ord = conn->bitmap_id % cpumask_weight(&conn_allowed_cpumask);
-       for_each_cpu(cpu, &conn_allowed_cpumask) {
-               if (ord-- == 0) {
-                       cpumask_set_cpu(cpu, conn->conn_cpumask);
-                       return;
+       if (!zalloc_cpumask_var(&conn_allowed_cpumask, GFP_KERNEL)) {
+               ord = conn->bitmap_id % cpumask_weight(cpu_online_mask);
+               for_each_online_cpu(cpu) {
+                       if (ord-- == 0) {
+                               cpumask_set_cpu(cpu, conn->conn_cpumask);
+                               return;
+                       }
+               }
+       } else {
+               cpumask_and(conn_allowed_cpumask, iscsit_global->allowed_cpumask,
+                       cpu_online_mask);
+
+               cpumask_clear(conn->conn_cpumask);
+               ord = conn->bitmap_id % cpumask_weight(conn_allowed_cpumask);
+               for_each_cpu(cpu, conn_allowed_cpumask) {
+                       if (ord-- == 0) {
+                               cpumask_set_cpu(cpu, conn->conn_cpumask);
+                               free_cpumask_var(conn_allowed_cpumask);
+                               return;
+                       }
                }
+               free_cpumask_var(conn_allowed_cpumask);
        }
        /*
         * This should never be reached..
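
A cpumask_t on the stack can be large on high-NR_CPUS configurations, so the
conversion above switches to cpumask_var_t, which is heap-allocated when
CONFIG_CPUMASK_OFFSTACK=y; because that allocation can now fail, the code
grows a fallback that spreads the threads over cpu_online_mask alone. The
canonical allocation pattern, sketched (`allowed' is illustrative):

	cpumask_var_t mask;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;			/* or take a fallback path */

	cpumask_and(mask, allowed, cpu_online_mask);
	/* ... use mask ... */
	free_cpumask_var(mask);			/* no-op for on-stack masks */
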
index 0cedcfe207b56b8b5bae41cdac9a876b2227dc78..57b4fd56d92aba1a7a2becb04a83aca60500fb68 100644 (file)
@@ -1137,23 +1137,27 @@ static ssize_t lio_target_wwn_cpus_allowed_list_show(
 static ssize_t lio_target_wwn_cpus_allowed_list_store(
                struct config_item *item, const char *page, size_t count)
 {
-       int ret;
+       int ret = -ENOMEM;
        char *orig;
-       cpumask_t new_allowed_cpumask;
+       cpumask_var_t new_allowed_cpumask;
+
+       if (!zalloc_cpumask_var(&new_allowed_cpumask, GFP_KERNEL))
+               goto out;
 
        orig = kstrdup(page, GFP_KERNEL);
        if (!orig)
-               return -ENOMEM;
+               goto out_free_cpumask;
 
-       cpumask_clear(&new_allowed_cpumask);
-       ret = cpulist_parse(orig, &new_allowed_cpumask);
+       ret = cpulist_parse(orig, new_allowed_cpumask);
+       if (!ret)
+               cpumask_copy(iscsit_global->allowed_cpumask,
+                            new_allowed_cpumask);
 
        kfree(orig);
-       if (ret != 0)
-               return ret;
-
-       cpumask_copy(iscsit_global->allowed_cpumask, &new_allowed_cpumask);
-       return count;
+out_free_cpumask:
+       free_cpumask_var(new_allowed_cpumask);
+out:
+       return ret ? ret : count;
 }
 
 CONFIGFS_ATTR(lio_target_wwn_, cpus_allowed_list);
index 44bb380e7390c7319cd8eaa3ab00c7aa57158685..25f33eb25337c685c6b19461c98c83702b8b6ee9 100644 (file)
@@ -829,28 +829,26 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 }
 
 /*
- * Check if the underlying struct block_device request_queue supports
- * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
- * in ATA and we need to set TPE=1
+ * Check if the underlying struct block_device supports discard and if yes
+ * configure the UNMAP parameters.
  */
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
-                                      struct request_queue *q)
+                                      struct block_device *bdev)
 {
-       int block_size = queue_logical_block_size(q);
+       int block_size = bdev_logical_block_size(bdev);
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(bdev))
                return false;
 
        attrib->max_unmap_lba_count =
-               q->limits.max_discard_sectors >> (ilog2(block_size) - 9);
+               bdev_max_discard_sectors(bdev) >> (ilog2(block_size) - 9);
        /*
         * Currently hardcoded to 1 in Linux/SCSI code..
         */
        attrib->max_unmap_block_desc_count = 1;
-       attrib->unmap_granularity = q->limits.discard_granularity / block_size;
-       attrib->unmap_granularity_alignment = q->limits.discard_alignment /
-                                                               block_size;
-       attrib->unmap_zeroes_data = !!(q->limits.max_write_zeroes_sectors);
+       attrib->unmap_granularity = bdev_discard_granularity(bdev) / block_size;
+       attrib->unmap_granularity_alignment =
+               bdev_discard_alignment(bdev) / block_size;
        return true;
 }
 EXPORT_SYMBOL(target_configure_unmap_from_queue);
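
target_configure_unmap_from_queue() now takes the block_device itself and
reads the discard limits through the bdev_* accessors rather than poking at
request_queue internals; a device supports discard exactly when
bdev_max_discard_sectors() is non-zero. A sketch of deriving a block-sized
granularity the same way (function name illustrative):

	static int example_unmap_granularity(struct block_device *bdev)
	{
		int block_size = bdev_logical_block_size(bdev);

		if (!bdev_max_discard_sectors(bdev))
			return 0;	/* device cannot discard at all */

		return bdev_discard_granularity(bdev) / block_size;
	}
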
index 8190b840065f3e9137d6d872dbbdb8c051358e49..e68f1cc8ef98bd56098bdad6680576a4e6167b5b 100644 (file)
@@ -134,10 +134,10 @@ static int fd_configure_device(struct se_device *dev)
         */
        inode = file->f_mapping->host;
        if (S_ISBLK(inode->i_mode)) {
-               struct request_queue *q = bdev_get_queue(I_BDEV(inode));
+               struct block_device *bdev = I_BDEV(inode);
                unsigned long long dev_size;
 
-               fd_dev->fd_block_size = bdev_logical_block_size(I_BDEV(inode));
+               fd_dev->fd_block_size = bdev_logical_block_size(bdev);
                /*
                 * Determine the number of bytes from i_size_read() minus
                 * one (1) logical sector from underlying struct block_device
@@ -150,7 +150,7 @@ static int fd_configure_device(struct se_device *dev)
                        dev_size, div_u64(dev_size, fd_dev->fd_block_size),
                        fd_dev->fd_block_size);
 
-               if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
+               if (target_configure_unmap_from_queue(&dev->dev_attrib, bdev))
                        pr_debug("IFILE: BLOCK Discard support available,"
                                 " disabled by default\n");
                /*
@@ -159,7 +159,7 @@ static int fd_configure_device(struct se_device *dev)
                 */
                dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-               if (blk_queue_nonrot(q))
+               if (bdev_nonrot(bdev))
                        dev->dev_attrib.is_nonrot = 1;
        } else {
                if (!(fd_dev->fbd_flags & FBDF_HAS_SIZE)) {
@@ -558,7 +558,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
                ret = blkdev_issue_discard(bdev,
                                           target_to_linux_sector(dev, lba),
                                           target_to_linux_sector(dev,  nolb),
-                                          GFP_KERNEL, 0);
+                                          GFP_KERNEL);
                if (ret < 0) {
                        pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n",
                                ret);
index 87ede165ddba435ef8eeb3912e9368599e87b0c4..378c80313a0f27a4aef7ece5f6215bcb30c237e1 100644 (file)
@@ -119,7 +119,7 @@ static int iblock_configure_device(struct se_device *dev)
        dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
        dev->dev_attrib.hw_queue_depth = q->nr_requests;
 
-       if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
+       if (target_configure_unmap_from_queue(&dev->dev_attrib, bd))
                pr_debug("IBLOCK: BLOCK Discard support available,"
                         " disabled by default\n");
 
@@ -133,7 +133,7 @@ static int iblock_configure_device(struct se_device *dev)
        else
                dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-       if (blk_queue_nonrot(q))
+       if (bdev_nonrot(bd))
                dev->dev_attrib.is_nonrot = 1;
 
        bi = bdev_get_integrity(bd);
@@ -434,7 +434,7 @@ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
        ret = blkdev_issue_discard(bdev,
                                   target_to_linux_sector(dev, lba),
                                   target_to_linux_sector(dev,  nolb),
-                                  GFP_KERNEL, 0);
+                                  GFP_KERNEL);
        if (ret < 0) {
                pr_err("blkdev_issue_discard() failed: %d\n", ret);
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -727,17 +727,16 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 
        if (data_direction == DMA_TO_DEVICE) {
                struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
-               struct request_queue *q = bdev_get_queue(ib_dev->ibd_bd);
                /*
                 * Force writethrough using REQ_FUA if a volatile write cache
                 * is not enabled, or if initiator set the Force Unit Access bit.
                 */
                opf = REQ_OP_WRITE;
                miter_dir = SG_MITER_TO_SG;
-               if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) {
+               if (bdev_fua(ib_dev->ibd_bd)) {
                        if (cmd->se_cmd_flags & SCF_FUA)
                                opf |= REQ_FUA;
-                       else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+                       else if (!bdev_write_cache(ib_dev->ibd_bd))
                                opf |= REQ_FUA;
                }
        } else {
@@ -886,11 +885,7 @@ iblock_parse_cdb(struct se_cmd *cmd)
 
 static bool iblock_get_write_cache(struct se_device *dev)
 {
-       struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
-       struct block_device *bd = ib_dev->ibd_bd;
-       struct request_queue *q = bdev_get_queue(bd);
-
-       return test_bit(QUEUE_FLAG_WC, &q->queue_flags);
+       return bdev_write_cache(IBLOCK_DEV(dev)->ibd_bd);
 }
 
 static const struct target_backend_ops iblock_ops = {
index 60dafe4c581b482aec3f0e544a71716b20b3e7de..bb3fb18b2316d5a6236e7f8ae8035d41f26306fc 100644 (file)
@@ -818,24 +818,8 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b)
 
 static void pscsi_bi_endio(struct bio *bio)
 {
-       bio_put(bio);
-}
-
-static inline struct bio *pscsi_get_bio(int nr_vecs)
-{
-       struct bio *bio;
-       /*
-        * Use bio_malloc() following the comment in for bio -> struct request
-        * in block/blk-core.c:blk_make_request()
-        */
-       bio = bio_kmalloc(GFP_KERNEL, nr_vecs);
-       if (!bio) {
-               pr_err("PSCSI: bio_kmalloc() failed\n");
-               return NULL;
-       }
-       bio->bi_end_io = pscsi_bi_endio;
-
-       return bio;
+       bio_uninit(bio);
+       kfree(bio);
 }
 
 static sense_reason_t
@@ -878,15 +862,12 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                        if (!bio) {
 new_bio:
                                nr_vecs = bio_max_segs(nr_pages);
-                               /*
-                                * Calls bio_kmalloc() and sets bio->bi_end_io()
-                                */
-                               bio = pscsi_get_bio(nr_vecs);
+                               bio = bio_kmalloc(nr_vecs, GFP_KERNEL);
                                if (!bio)
                                        goto fail;
-
-                               if (rw)
-                                       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+                               bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs,
+                                        rw ? REQ_OP_WRITE : REQ_OP_READ);
+                               bio->bi_end_io = pscsi_bi_endio;
 
                                pr_debug("PSCSI: Allocated bio: %p,"
                                        " dir: %s nr_vecs: %d\n", bio,
@@ -912,11 +893,6 @@ new_bio:
                                        goto fail;
                                }
 
-                               /*
-                                * Clear the pointer so that another bio will
-                                * be allocated with pscsi_get_bio() above.
-                                */
-                               bio = NULL;
                                goto new_bio;
                        }
 
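
bio_kmalloc() no longer hands back an initialised bio: callers pair the bare
allocation with bio_init() pointing at the inline vector array, and undo both
in the completion handler with bio_uninit() plus kfree(), exactly as the
pscsi conversion above does. The lifecycle in isolation, sketched (the
example_ names are illustrative):

	static void example_bi_endio(struct bio *bio)
	{
		bio_uninit(bio);	/* undoes bio_init() */
		kfree(bio);		/* pairs with bio_kmalloc() */
	}

	static struct bio *example_alloc_bio(unsigned short nr_vecs)
	{
		struct bio *bio = bio_kmalloc(nr_vecs, GFP_KERNEL);

		if (!bio)
			return NULL;
		bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, REQ_OP_READ);
		bio->bi_end_io = example_bi_endio;
		return bio;
	}
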
index f0c36a1530d579d89a995a7e712e9b5443290466..def8e1a0399c33fd8a847a860c747ca686d6145b 100644 (file)
@@ -28,7 +28,7 @@ thermal_sys-$(CONFIG_CPU_IDLE_THERMAL)        += cpuidle_cooling.o
 # devfreq cooling
 thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
 
-obj-$(CONFIG_K3_THERMAL)       += k3_bandgap.o
+obj-$(CONFIG_K3_THERMAL)       += k3_bandgap.o k3_j72xx_bandgap.o
 # platform thermal drivers
 obj-y                          += broadcom/
 obj-$(CONFIG_THERMAL_MMIO)             += thermal_mmio.o
index 1ec57d9ecf5395eb557199204d52b4ea23302536..e9bef5c3414b6fe56cc7be0568cea422f95fc4c9 100644 (file)
@@ -38,7 +38,6 @@ static int bcm2711_get_temp(void *data, int *temp)
        int offset = thermal_zone_get_offset(priv->thermal);
        u32 val;
        int ret;
-       long t;
 
        ret = regmap_read(priv->regmap, AVS_RO_TEMP_STATUS, &val);
        if (ret)
@@ -50,9 +49,7 @@ static int bcm2711_get_temp(void *data, int *temp)
        val &= AVS_RO_TEMP_STATUS_DATA_MSK;
 
        /* Convert a HW code to a temperature reading (millidegree celsius) */
-       t = slope * val + offset;
-
-       *temp = t < 0 ? 0 : t;
+       *temp = slope * val + offset;
 
        return 0;
 }
index 475ce2900771337f5398b21f406a3c9877324222..85ab9edd580cc7151824bcb1e1681e1d63a7259c 100644 (file)
@@ -60,6 +60,9 @@ static int sr_thermal_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENOENT;
+
        sr_thermal->regs = (void __iomem *)devm_memremap(&pdev->dev, res->start,
                                                         resource_size(res),
                                                         MEMREMAP_WB);
index 0bfb8eebd126062a304672c79e7f5ed3e13bd85e..b8151d95a8068b83fb6d1ce8788dd7d3f8ee671d 100644 (file)
@@ -328,7 +328,7 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
        struct cpufreq_policy *policy;
        unsigned int nr_levels;
 
-       if (!em)
+       if (!em || em_is_artificial(em))
                return false;
 
        policy = cpufreq_cdev->policy;
index 4310cb342a9fb1f58ac362168df270c6e33fc8d3..8c76f9655e5774446fbef0d0e16be75b63f1a015 100644 (file)
@@ -358,28 +358,37 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
        struct thermal_cooling_device *cdev;
        struct device *dev = df->dev.parent;
        struct devfreq_cooling_device *dfc;
+       struct em_perf_domain *em;
+       struct thermal_cooling_device_ops *ops;
        char *name;
        int err, num_opps;
 
-       dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
-       if (!dfc)
+       ops = kmemdup(&devfreq_cooling_ops, sizeof(*ops), GFP_KERNEL);
+       if (!ops)
                return ERR_PTR(-ENOMEM);
 
+       dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
+       if (!dfc) {
+               err = -ENOMEM;
+               goto free_ops;
+       }
+
        dfc->devfreq = df;
 
-       dfc->em_pd = em_pd_get(dev);
-       if (dfc->em_pd) {
-               devfreq_cooling_ops.get_requested_power =
+       em = em_pd_get(dev);
+       if (em && !em_is_artificial(em)) {
+               dfc->em_pd = em;
+               ops->get_requested_power =
                        devfreq_cooling_get_requested_power;
-               devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
-               devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
+               ops->state2power = devfreq_cooling_state2power;
+               ops->power2state = devfreq_cooling_power2state;
 
                dfc->power_ops = dfc_power;
 
                num_opps = em_pd_nr_perf_states(dfc->em_pd);
        } else {
                /* Backward compatibility for drivers which do not use IPA */
-               dev_dbg(dev, "missing EM for cooling device\n");
+               dev_dbg(dev, "missing proper EM for cooling device\n");
 
                num_opps = dev_pm_opp_get_opp_count(dev);
 
@@ -407,8 +416,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
        if (!name)
                goto remove_qos_req;
 
-       cdev = thermal_of_cooling_device_register(np, name, dfc,
-                                                 &devfreq_cooling_ops);
+       cdev = thermal_of_cooling_device_register(np, name, dfc, ops);
        kfree(name);
 
        if (IS_ERR(cdev)) {
@@ -429,6 +437,8 @@ free_table:
        kfree(dfc->freq_table);
 free_dfc:
        kfree(dfc);
+free_ops:
+       kfree(ops);
 
        return ERR_PTR(err);
 }
@@ -510,11 +520,13 @@ EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
        struct devfreq_cooling_device *dfc;
+       const struct thermal_cooling_device_ops *ops;
        struct device *dev;
 
        if (IS_ERR_OR_NULL(cdev))
                return;
 
+       ops = cdev->ops;
        dfc = cdev->devdata;
        dev = dfc->devfreq->dev.parent;
 
@@ -525,5 +537,6 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
 
        kfree(dfc->freq_table);
        kfree(dfc);
+       kfree(ops);
 }
 EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
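
The devfreq_cooling fix above stops patching the file-scope
devfreq_cooling_ops in place, which was shared by every cooling device and
raced when several instances registered with different capabilities; each
instance now gets its own kmemdup()'d copy, later freed through cdev->ops in
devfreq_cooling_unregister(). The per-instance-ops pattern in general,
sketched (the template and the capability test are illustrative):

	static const struct thermal_cooling_device_ops template_ops = {
		/* methods shared by every instance */
	};

	struct thermal_cooling_device_ops *ops;

	ops = kmemdup(&template_ops, sizeof(*ops), GFP_KERNEL);
	if (!ops)
		return ERR_PTR(-ENOMEM);

	/* Safe: only this instance's copy is specialised. */
	if (instance_has_power_model)
		ops->get_requested_power = example_get_requested_power;
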
index 9a21ac0ceb1124ebc1be16a6f5b4e272b228447b..b29ab09040d51591dd75fef43e0f062b2df3c9b9 100644 (file)
@@ -629,7 +629,6 @@ static int hisi_thermal_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int hisi_thermal_suspend(struct device *dev)
 {
        struct hisi_thermal_data *data = dev_get_drvdata(dev);
@@ -651,15 +650,14 @@ static int hisi_thermal_resume(struct device *dev)
 
        return ret;
 }
-#endif
 
-static SIMPLE_DEV_PM_OPS(hisi_thermal_pm_ops,
+static DEFINE_SIMPLE_DEV_PM_OPS(hisi_thermal_pm_ops,
                         hisi_thermal_suspend, hisi_thermal_resume);
 
 static struct platform_driver hisi_thermal_driver = {
        .driver = {
                .name           = "hisi_thermal",
-               .pm             = &hisi_thermal_pm_ops,
+               .pm             = pm_sleep_ptr(&hisi_thermal_pm_ops),
                .of_match_table = of_hisi_thermal_match,
        },
        .probe  = hisi_thermal_probe,
index 8d76dbfde6a9ff425f3e24b2c6e388bfad5649ba..331a241eb0ef315606b24bb7c841706d21c710b1 100644 (file)
@@ -94,8 +94,8 @@ static int imx_sc_thermal_probe(struct platform_device *pdev)
                sensor = devm_kzalloc(&pdev->dev, sizeof(*sensor), GFP_KERNEL);
                if (!sensor) {
                        of_node_put(child);
-                       of_node_put(sensor_np);
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto put_node;
                }
 
                ret = thermal_zone_of_get_sensor_id(child,
@@ -124,7 +124,9 @@ static int imx_sc_thermal_probe(struct platform_device *pdev)
                        dev_warn(&pdev->dev, "failed to add hwmon sysfs attributes\n");
        }
 
+put_node:
        of_node_put(sensor_np);
+       of_node_put(np);
 
        return ret;
 }
index d97f496bab9be96416499092e9bc042af160c69a..80d4e0676083ac56ade7bea96834203f1d95eed0 100644 (file)
@@ -169,37 +169,53 @@ static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enab
        acpi_status status;
        int result = 0;
        struct acpi_osc_context context = {
-               .uuid_str = NULL,
+               .uuid_str = uuid_str,
                .rev = 1,
                .cap.length = 8,
+               .cap.pointer = buf,
        };
 
-       context.uuid_str = uuid_str;
-
        buf[OSC_QUERY_DWORD] = 0;
        buf[OSC_SUPPORT_DWORD] = *enable;
 
-       context.cap.pointer = buf;
-
        status = acpi_run_osc(handle, &context);
        if (ACPI_SUCCESS(status)) {
                ret = *((u32 *)(context.ret.pointer + 4));
                if (ret != *enable)
                        result = -EPERM;
+
+               kfree(context.ret.pointer);
        } else
                result = -EPERM;
 
-       kfree(context.ret.pointer);
-
        return result;
 }
 
+static int set_os_uuid_mask(struct int3400_thermal_priv *priv, u32 mask)
+{
+       int cap = 0;
+
+       /*
+        * Capability bits:
+        * Bit 0: set to 1 to indicate DPTF is active
+        * Bit 1: set to 1 if active cooling is supported by the user space daemon
+        * Bit 2: set to 1 if passive cooling is supported by the user space daemon
+        * Bit 3: set to 1 if the critical trip is handled by the user space daemon
+        */
+       if (mask)
+               cap = (priv->os_uuid_mask << 1) | 0x01;
+
+       return int3400_thermal_run_osc(priv->adev->handle,
+                                      "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
+                                      &cap);
+}
+
 static ssize_t current_uuid_store(struct device *dev,
                                  struct device_attribute *attr,
                                  const char *buf, size_t count)
 {
        struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
-       int i;
+       int ret, i;
 
        for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; ++i) {
                if (!strncmp(buf, int3400_thermal_uuids[i],
@@ -231,19 +247,7 @@ static ssize_t current_uuid_store(struct device *dev,
        }
 
        if (priv->os_uuid_mask) {
-               int cap, ret;
-
-               /*
-                * Capability bits:
-                * Bit 0: set to 1 to indicate DPTF is active
-                * Bi1 1: set to 1 to active cooling is supported by user space daemon
-                * Bit 2: set to 1 to passive cooling is supported by user space daemon
-                * Bit 3: set to 1 to critical trip is handled by user space daemon
-                */
-               cap = ((priv->os_uuid_mask << 1) | 0x01);
-               ret = int3400_thermal_run_osc(priv->adev->handle,
-                                             "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
-                                             &cap);
+               ret = set_os_uuid_mask(priv, priv->os_uuid_mask);
                if (ret)
                        return ret;
        }
@@ -469,17 +473,26 @@ static int int3400_thermal_change_mode(struct thermal_zone_device *thermal,
        if (mode != thermal->mode) {
                int enabled;
 
+               enabled = mode == THERMAL_DEVICE_ENABLED;
+
+               if (priv->os_uuid_mask) {
+                       if (!enabled) {
+                               priv->os_uuid_mask = 0;
+                               result = set_os_uuid_mask(priv, priv->os_uuid_mask);
+                       }
+                       goto eval_odvp;
+               }
+
                if (priv->current_uuid_index < 0 ||
                    priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
                        return -EINVAL;
 
-               enabled = (mode == THERMAL_DEVICE_ENABLED);
                result = int3400_thermal_run_osc(priv->adev->handle,
                                                 int3400_thermal_uuids[priv->current_uuid_index],
                                                 &enabled);
        }
 
-
+eval_odvp:
        evaluate_odvp(priv);
 
        return result;
@@ -508,21 +521,18 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv)
 
        obj = buffer.pointer;
        if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count != 1
-           || obj->package.elements[0].type != ACPI_TYPE_BUFFER) {
-               kfree(buffer.pointer);
-               return;
-       }
+           || obj->package.elements[0].type != ACPI_TYPE_BUFFER)
+               goto out_free;
 
        priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer,
                                   obj->package.elements[0].buffer.length,
                                   GFP_KERNEL);
-       if (!priv->data_vault) {
-               kfree(buffer.pointer);
-               return;
-       }
+       if (!priv->data_vault)
+               goto out_free;
 
        bin_attr_data_vault.private = priv->data_vault;
        bin_attr_data_vault.size = obj->package.elements[0].buffer.length;
+out_free:
        kfree(buffer.pointer);
 }
 
@@ -653,6 +663,7 @@ static const struct acpi_device_id int3400_thermal_match[] = {
        {"INT3400", 0},
        {"INTC1040", 0},
        {"INTC1041", 0},
+       {"INTC1042", 0},
        {"INTC10A0", 0},
        {}
 };
index 07e25321dfe3bd911e0e2a99f4b008163e1f09e8..71d084c4c456dd41df7f13f2f9b51db4aa8f5ffd 100644 (file)
@@ -285,6 +285,7 @@ static const struct acpi_device_id int3403_device_ids[] = {
        {"INT3403", 0},
        {"INTC1043", 0},
        {"INTC1046", 0},
+       {"INTC1062", 0},
        {"INTC10A1", 0},
        {"", 0},
 };
index 730fd121df6ec3582499d94503f0ec89dc89cb1c..a0640f762dc5de9501648550c6a9ab13064d8150 100644 (file)
@@ -243,8 +243,6 @@ static void hfi_update_work_fn(struct work_struct *work)
 
        hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
                                    update_work);
-       if (!hfi_instance)
-               return;
 
        update_capabilities(hfi_instance);
 }
index 527c91f5960be344912f3d846ef8e7b5e52f9327..c1fa2b29b153b81fc7e9d568681ae1d131ad0e69 100644 (file)
@@ -70,8 +70,8 @@ static unsigned int delay_timeout = 100;
 module_param(delay_timeout, int, 0644);
 MODULE_PARM_DESC(delay_timeout, "amount of time delay for each iteration.");
 
-/* Number of iterations for cooling delay, 10 counts by default for now */
-static unsigned int delay_cnt = 10;
+/* Number of iterations for cooling delay, 600 counts by default for now */
+static unsigned int delay_cnt = 600;
 module_param(delay_cnt, int, 0644);
 MODULE_PARM_DESC(delay_cnt, "total number of iterations for time delay.");
 
@@ -193,10 +193,11 @@ static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp)
        return 0;
 }
 
+/* Cool the PCH when it overheats during the .suspend_noirq phase */
 static int pch_wpt_suspend(struct pch_thermal_device *ptd)
 {
        u8 tsel;
-       u8 pch_delay_cnt = 1;
+       int pch_delay_cnt = 0;
        u16 pch_thr_temp, pch_cur_temp;
 
        /* Shutdown the thermal sensor if it is not enabled by BIOS */
@@ -232,26 +233,38 @@ static int pch_wpt_suspend(struct pch_thermal_device *ptd)
         * temperature stays above the threshold, print a warning message
         * which helps to identify the reason why S0ix entry was rejected.
         */
-       while (pch_delay_cnt <= delay_cnt) {
-               if (pch_cur_temp <= pch_thr_temp)
+       while (pch_delay_cnt < delay_cnt) {
+               if (pch_cur_temp < pch_thr_temp)
                        break;
 
-               dev_warn(&ptd->pdev->dev,
+               if (pm_wakeup_pending()) {
+                       dev_warn(&ptd->pdev->dev, "Wakeup event detected, abort cooling\n");
+                       return 0;
+               }
+
+               pch_delay_cnt++;
+               dev_dbg(&ptd->pdev->dev,
                        "CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration\n",
                        pch_cur_temp, pch_thr_temp, pch_delay_cnt, delay_timeout);
                msleep(delay_timeout);
                /* Read the PCH current temperature for next cycle. */
                pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP));
-               pch_delay_cnt++;
        }
 
-       if (pch_cur_temp > pch_thr_temp)
+       if (pch_cur_temp >= pch_thr_temp)
                dev_warn(&ptd->pdev->dev,
-                       "CPU-PCH is hot [%dC] even after delay, continue to suspend. S0ix might fail\n",
-                       pch_cur_temp);
-       else
-               dev_info(&ptd->pdev->dev,
-                       "CPU-PCH is cool [%dC], continue to suspend\n", pch_cur_temp);
+                       "CPU-PCH is hot [%dC] after %d ms delay. S0ix might fail\n",
+                       pch_cur_temp, pch_delay_cnt * delay_timeout);
+       else {
+               if (pch_delay_cnt)
+                       dev_info(&ptd->pdev->dev,
+                               "CPU-PCH is cool [%dC] after %d ms delay\n",
+                               pch_cur_temp, pch_delay_cnt * delay_timeout);
+               else
+                       dev_info(&ptd->pdev->dev,
+                               "CPU-PCH is cool [%dC]\n",
+                               pch_cur_temp);
+       }
 
        return 0;
 }
@@ -455,7 +468,7 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev)
        pci_disable_device(pdev);
 }
 
-static int intel_pch_thermal_suspend(struct device *device)
+static int intel_pch_thermal_suspend_noirq(struct device *device)
 {
        struct pch_thermal_device *ptd = dev_get_drvdata(device);
 
@@ -495,7 +508,7 @@ static const struct pci_device_id intel_pch_thermal_id[] = {
 MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
 
 static const struct dev_pm_ops intel_pch_pm_ops = {
-       .suspend = intel_pch_thermal_suspend,
+       .suspend_noirq = intel_pch_thermal_suspend_noirq,
        .resume = intel_pch_thermal_resume,
 };
 
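
Moving the cool-down from .suspend to .suspend_noirq runs it as late as
possible in the suspend sequence, after device interrupt handlers have been
disabled and just before S0ix entry, and the new pm_wakeup_pending() test
bails out of the polling loop once a wakeup event would abort the transition
anyway. The callback wiring, sketched (the example_ names are illustrative):

	static const struct dev_pm_ops example_pm_ops = {
		.suspend_noirq = example_suspend_noirq,	/* late-stage work */
		.resume = example_resume,
	};
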
index 35f41e8a0b75930f9cacb3fa549df9b8e83e7502..5d0b3ffc6f46c65943373743e89567e2dcf7fb1f 100644 (file)
@@ -16,6 +16,8 @@
 #include <linux/thermal.h>
 #include <linux/types.h>
 
+#include "thermal_hwmon.h"
+
 #define K3_VTM_DEVINFO_PWR0_OFFSET             0x4
 #define K3_VTM_DEVINFO_PWR0_TEMPSENS_CT_MASK   0xf0
 #define K3_VTM_TMPSENS0_CTRL_OFFSET    0x80
@@ -219,6 +221,9 @@ static int k3_bandgap_probe(struct platform_device *pdev)
                        ret = PTR_ERR(data[id].tzd);
                        goto err_alloc;
                }
+
+               if (devm_thermal_add_hwmon_sysfs(data[id].tzd))
+                       dev_warn(dev, "Failed to add hwmon sysfs attributes\n");
        }
 
        platform_set_drvdata(pdev, bgp);
diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c
new file mode 100644 (file)
index 0000000..64e3231
--- /dev/null
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * TI Bandgap temperature sensor driver for J72XX SoC Family
+ *
+ * Copyright (C) 2021 Texas Instruments Incorporated - http://www.ti.com/
+ */
+
+#include <linux/math.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/of_platform.h>
+#include <linux/io.h>
+#include <linux/thermal.h>
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#define K3_VTM_DEVINFO_PWR0_OFFSET             0x4
+#define K3_VTM_DEVINFO_PWR0_TEMPSENS_CT_MASK   0xf0
+#define K3_VTM_TMPSENS0_CTRL_OFFSET            0x300
+#define K3_VTM_MISC_CTRL_OFFSET                        0xc
+#define K3_VTM_TMPSENS_STAT_OFFSET             0x8
+#define K3_VTM_ANYMAXT_OUTRG_ALERT_EN          0x1
+#define K3_VTM_MISC_CTRL2_OFFSET               0x10
+#define K3_VTM_TS_STAT_DTEMP_MASK              0x3ff
+#define K3_VTM_MAX_NUM_TS                      8
+#define K3_VTM_TMPSENS_CTRL_SOC                        BIT(5)
+#define K3_VTM_TMPSENS_CTRL_CLRZ               BIT(6)
+#define K3_VTM_TMPSENS_CTRL_CLKON_REQ          BIT(7)
+#define K3_VTM_TMPSENS_CTRL_MAXT_OUTRG_EN      BIT(11)
+
+#define K3_VTM_CORRECTION_TEMP_CNT             3
+
+#define MINUS40CREF                            5
+#define PLUS30CREF                             253
+#define PLUS125CREF                            730
+#define PLUS150CREF                            940
+
+#define TABLE_SIZE                             1024
+#define MAX_TEMP                               123000
+#define COOL_DOWN_TEMP                         105000
+
+#define FACTORS_REDUCTION                      13
+static int *derived_table;
+
+static int compute_value(int index, const s64 *factors, int nr_factors,
+                        int reduction)
+{
+       s64 value = 0;
+       int i;
+
+       for (i = 0; i < nr_factors; i++)
+               value += factors[i] * int_pow(index, i);
+
+       return (int)div64_s64(value, int_pow(10, reduction));
+}
+
+static void init_table(int factors_size, int *table, const s64 *factors)
+{
+       int i;
+
+       for (i = 0; i < TABLE_SIZE; i++)
+               table[i] = compute_value(i, factors, factors_size,
+                                        FACTORS_REDUCTION);
+}
+
+/**
+ * struct err_values - structure containing error/reference values
+ * @refs: reference error values for -40C, 30C, 125C & 150C
+ * @errs: actual error values for -40C, 30C, 125C & 150C read from the efuse
+ */
+struct err_values {
+       int refs[4];
+       int errs[4];
+};
+
+static void create_table_segments(struct err_values *err_vals, int seg,
+                                 int *ref_table)
+{
+       int m = 0, c, num, den, i, err, idx1, idx2, err1, err2, ref1, ref2;
+
+       if (seg == 0)
+               idx1 = 0;
+       else
+               idx1 = err_vals->refs[seg];
+
+       idx2 = err_vals->refs[seg + 1];
+       err1 = err_vals->errs[seg];
+       err2 = err_vals->errs[seg + 1];
+       ref1 = err_vals->refs[seg];
+       ref2 = err_vals->refs[seg + 1];
+
+       /*
+        * Calculate the slope with the ADC values read from the register
+        * as the y-axis parameter and the error in the ADC value as the
+        * x-axis parameter.
+        */
+       num = ref2 - ref1;
+       den = err2 - err1;
+       if (den)
+               m = num / den;
+       c = ref2 - m * err2;
+
+       /*
+        * Guard against a divide by zero when both error values are the
+        * same and when the slope is zero.
+        */
+       if (den != 0 && m != 0) {
+               for (i = idx1; i <= idx2; i++) {
+                       err = (i - c) / m;
+                       if (((i + err) < 0) || ((i + err) >= TABLE_SIZE))
+                               continue;
+                       derived_table[i] = ref_table[i + err];
+               }
+       } else { /* Constant error: handles the divide-by-zero cases */
+               for (i = idx1; i <= idx2; i++) {
+                       if (((i + err1) < 0) || ((i + err1) >= TABLE_SIZE))
+                               continue;
+                       derived_table[i] = ref_table[i + err1];
+               }
+       }
+}
+
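+/*
+ * Build derived_table from the per-segment error fits, then extrapolate
+ * past both ends so that every ADC code maps to a temperature.
+ */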
+static int prep_lookup_table(struct err_values *err_vals, int *ref_table)
+{
+       int i, seg;
+
+       /*
+        * Fill up the lookup table under 3 segments
+        * region -40C to +30C
+        * region +30C to +125C
+        * region +125C to +150C
+        */
+       for (seg = 0; seg < 3; seg++)
+               create_table_segments(err_vals, seg, ref_table);
+
+       /* Get to the first valid temperature */
+       i = 0;
+       while (!derived_table[i])
+               i++;
+
+       /*
+        * Back-fill the entries below the first valid temperature in
+        * 300 millicelsius steps for the sake of continuity; the loop
+        * runs down to and including index 0.
+        */
+       while (i--)
+               derived_table[i] = derived_table[i + 1] - 300;
+
+       /*
+        * Fill the trailing zero entries in increments of 100 millicelsius
+        * up to the last code (1023)
+        */
+       i = TABLE_SIZE - 1;
+       while (!derived_table[i])
+               i--;
+
+       i++;
+       while (i < TABLE_SIZE) {
+               derived_table[i] = derived_table[i - 1] + 100;
+               i++;
+       }
+
+       return 0;
+}
+
+struct k3_thermal_data;
+
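+/**
+ * struct k3_j72xx_bandgap - VTM device instance data
+ * @dev: device pointer
+ * @base: VTM register base
+ * @cfg2_base: VTM misc control register base
+ * @fuse_base: efuse register base holding the trim values
+ * @ts_data: pointers to the per-sensor data
+ */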
+struct k3_j72xx_bandgap {
+       struct device *dev;
+       void __iomem *base;
+       void __iomem *cfg2_base;
+       void __iomem *fuse_base;
+       struct k3_thermal_data *ts_data[K3_VTM_MAX_NUM_TS];
+};
+
+/* Per-sensor instance data */
+struct k3_thermal_data {
+       struct k3_j72xx_bandgap *bgp;
+       u32 ctrl_offset;
+       u32 stat_offset;
+};
+
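+/* Two's complement: convert a raw field with its sign bit set to a negative int */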
+static int two_cmp(int tmp, int mask)
+{
+       tmp = ~tmp;
+       tmp &= mask;
+       tmp += 1;
+
+       /* Return the negative value */
+       return -tmp;
+}
+
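+/*
+ * Return the average of the two closest samples out of three, filtering
+ * the occasional outlier reading (see the errata comment below).
+ */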
+static unsigned int vtm_get_best_value(unsigned int s0, unsigned int s1,
+                                      unsigned int s2)
+{
+       int d01 = abs(s0 - s1);
+       int d02 = abs(s0 - s2);
+       int d12 = abs(s1 - s2);
+
+       if (d01 <= d02 && d01 <= d12)
+               return (s0 + s1) / 2;
+
+       if (d02 <= d01 && d02 <= d12)
+               return (s0 + s2) / 2;
+
+       return (s1 + s2) / 2;
+}
+
+static inline int k3_bgp_read_temp(struct k3_thermal_data *devdata,
+                                  int *temp)
+{
+       struct k3_j72xx_bandgap *bgp;
+       unsigned int dtemp, s0, s1, s2;
+
+       bgp = devdata->bgp;
+       /*
+        * Errata i2128 applies to AM654 PG 1.0 silicon and J7ES: readings
+        * can vary by a few degrees centigrade from sample to sample on
+        * AM654. Work around that by averaging the two closest of three
+        * readings every time we report a temperature.
+        */
+       s0 = readl(bgp->base + devdata->stat_offset) &
+               K3_VTM_TS_STAT_DTEMP_MASK;
+       s1 = readl(bgp->base + devdata->stat_offset) &
+               K3_VTM_TS_STAT_DTEMP_MASK;
+       s2 = readl(bgp->base + devdata->stat_offset) &
+               K3_VTM_TS_STAT_DTEMP_MASK;
+       dtemp = vtm_get_best_value(s0, s1, s2);
+
+       if (dtemp >= TABLE_SIZE)
+               return -EINVAL;
+
+       *temp = derived_table[dtemp];
+
+       return 0;
+}
+
+/* Get temperature callback function for thermal zone */
+static int k3_thermal_get_temp(void *devdata, int *temp)
+{
+       struct k3_thermal_data *data = devdata;
+
+       return k3_bgp_read_temp(data, temp);
+}
+
+static const struct thermal_zone_of_device_ops k3_of_thermal_ops = {
+       .get_temp = k3_thermal_get_temp,
+};
+
+static int k3_j72xx_bandgap_temp_to_adc_code(int temp)
+{
+       int low = 0, high = TABLE_SIZE - 1, mid;
+
+       if (temp > 160000 || temp < -50000)
+               return -EINVAL;
+
+       /* Binary search to find the adc code */
+       while (low < (high - 1)) {
+               mid = (low + high) / 2;
+               if (temp <= derived_table[mid])
+                       high = mid;
+               else
+                       low = mid;
+       }
+
+       return mid;
+}
+
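+/*
+ * Read the factory-trimmed error values for -40C, 30C and 125C for the
+ * given sensor from the efuse registers; the 150C error is fixed at 0.
+ */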
+static void get_efuse_values(int id, struct k3_thermal_data *data, int *err,
+                            struct k3_j72xx_bandgap *bgp)
+{
+       int i, tmp, pow;
+       int ct_offsets[5][K3_VTM_CORRECTION_TEMP_CNT] = {
+               { 0x0, 0x8, 0x4 },
+               { 0x0, 0x8, 0x4 },
+               { 0x0, -1,  0x4 },
+               { 0x0, 0xC, -1 },
+               { 0x0, 0xc, 0x8 }
+       };
+       int ct_bm[5][K3_VTM_CORRECTION_TEMP_CNT] = {
+               { 0x3f, 0x1fe000, 0x1ff },
+               { 0xfc0, 0x1fe000, 0x3fe00 },
+               { 0x3f000, 0x7f800000, 0x7fc0000 },
+               { 0xfc0000, 0x1fe0, 0x1f800000 },
+               { 0x3f000000, 0x1fe000, 0x1ff0 }
+       };
+
+       for (i = 0; i < 3; i++) {
+               /* Extract the offset value using bit-mask */
+               if (ct_offsets[id][i] == -1 && i == 1) {
+                       /* 25C offset of sensor 2 is split between 2 regs */
+                       tmp = (readl(bgp->fuse_base + 0x8) & 0xE0000000) >> (29);
+                       tmp |= ((readl(bgp->fuse_base + 0xC) & 0x1F) << 3);
+                       pow = tmp & 0x80;
+               } else if (ct_offsets[id][i] == -1 && i == 2) {
+                       /* 125C offset of sensor 3 is split between 2 regs */
+                       tmp = (readl(bgp->fuse_base + 0x4) & 0xF8000000) >> (27);
+                       tmp |= ((readl(bgp->fuse_base + 0x8) & 0xF) << 5);
+                       pow = tmp & 0x100;
+               } else {
+                       tmp = readl(bgp->fuse_base + ct_offsets[id][i]);
+                       tmp &= ct_bm[id][i];
+                       tmp = tmp >> __ffs(ct_bm[id][i]);
+
+                       /* Obtain the sign bit, pow */
+                       pow = ct_bm[id][i] >> __ffs(ct_bm[id][i]);
+                       pow += 1;
+                       pow /= 2;
+               }
+
+               /* Check for negative value */
+               if (tmp & pow) {
+                       /* 2's complement value */
+                       tmp = two_cmp(tmp, ct_bm[id][i] >> __ffs(ct_bm[id][i]));
+               }
+               err[i] = tmp;
+       }
+
+       /* Err value for 150C is set to 0 */
+       err[i] = 0;
+}
+
+static void print_look_up_table(struct device *dev, int *ref_table)
+{
+       int i;
+
+       dev_dbg(dev, "The contents of derived array\n");
+       dev_dbg(dev, "Code   Temperature\n");
+       for (i = 0; i < TABLE_SIZE; i++)
+               dev_dbg(dev, "%d       %d %d\n", i, derived_table[i], ref_table[i]);
+}
+
+struct k3_j72xx_bandgap_data {
+       unsigned int has_errata_i2128;
+};
+
+static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
+{
+       int ret = 0, cnt, val, id;
+       int high_max, low_temp;
+       struct resource *res;
+       struct device *dev = &pdev->dev;
+       struct k3_j72xx_bandgap *bgp;
+       struct k3_thermal_data *data;
+       int workaround_needed = 0;
+       const struct k3_j72xx_bandgap_data *driver_data;
+       struct thermal_zone_device *ti_thermal;
+       int *ref_table;
+       struct err_values err_vals;
+
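+       /*
+        * Calibration polynomial coefficients, pre-scaled by
+        * 10^FACTORS_REDUCTION; init_table() evaluates the polynomial to
+        * generate the reference code-to-temperature table.
+        */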
+       const s64 golden_factors[] = {
+               -490019999999999936,
+               3251200000000000,
+               -1705800000000,
+               603730000,
+               -92627,
+       };
+
+       const s64 pvt_wa_factors[] = {
+               -415230000000000000,
+               3126600000000000,
+               -1157800000000,
+       };
+
+       bgp = devm_kzalloc(&pdev->dev, sizeof(*bgp), GFP_KERNEL);
+       if (!bgp)
+               return -ENOMEM;
+
+       bgp->dev = dev;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       bgp->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(bgp->base))
+               return PTR_ERR(bgp->base);
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       bgp->cfg2_base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(bgp->cfg2_base))
+               return PTR_ERR(bgp->cfg2_base);
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+       bgp->fuse_base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(bgp->fuse_base))
+               return PTR_ERR(bgp->fuse_base);
+
+       driver_data = of_device_get_match_data(dev);
+       if (driver_data)
+               workaround_needed = driver_data->has_errata_i2128;
+
+       pm_runtime_enable(dev);
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(dev);
+               pm_runtime_disable(dev);
+               return ret;
+       }
+
+       /* Get the sensor count in the VTM */
+       val = readl(bgp->base + K3_VTM_DEVINFO_PWR0_OFFSET);
+       cnt = val & K3_VTM_DEVINFO_PWR0_TEMPSENS_CT_MASK;
+       cnt >>= __ffs(K3_VTM_DEVINFO_PWR0_TEMPSENS_CT_MASK);
+
+       data = devm_kcalloc(bgp->dev, cnt, sizeof(*data), GFP_KERNEL);
+       if (!data) {
+               ret = -ENOMEM;
+               goto err_alloc;
+       }
+
+       ref_table = kcalloc(TABLE_SIZE, sizeof(*ref_table), GFP_KERNEL);
+       if (!ref_table) {
+               ret = -ENOMEM;
+               goto err_alloc;
+       }
+
+       derived_table = devm_kcalloc(bgp->dev, TABLE_SIZE, sizeof(*derived_table),
+                                    GFP_KERNEL);
+       if (!derived_table) {
+               ret = -ENOMEM;
+               goto err_alloc;
+       }
+
+       /* The workaround is not needed if fuse bits 30/31 are set, even on J721e */
+       if (workaround_needed && (readl(bgp->fuse_base + 0x0) & 0xc0000000) == 0xc0000000)
+               workaround_needed = false;
+
+       dev_dbg(bgp->dev, "Workaround %sneeded\n",
+               workaround_needed ? "" : "not ");
+
+       if (!workaround_needed)
+               init_table(5, ref_table, golden_factors);
+       else
+               init_table(3, ref_table, pvt_wa_factors);
+
+       /* Register the thermal sensors */
+       for (id = 0; id < cnt; id++) {
+               data[id].bgp = bgp;
+               data[id].ctrl_offset = K3_VTM_TMPSENS0_CTRL_OFFSET + id * 0x20;
+               data[id].stat_offset = data[id].ctrl_offset +
+                                       K3_VTM_TMPSENS_STAT_OFFSET;
+
+               if (workaround_needed) {
+                       /* ref adc values for -40C, 30C & 125C respectively */
+                       err_vals.refs[0] = MINUS40CREF;
+                       err_vals.refs[1] = PLUS30CREF;
+                       err_vals.refs[2] = PLUS125CREF;
+                       err_vals.refs[3] = PLUS150CREF;
+                       get_efuse_values(id, &data[id], err_vals.errs, bgp);
+               }
+
+               if (id == 0 && workaround_needed)
+                       prep_lookup_table(&err_vals, ref_table);
+               else if (id == 0 && !workaround_needed)
+                       memcpy(derived_table, ref_table,
+                              TABLE_SIZE * sizeof(*derived_table));
+
+               val = readl(data[id].bgp->cfg2_base + data[id].ctrl_offset);
+               val |= (K3_VTM_TMPSENS_CTRL_MAXT_OUTRG_EN |
+                       K3_VTM_TMPSENS_CTRL_SOC |
+                       K3_VTM_TMPSENS_CTRL_CLRZ | BIT(4));
+               writel(val, data[id].bgp->cfg2_base + data[id].ctrl_offset);
+
+               bgp->ts_data[id] = &data[id];
+               ti_thermal = devm_thermal_zone_of_sensor_register(bgp->dev, id,
+                                                                 &data[id],
+                                                                 &k3_of_thermal_ops);
+               if (IS_ERR(ti_thermal)) {
+                       dev_err(bgp->dev, "failed to register thermal zone\n");
+                       ret = PTR_ERR(ti_thermal);
+                       goto err_alloc;
+               }
+       }
+
+       /*
+        * Program TSHUT thresholds
+        * Step 1: set the thresholds to ~123C and 105C in WKUP_VTM_MISC_CTRL2
+        * Step 2: set the MAXT_OUTRG_EN bit in WKUP_VTM_TMPSENS_CTRL_j
+        *         (already done as part of the per-sensor init above)
+        * Step 3: set the ANYMAXT_OUTRG_ALERT_EN bit in WKUP_VTM_MISC_CTRL
+        */
+       high_max = k3_j72xx_bandgap_temp_to_adc_code(MAX_TEMP);
+       low_temp = k3_j72xx_bandgap_temp_to_adc_code(COOL_DOWN_TEMP);
+
+       writel((low_temp << 16) | high_max, data[0].bgp->cfg2_base +
+              K3_VTM_MISC_CTRL2_OFFSET);
+       mdelay(100);
+       writel(K3_VTM_ANYMAXT_OUTRG_ALERT_EN, data[0].bgp->cfg2_base +
+              K3_VTM_MISC_CTRL_OFFSET);
+
+       platform_set_drvdata(pdev, bgp);
+
+       print_look_up_table(dev, ref_table);
+       /*
+        * Now that derived_table holds the appropriate look-up values,
+        * free ref_table
+        */
+       kfree(ref_table);
+
+       return 0;
+
+err_alloc:
+       pm_runtime_put_sync(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
+
+       return ret;
+}
+
+static int k3_j72xx_bandgap_remove(struct platform_device *pdev)
+{
+       pm_runtime_put_sync(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
+
+       return 0;
+}
+
+static const struct k3_j72xx_bandgap_data k3_j72xx_bandgap_j721e_data = {
+       .has_errata_i2128 = 1,
+};
+
+static const struct k3_j72xx_bandgap_data k3_j72xx_bandgap_j7200_data = {
+       .has_errata_i2128 = 0,
+};
+
+static const struct of_device_id of_k3_j72xx_bandgap_match[] = {
+       {
+               .compatible = "ti,j721e-vtm",
+               .data = &k3_j72xx_bandgap_j721e_data,
+       },
+       {
+               .compatible = "ti,j7200-vtm",
+               .data = &k3_j72xx_bandgap_j7200_data,
+       },
+       { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, of_k3_j72xx_bandgap_match);
+
+static struct platform_driver k3_j72xx_bandgap_sensor_driver = {
+       .probe = k3_j72xx_bandgap_probe,
+       .remove = k3_j72xx_bandgap_remove,
+       .driver = {
+               .name = "k3-j72xx-soc-thermal",
+               .of_match_table = of_k3_j72xx_bandgap_match,
+       },
+};
+
+module_platform_driver(k3_j72xx_bandgap_sensor_driver);
+
+MODULE_DESCRIPTION("K3 bandgap temperature sensor driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("J Keerthy <j-keerthy@ti.com>");
index c7f91cbdccc7b4974d16624dda37c1127f0ff19f..d3d9b9fa49e8191d7bcd8a4a506ea82f02db2685 100644 (file)
@@ -220,6 +220,7 @@ static int lmh_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id lmh_table[] = {
+       { .compatible = "qcom,sc8180x-lmh", },
        { .compatible = "qcom,sdm845-lmh", .data = (void *)LMH_ENABLE_ALGOS},
        { .compatible = "qcom,sm8150-lmh", },
        {}
index 824671cf494acb1e755d564908b8f4925a3486d3..d9c9c975f931ca12d315da3090866cef8526b33f 100644 (file)
@@ -4,7 +4,10 @@
  *
  * Based on original driver:
  * Copyright (c) 2012-2020, The Linux Foundation. All rights reserved.
+ *
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
  */
+
 #include <linux/bitfield.h>
 #include <linux/iio/adc/qcom-vadc-common.h>
 #include <linux/iio/consumer.h>
@@ -15,6 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/thermal.h>
+#include <asm/unaligned.h>
 
 /*
  * Thermal monitoring block consists of 8 (ADC_TM5_NUM_CHANNELS) channels. Each
 #define ADC_TM5_M_HIGH_THR_INT_EN                      BIT(1)
 #define ADC_TM5_M_LOW_THR_INT_EN                       BIT(0)
 
+#define ADC_TM_GEN2_STATUS1                    0x08
+#define ADC_TM_GEN2_STATUS_LOW_SET             0x09
+#define ADC_TM_GEN2_STATUS_LOW_CLR             0x0a
+#define ADC_TM_GEN2_STATUS_HIGH_SET            0x0b
+#define ADC_TM_GEN2_STATUS_HIGH_CLR            0x0c
+
+#define ADC_TM_GEN2_CFG_HS_SET                 0x0d
+#define ADC_TM_GEN2_CFG_HS_FLAG                        BIT(0)
+#define ADC_TM_GEN2_CFG_HS_CLR                 0x0e
+
+#define ADC_TM_GEN2_SID                                0x40
+
+#define ADC_TM_GEN2_CH_CTL                     0x41
+#define ADC_TM_GEN2_TM_CH_SEL                  GENMASK(7, 5)
+#define ADC_TM_GEN2_MEAS_INT_SEL               GENMASK(3, 2)
+
+#define ADC_TM_GEN2_ADC_DIG_PARAM              0x42
+#define ADC_TM_GEN2_CTL_CAL_SEL                        GENMASK(5, 4)
+#define ADC_TM_GEN2_CTL_DEC_RATIO_MASK         GENMASK(3, 2)
+
+#define ADC_TM_GEN2_FAST_AVG_CTL               0x43
+#define ADC_TM_GEN2_FAST_AVG_EN                        BIT(7)
+
+#define ADC_TM_GEN2_ADC_CH_SEL_CTL             0x44
+
+#define ADC_TM_GEN2_DELAY_CTL                  0x45
+#define ADC_TM_GEN2_HW_SETTLE_DELAY            GENMASK(3, 0)
+
+#define ADC_TM_GEN2_EN_CTL1                    0x46
+#define ADC_TM_GEN2_EN                         BIT(7)
+
+#define ADC_TM_GEN2_CONV_REQ                   0x47
+#define ADC_TM_GEN2_CONV_REQ_EN                        BIT(7)
+
+#define ADC_TM_GEN2_LOW_THR0                   0x49
+#define ADC_TM_GEN2_LOW_THR1                   0x4a
+#define ADC_TM_GEN2_HIGH_THR0                  0x4b
+#define ADC_TM_GEN2_HIGH_THR1                  0x4c
+#define ADC_TM_GEN2_LOWER_MASK(n)              ((n) & GENMASK(7, 0))
+#define ADC_TM_GEN2_UPPER_MASK(n)              (((n) & GENMASK(15, 8)) >> 8)
+
+#define ADC_TM_GEN2_MEAS_IRQ_EN                        0x4d
+#define ADC_TM_GEN2_MEAS_EN                    BIT(7)
+#define ADC_TM5_GEN2_HIGH_THR_INT_EN           BIT(1)
+#define ADC_TM5_GEN2_LOW_THR_INT_EN            BIT(0)
+
+#define ADC_TM_GEN2_MEAS_INT_LSB               0x50
+#define ADC_TM_GEN2_MEAS_INT_MSB               0x51
+#define ADC_TM_GEN2_MEAS_INT_MODE              0x52
+
+#define ADC_TM_GEN2_Mn_DATA0(n)                        (((n) * 2) + 0xa0)
+#define ADC_TM_GEN2_Mn_DATA1(n)                        (((n) * 2) + 0xa1)
+#define ADC_TM_GEN2_DATA_SHIFT                 8
+
 enum adc5_timer_select {
        ADC5_TIMER_SEL_1 = 0,
        ADC5_TIMER_SEL_2,
@@ -78,11 +136,11 @@ enum adc5_timer_select {
        ADC5_TIMER_SEL_NONE,
 };
 
-struct adc_tm5_data {
-       const u32       full_scale_code_volt;
-       unsigned int    *decimation;
-       unsigned int    *hw_settle;
-       bool            is_hc;
+enum adc5_gen {
+       ADC_TM5,
+       ADC_TM_HC,
+       ADC_TM5_GEN2,
+       ADC_TM5_MAX
 };
 
 enum adc_tm5_cal_method {
@@ -91,7 +149,28 @@ enum adc_tm5_cal_method {
        ADC_TM5_ABSOLUTE_CAL
 };
 
+enum adc_tm_gen2_time_select {
+       MEAS_INT_50MS = 0,
+       MEAS_INT_100MS,
+       MEAS_INT_1S,
+       MEAS_INT_SET,
+       MEAS_INT_NONE,
+};
+
 struct adc_tm5_chip;
+struct adc_tm5_channel;
+
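+/**
+ * struct adc_tm5_data - per-generation ADC_TM ops and tables
+ * @full_scale_code_volt: full scale ADC code used for voltage threshold scaling
+ * @decimation: decimation ratios supported by this generation
+ * @hw_settle: hardware settle times supported by this generation
+ * @disable_channel: disable monitoring on a channel
+ * @configure: program the threshold window for a channel
+ * @isr: threaded handler for threshold violation interrupts
+ * @init: one-time controller initialization
+ * @irq_name: name used when requesting the interrupt
+ * @gen: ADC_TM hardware generation
+ */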
+struct adc_tm5_data {
+       const u32 full_scale_code_volt;
+       unsigned int *decimation;
+       unsigned int *hw_settle;
+       int (*disable_channel)(struct adc_tm5_channel *channel);
+       int (*configure)(struct adc_tm5_channel *channel, int low, int high);
+       irqreturn_t (*isr)(int irq, void *data);
+       int (*init)(struct adc_tm5_chip *chip);
+       char *irq_name;
+       int gen;
+};
 
 /**
  * struct adc_tm5_channel - ADC Thermal Monitoring channel data.
@@ -101,6 +180,12 @@ struct adc_tm5_chip;
  * @prescale: channel scaling performed on the input signal.
  * @hw_settle_time: the time between AMUX being configured and the
  *     start of conversion.
+ * @decimation: sampling rate supported for the channel.
+ * @avg_samples: ability to provide single result from the ADC
+ *     that is an average of multiple measurements.
+ * @high_thr_en: channel upper voltage threshold enable state.
+ * @low_thr_en: channel lower voltage threshold enable state.
+ * @meas_en: recurring measurement enable state
  * @iio: IIO channel instance used by this channel.
  * @chip: ADC TM chip instance.
  * @tzd: thermal zone device used by this channel.
@@ -111,6 +196,11 @@ struct adc_tm5_channel {
        enum adc_tm5_cal_method cal_method;
        unsigned int            prescale;
        unsigned int            hw_settle_time;
+       unsigned int            decimation;     /* For Gen2 ADC_TM */
+       unsigned int            avg_samples;    /* For Gen2 ADC_TM */
+       bool                    high_thr_en;    /* For Gen2 ADC_TM */
+       bool                    low_thr_en;     /* For Gen2 ADC_TM */
+       bool                    meas_en;        /* For Gen2 ADC_TM */
        struct iio_channel      *iio;
        struct adc_tm5_chip     *chip;
        struct thermal_zone_device *tzd;
@@ -124,9 +214,15 @@ struct adc_tm5_channel {
  * @channels: array of ADC TM channel data.
  * @nchannels: amount of channels defined/allocated
  * @decimation: sampling rate supported for the channel.
+ *      Applies to all channels, used only on Gen1 ADC_TM.
  * @avg_samples: ability to provide single result from the ADC
- *     that is an average of multiple measurements.
+ *      that is an average of multiple measurements. Applies to all
+ *      channels, used only on Gen1 ADC_TM.
  * @base: base address of TM registers.
+ * @adc_mutex_lock: ADC_TM mutex lock, used only on Gen2 ADC_TM.
+ *      It is used to ensure only one ADC channel configuration
+ *      is done at a time using the shared set of configuration
+ *      registers.
  */
 struct adc_tm5_chip {
        struct regmap           *regmap;
@@ -137,22 +233,7 @@ struct adc_tm5_chip {
        unsigned int            decimation;
        unsigned int            avg_samples;
        u16                     base;
-};
-
-static const struct adc_tm5_data adc_tm5_data_pmic = {
-       .full_scale_code_volt = 0x70e4,
-       .decimation = (unsigned int []) { 250, 420, 840 },
-       .hw_settle = (unsigned int []) { 15, 100, 200, 300, 400, 500, 600, 700,
-                                        1000, 2000, 4000, 8000, 16000, 32000,
-                                        64000, 128000 },
-};
-
-static const struct adc_tm5_data adc_tm_hc_data_pmic = {
-       .full_scale_code_volt = 0x70e4,
-       .decimation = (unsigned int []) { 256, 512, 1024 },
-       .hw_settle = (unsigned int []) { 0, 100, 200, 300, 400, 500, 600, 700,
-                                        1000, 2000, 4000, 6000, 8000, 10000 },
-       .is_hc = true,
+       struct mutex            adc_mutex_lock;
 };
 
 static int adc_tm5_read(struct adc_tm5_chip *adc_tm, u16 offset, u8 *data, int len)
@@ -219,6 +300,61 @@ static irqreturn_t adc_tm5_isr(int irq, void *data)
        return IRQ_HANDLED;
 }
 
+static irqreturn_t adc_tm5_gen2_isr(int irq, void *data)
+{
+       struct adc_tm5_chip *chip = data;
+       u8 status_low, status_high;
+       int ret, i;
+
+       ret = adc_tm5_read(chip, ADC_TM_GEN2_STATUS_LOW_CLR, &status_low, sizeof(status_low));
+       if (ret) {
+               dev_err(chip->dev, "read status_low failed: %d\n", ret);
+               return IRQ_HANDLED;
+       }
+
+       ret = adc_tm5_read(chip, ADC_TM_GEN2_STATUS_HIGH_CLR, &status_high, sizeof(status_high));
+       if (ret) {
+               dev_err(chip->dev, "read status_high failed: %d\n", ret);
+               return IRQ_HANDLED;
+       }
+
+       ret = adc_tm5_write(chip, ADC_TM_GEN2_STATUS_LOW_CLR, &status_low, sizeof(status_low));
+       if (ret < 0) {
+               dev_err(chip->dev, "clear status low failed with %d\n", ret);
+               return IRQ_HANDLED;
+       }
+
+       ret = adc_tm5_write(chip, ADC_TM_GEN2_STATUS_HIGH_CLR, &status_high, sizeof(status_high));
+       if (ret < 0) {
+               dev_err(chip->dev, "clear status high failed with %d\n", ret);
+               return IRQ_HANDLED;
+       }
+
+       for (i = 0; i < chip->nchannels; i++) {
+               bool upper_set = false, lower_set = false;
+               unsigned int ch = chip->channels[i].channel;
+
+               /* No TZD, we warned about it at boot time */
+               if (!chip->channels[i].tzd)
+                       continue;
+
+               if (!chip->channels[i].meas_en)
+                       continue;
+
+               lower_set = (status_low & BIT(ch)) &&
+                       (chip->channels[i].low_thr_en);
+
+               upper_set = (status_high & BIT(ch)) &&
+                       (chip->channels[i].high_thr_en);
+
+               if (upper_set || lower_set)
+                       thermal_zone_device_update(chip->channels[i].tzd,
+                                                  THERMAL_EVENT_UNSPECIFIED);
+       }
+
+       return IRQ_HANDLED;
+}
+
 static int adc_tm5_get_temp(void *data, int *temp)
 {
        struct adc_tm5_channel *channel = data;
@@ -249,6 +385,104 @@ static int adc_tm5_disable_channel(struct adc_tm5_channel *channel)
                                  0);
 }
 
+#define ADC_TM_GEN2_POLL_DELAY_MIN_US          100
+#define ADC_TM_GEN2_POLL_DELAY_MAX_US          110
+#define ADC_TM_GEN2_POLL_RETRY_COUNT           3
+
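+/*
+ * Queue a conversion request: enable the block, raise the handshake flag,
+ * request the conversion, then poll until PBS clears the flag, indicating
+ * the request has been latched.
+ */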
+static int32_t adc_tm5_gen2_conv_req(struct adc_tm5_chip *chip)
+{
+       int ret;
+       u8 data;
+       unsigned int count;
+
+       data = ADC_TM_GEN2_EN;
+       ret = adc_tm5_write(chip, ADC_TM_GEN2_EN_CTL1, &data, 1);
+       if (ret < 0) {
+               dev_err(chip->dev, "adc-tm enable failed with %d\n", ret);
+               return ret;
+       }
+
+       data = ADC_TM_GEN2_CFG_HS_FLAG;
+       ret = adc_tm5_write(chip, ADC_TM_GEN2_CFG_HS_SET, &data, 1);
+       if (ret < 0) {
+               dev_err(chip->dev, "adc-tm handshake failed with %d\n", ret);
+               return ret;
+       }
+
+       data = ADC_TM_GEN2_CONV_REQ_EN;
+       ret = adc_tm5_write(chip, ADC_TM_GEN2_CONV_REQ, &data, 1);
+       if (ret < 0) {
+               dev_err(chip->dev, "adc-tm request conversion failed with %d\n", ret);
+               return ret;
+       }
+
+       /*
+        * SW sets a handshake bit and waits for PBS to clear it
+        * before the next conversion request can be queued.
+        */
+
+       for (count = 0; count < ADC_TM_GEN2_POLL_RETRY_COUNT; count++) {
+               ret = adc_tm5_read(chip, ADC_TM_GEN2_CFG_HS_SET, &data, sizeof(data));
+               if (ret < 0) {
+                       dev_err(chip->dev, "adc-tm read failed with %d\n", ret);
+                       return ret;
+               }
+
+               if (!(data & ADC_TM_GEN2_CFG_HS_FLAG))
+                       return ret;
+               usleep_range(ADC_TM_GEN2_POLL_DELAY_MIN_US,
+                       ADC_TM_GEN2_POLL_DELAY_MAX_US);
+       }
+
+       dev_err(chip->dev, "adc-tm conversion request handshake timed out\n");
+
+       return -ETIMEDOUT;
+}
+
+static int adc_tm5_gen2_disable_channel(struct adc_tm5_channel *channel)
+{
+       struct adc_tm5_chip *chip = channel->chip;
+       int ret;
+       u8 val;
+
+       mutex_lock(&chip->adc_mutex_lock);
+
+       channel->meas_en = false;
+       channel->high_thr_en = false;
+       channel->low_thr_en = false;
+
+       ret = adc_tm5_read(chip, ADC_TM_GEN2_CH_CTL, &val, sizeof(val));
+       if (ret < 0) {
+               dev_err(chip->dev, "adc-tm block read failed with %d\n", ret);
+               goto disable_fail;
+       }
+
+       val &= ~ADC_TM_GEN2_TM_CH_SEL;
+       val |= FIELD_PREP(ADC_TM_GEN2_TM_CH_SEL, channel->channel);
+
+       ret = adc_tm5_write(chip, ADC_TM_GEN2_CH_CTL, &val, 1);
+       if (ret < 0) {
+               dev_err(chip->dev, "adc-tm channel disable failed with %d\n", ret);
+               goto disable_fail;
+       }
+
+       val = 0;
+       ret = adc_tm5_write(chip, ADC_TM_GEN2_MEAS_IRQ_EN, &val, 1);
+       if (ret < 0) {
+               dev_err(chip->dev, "adc-tm interrupt disable failed with %d\n", ret);
+               goto disable_fail;
+       }
+
+       ret = adc_tm5_gen2_conv_req(channel->chip);
+       if (ret < 0)
+               dev_err(chip->dev, "adc-tm channel configure failed with %d\n", ret);
+
+disable_fail:
+       mutex_unlock(&chip->adc_mutex_lock);
+       return ret;
+}
+
 static int adc_tm5_enable(struct adc_tm5_chip *chip)
 {
        int ret;
@@ -291,8 +525,7 @@ static int adc_tm5_configure(struct adc_tm5_channel *channel, int low, int high)
                u16 adc_code = qcom_adc_tm5_temp_volt_scale(channel->prescale,
                                chip->data->full_scale_code_volt, high);
 
-               buf[1] = adc_code & 0xff;
-               buf[2] = adc_code >> 8;
+               put_unaligned_le16(adc_code, &buf[1]);
                buf[7] |= ADC_TM5_M_LOW_THR_INT_EN;
        } else {
                buf[7] &= ~ADC_TM5_M_LOW_THR_INT_EN;
@@ -303,8 +536,7 @@ static int adc_tm5_configure(struct adc_tm5_channel *channel, int low, int high)
                u16 adc_code = qcom_adc_tm5_temp_volt_scale(channel->prescale,
                                chip->data->full_scale_code_volt, low);
 
-               buf[3] = adc_code & 0xff;
-               buf[4] = adc_code >> 8;
+               put_unaligned_le16(adc_code, &buf[3]);
                buf[7] |= ADC_TM5_M_HIGH_THR_INT_EN;
        } else {
                buf[7] &= ~ADC_TM5_M_HIGH_THR_INT_EN;
@@ -329,6 +561,82 @@ static int adc_tm5_configure(struct adc_tm5_channel *channel, int low, int high)
        return adc_tm5_enable(chip);
 }
 
+static int adc_tm5_gen2_configure(struct adc_tm5_channel *channel, int low, int high)
+{
+       struct adc_tm5_chip *chip = channel->chip;
+       int ret;
+       u8 buf[14];
+       u16 adc_code;
+
+       mutex_lock(&chip->adc_mutex_lock);
+
+       channel->meas_en = true;
+
+       ret = adc_tm5_read(chip, ADC_TM_GEN2_SID, buf, sizeof(buf));
+       if (ret < 0) {
+               dev_err(chip->dev, "adc-tm block read failed with %d\n", ret);
+               goto config_fail;
+       }
+
+       /* Set SID from virtual channel number */
+       buf[0] = channel->adc_channel >> 8;
+
+       /* Set TM channel number used and measurement interval */
+       buf[1] &= ~ADC_TM_GEN2_TM_CH_SEL;
+       buf[1] |= FIELD_PREP(ADC_TM_GEN2_TM_CH_SEL, channel->channel);
+       buf[1] &= ~ADC_TM_GEN2_MEAS_INT_SEL;
+       buf[1] |= FIELD_PREP(ADC_TM_GEN2_MEAS_INT_SEL, MEAS_INT_1S);
+
+       buf[2] &= ~ADC_TM_GEN2_CTL_DEC_RATIO_MASK;
+       buf[2] |= FIELD_PREP(ADC_TM_GEN2_CTL_DEC_RATIO_MASK, channel->decimation);
+       buf[2] &= ~ADC_TM_GEN2_CTL_CAL_SEL;
+       buf[2] |= FIELD_PREP(ADC_TM_GEN2_CTL_CAL_SEL, channel->cal_method);
+
+       buf[3] = channel->avg_samples | ADC_TM_GEN2_FAST_AVG_EN;
+
+       buf[4] = channel->adc_channel & 0xff;
+
+       buf[5] = channel->hw_settle_time & ADC_TM_GEN2_HW_SETTLE_DELAY;
+
+       /* High temperature corresponds to low voltage threshold */
+       if (high != INT_MAX) {
+               channel->low_thr_en = true;
+               adc_code = qcom_adc_tm5_gen2_temp_res_scale(high);
+               put_unaligned_le16(adc_code, &buf[9]);
+       } else {
+               channel->low_thr_en = false;
+       }
+
+       /* Low temperature corresponds to high voltage threshold */
+       if (low != -INT_MAX) {
+               channel->high_thr_en = true;
+               adc_code = qcom_adc_tm5_gen2_temp_res_scale(low);
+               put_unaligned_le16(adc_code, &buf[11]);
+       } else {
+               channel->high_thr_en = false;
+       }
+
+       buf[13] = ADC_TM_GEN2_MEAS_EN;
+       if (channel->high_thr_en)
+               buf[13] |= ADC_TM5_GEN2_HIGH_THR_INT_EN;
+       if (channel->low_thr_en)
+               buf[13] |= ADC_TM5_GEN2_LOW_THR_INT_EN;
+
+       ret = adc_tm5_write(chip, ADC_TM_GEN2_SID, buf, sizeof(buf));
+       if (ret) {
+               dev_err(chip->dev, "channel %d params write failed: %d\n", channel->channel, ret);
+               goto config_fail;
+       }
+
+       ret = adc_tm5_gen2_conv_req(channel->chip);
+       if (ret < 0)
+               dev_err(chip->dev, "adc-tm channel configure failed with %d\n", ret);
+
+config_fail:
+       mutex_unlock(&chip->adc_mutex_lock);
+       return ret;
+}
+
 static int adc_tm5_set_trips(void *data, int low, int high)
 {
        struct adc_tm5_channel *channel = data;
@@ -343,14 +651,14 @@ static int adc_tm5_set_trips(void *data, int low, int high)
                channel->channel, low, high);
 
        if (high == INT_MAX && low <= -INT_MAX)
-               ret = adc_tm5_disable_channel(channel);
+               ret = chip->data->disable_channel(channel);
        else
-               ret = adc_tm5_configure(channel, low, high);
+               ret = chip->data->configure(channel, low, high);
 
        return ret;
 }
 
-static struct thermal_zone_of_device_ops adc_tm5_ops = {
+static struct thermal_zone_of_device_ops adc_tm5_thermal_ops = {
        .get_temp = adc_tm5_get_temp,
        .set_trips = adc_tm5_set_trips,
 };
@@ -366,7 +674,7 @@ static int adc_tm5_register_tzd(struct adc_tm5_chip *adc_tm)
                tzd = devm_thermal_zone_of_sensor_register(adc_tm->dev,
                                                           adc_tm->channels[i].channel,
                                                           &adc_tm->channels[i],
-                                                          &adc_tm5_ops);
+                                                          &adc_tm5_thermal_ops);
                if (IS_ERR(tzd)) {
                        if (PTR_ERR(tzd) == -ENODEV) {
                                dev_warn(adc_tm->dev, "thermal sensor on channel %d is not used\n",
@@ -442,12 +750,37 @@ static int adc_tm5_init(struct adc_tm5_chip *chip)
        return ret;
 }
 
+static int adc_tm5_gen2_init(struct adc_tm5_chip *chip)
+{
+       u8 channels_available;
+       int ret;
+       unsigned int i;
+
+       ret = adc_tm5_read(chip, ADC_TM5_NUM_BTM,
+                          &channels_available, sizeof(channels_available));
+       if (ret) {
+               dev_err(chip->dev, "read failed for BTM channels\n");
+               return ret;
+       }
+
+       for (i = 0; i < chip->nchannels; i++) {
+               if (chip->channels[i].channel >= channels_available) {
+                       dev_err(chip->dev, "Invalid channel %d\n", chip->channels[i].channel);
+                       return -EINVAL;
+               }
+       }
+
+       mutex_init(&chip->adc_mutex_lock);
+
+       return ret;
+}
+
 static int adc_tm5_get_dt_channel_data(struct adc_tm5_chip *adc_tm,
                                       struct adc_tm5_channel *channel,
                                       struct device_node *node)
 {
        const char *name = node->name;
-       u32 chan, value, varr[2];
+       u32 chan, value, adc_channel, varr[2];
        int ret;
        struct device *dev = adc_tm->dev;
        struct of_phandle_args args;
@@ -477,7 +810,16 @@ static int adc_tm5_get_dt_channel_data(struct adc_tm5_chip *adc_tm,
        }
        of_node_put(args.np);
 
-       if (args.args_count != 1 || args.args[0] >= ADC5_MAX_CHANNEL) {
+       if (args.args_count != 1) {
+               dev_err(dev, "%s: invalid args count for ADC channel %d\n", name, chan);
+               return -EINVAL;
+       }
+
+       adc_channel = args.args[0];
+       if (adc_tm->data->gen == ADC_TM5_GEN2)
+               adc_channel &= 0xff;
+
+       if (adc_channel >= ADC5_MAX_CHANNEL) {
                dev_err(dev, "%s: invalid ADC channel number %d\n", name, chan);
                return -EINVAL;
        }
@@ -523,9 +865,76 @@ static int adc_tm5_get_dt_channel_data(struct adc_tm5_chip *adc_tm,
        else
                channel->cal_method = ADC_TM5_ABSOLUTE_CAL;
 
+       if (adc_tm->data->gen == ADC_TM5_GEN2) {
+               ret = of_property_read_u32(node, "qcom,decimation", &value);
+               if (!ret) {
+                       ret = qcom_adc5_decimation_from_dt(value, adc_tm->data->decimation);
+                       if (ret < 0) {
+                               dev_err(dev, "invalid decimation %d\n", value);
+                               return ret;
+                       }
+                       channel->decimation = ret;
+               } else {
+                       channel->decimation = ADC5_DECIMATION_DEFAULT;
+               }
+
+               ret = of_property_read_u32(node, "qcom,avg-samples", &value);
+               if (!ret) {
+                       ret = qcom_adc5_avg_samples_from_dt(value);
+                       if (ret < 0) {
+                               dev_err(dev, "invalid avg-samples %d\n", value);
+                               return ret;
+                       }
+                       channel->avg_samples = ret;
+               } else {
+                       channel->avg_samples = VADC_DEF_AVG_SAMPLES;
+               }
+       }
+
        return 0;
 }
 
+static const struct adc_tm5_data adc_tm5_data_pmic = {
+       .full_scale_code_volt = 0x70e4,
+       .decimation = (unsigned int []) { 250, 420, 840 },
+       .hw_settle = (unsigned int []) { 15, 100, 200, 300, 400, 500, 600, 700,
+                                        1000, 2000, 4000, 8000, 16000, 32000,
+                                        64000, 128000 },
+       .disable_channel = adc_tm5_disable_channel,
+       .configure = adc_tm5_configure,
+       .isr = adc_tm5_isr,
+       .init = adc_tm5_init,
+       .irq_name = "pm-adc-tm5",
+       .gen = ADC_TM5,
+};
+
+static const struct adc_tm5_data adc_tm_hc_data_pmic = {
+       .full_scale_code_volt = 0x70e4,
+       .decimation = (unsigned int []) { 256, 512, 1024 },
+       .hw_settle = (unsigned int []) { 0, 100, 200, 300, 400, 500, 600, 700,
+                                        1000, 2000, 4000, 6000, 8000, 10000 },
+       .disable_channel = adc_tm5_disable_channel,
+       .configure = adc_tm5_configure,
+       .isr = adc_tm5_isr,
+       .init = adc_tm_hc_init,
+       .irq_name = "pm-adc-tm5",
+       .gen = ADC_TM_HC,
+};
+
+static const struct adc_tm5_data adc_tm5_gen2_data_pmic = {
+       .full_scale_code_volt = 0x70e4,
+       .decimation = (unsigned int []) { 85, 340, 1360 },
+       .hw_settle = (unsigned int []) { 15, 100, 200, 300, 400, 500, 600, 700,
+                                        1000, 2000, 4000, 8000, 16000, 32000,
+                                        64000, 128000 },
+       .disable_channel = adc_tm5_gen2_disable_channel,
+       .configure = adc_tm5_gen2_configure,
+       .isr = adc_tm5_gen2_isr,
+       .init = adc_tm5_gen2_init,
+       .irq_name = "pm-adc-tm5-gen2",
+       .gen = ADC_TM5_GEN2,
+};
+
 static int adc_tm5_get_dt_data(struct adc_tm5_chip *adc_tm, struct device_node *node)
 {
        struct adc_tm5_channel *channels;
@@ -623,10 +1032,7 @@ static int adc_tm5_probe(struct platform_device *pdev)
                return ret;
        }
 
-       if (adc_tm->data->is_hc)
-               ret = adc_tm_hc_init(adc_tm);
-       else
-               ret = adc_tm5_init(adc_tm);
+       ret = adc_tm->data->init(adc_tm);
        if (ret) {
                dev_err(dev, "adc-tm init failed\n");
                return ret;
@@ -638,8 +1044,8 @@ static int adc_tm5_probe(struct platform_device *pdev)
                return ret;
        }
 
-       return devm_request_threaded_irq(dev, irq, NULL, adc_tm5_isr,
-                                        IRQF_ONESHOT, "pm-adc-tm5", adc_tm);
+       return devm_request_threaded_irq(dev, irq, NULL, adc_tm->data->isr,
+                       IRQF_ONESHOT, adc_tm->data->irq_name, adc_tm);
 }
 
 static const struct of_device_id adc_tm5_match_table[] = {
@@ -651,6 +1057,10 @@ static const struct of_device_id adc_tm5_match_table[] = {
                .compatible = "qcom,spmi-adc-tm-hc",
                .data = &adc_tm_hc_data_pmic,
        },
+       {
+               .compatible = "qcom,spmi-adc-tm5-gen2",
+               .data = &adc_tm5_gen2_data_pmic,
+       },
        { }
 };
 MODULE_DEVICE_TABLE(of, adc_tm5_match_table);
index 154d3cb19c88db0522f3e477ded0fea4011d1087..7963ee33bf75b79a345e8ad94fe6a74dd715b727 100644 (file)
@@ -979,6 +979,9 @@ static const struct of_device_id tsens_table[] = {
        }, {
                .compatible = "qcom,msm8939-tsens",
                .data = &data_8939,
+       }, {
+               .compatible = "qcom,msm8960-tsens",
+               .data = &data_8960,
        }, {
                .compatible = "qcom,msm8974-tsens",
                .data = &data_8974,
index b49f04daaf477976807d92cf0ba7f563a1c271c0..1d729ed4d68567971964b5f11160dc735d8a6a81 100644 (file)
@@ -445,7 +445,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
        struct rcar_thermal_common *common;
        struct rcar_thermal_priv *priv;
        struct device *dev = &pdev->dev;
-       struct resource *res, *irq;
+       struct resource *res;
        const struct rcar_thermal_chip *chip = of_device_get_match_data(dev);
        int mres = 0;
        int i;
@@ -467,9 +467,16 @@ static int rcar_thermal_probe(struct platform_device *pdev)
        pm_runtime_get_sync(dev);
 
        for (i = 0; i < chip->nirqs; i++) {
-               irq = platform_get_resource(pdev, IORESOURCE_IRQ, i);
-               if (!irq)
-                       continue;
+               int irq;
+
+               ret = platform_get_irq_optional(pdev, i);
+               if (ret < 0 && ret != -ENXIO)
+                       goto error_unregister;
+               if (ret > 0)
+                       irq = ret;
+               else
+                       break;
+
                if (!common->base) {
                        /*
                         * platform has IRQ support.
@@ -487,7 +494,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
                        idle = 0; /* polling delay is not needed */
                }
 
-               ret = devm_request_irq(dev, irq->start, rcar_thermal_irq,
+               ret = devm_request_irq(dev, irq, rcar_thermal_irq,
                                       IRQF_SHARED, dev_name(dev), common);
                if (ret) {
                        dev_err(dev, "irq request failed\n ");
index 7a9cdc1f37ca9dece6314d0a2cfbfe7b7c9dcdfa..be07e04c6926104a539368a62d20f5713f165806 100644 (file)
@@ -32,6 +32,8 @@
 #define TSU_SS         0x10
 
 #define OTPTSUTRIM_REG(n)      (0x18 + ((n) * 0x4))
+#define OTPTSUTRIM_EN_MASK     BIT(31)
+#define OTPTSUTRIM_MASK                GENMASK(11, 0)
 
 /* Sensor Mode Register(TSU_SM) */
 #define TSU_SM_EN_TS           BIT(0)
@@ -183,11 +185,15 @@ static int rzg2l_thermal_probe(struct platform_device *pdev)
        pm_runtime_get_sync(dev);
 
        priv->calib0 = rzg2l_thermal_read(priv, OTPTSUTRIM_REG(0));
-       if (!priv->calib0)
+       if (priv->calib0 & OTPTSUTRIM_EN_MASK)
+               priv->calib0 &= OTPTSUTRIM_MASK;
+       else
                priv->calib0 = SW_CALIB0_VAL;
 
        priv->calib1 = rzg2l_thermal_read(priv, OTPTSUTRIM_REG(1));
-       if (!priv->calib1)
+       if (priv->calib1 & OTPTSUTRIM_EN_MASK)
+               priv->calib1 &= OTPTSUTRIM_MASK;
+       else
                priv->calib1 = SW_CALIB1_VAL;
 
        platform_set_drvdata(pdev, priv);
index 82654dc8382b85e5a7a17733fa112757d61fccf9..cdc0552e8c42e1cf14ee469d5747499489c0b3b3 100644 (file)
@@ -947,6 +947,7 @@ __thermal_cooling_device_register(struct device_node *np,
        return cdev;
 
 out_kfree_type:
+       thermal_cooling_device_destroy_sysfs(cdev);
        kfree(cdev->type);
        put_device(&cdev->device);
        cdev = NULL;
index ad03262cca5692ca5f44906d03e664422c820105..09e49ec8b6f48926e5a80838509fec73d7da900a 100644 (file)
@@ -149,8 +149,8 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
        INIT_LIST_HEAD(&hwmon->tz_list);
        strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH);
        strreplace(hwmon->type, '-', '_');
-       hwmon->device = hwmon_device_register_with_info(&tz->device, hwmon->type,
-                                                       hwmon, NULL, NULL);
+       hwmon->device = hwmon_device_register_for_thermal(&tz->device,
+                                                         hwmon->type, hwmon);
        if (IS_ERR(hwmon->device)) {
                result = PTR_ERR(hwmon->device);
                goto free_mem;
@@ -277,3 +277,5 @@ int devm_thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
        return ret;
 }
 EXPORT_SYMBOL_GPL(devm_thermal_add_hwmon_sysfs);
+
+MODULE_IMPORT_NS(HWMON_THERMAL);
index 9233f7e744544641a8361bdc8bb6da6f071ded4e..b65d435cb92f6366844a22deab1edad7e0958501 100644 (file)
@@ -35,7 +35,7 @@ struct __thermal_cooling_bind_param {
 };
 
 /**
- * struct __thermal_bind_param - a match between trip and cooling device
+ * struct __thermal_bind_params - a match between trip and cooling device
  * @tcbp: a pointer to an array of cooling devices
  * @count: number of elements in array
  * @trip_id: the trip point index
@@ -203,6 +203,14 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
        return data->ops->get_trend(data->sensor_data, trip, trend);
 }
 
+static int of_thermal_change_mode(struct thermal_zone_device *tz,
+                               enum thermal_device_mode mode)
+{
+       struct __thermal_zone *data = tz->devdata;
+
+       return data->ops->change_mode(data->sensor_data, mode);
+}
+
 static int of_thermal_bind(struct thermal_zone_device *thermal,
                           struct thermal_cooling_device *cdev)
 {
@@ -408,6 +416,9 @@ thermal_zone_of_add_sensor(struct device_node *zone,
        if (ops->set_emul_temp)
                tzd->ops->set_emul_temp = of_thermal_set_emul_temp;
 
+       if (ops->change_mode)
+               tzd->ops->change_mode = of_thermal_change_mode;
+
        mutex_unlock(&tzd->lock);
 
        return tzd;
@@ -569,6 +580,7 @@ void thermal_zone_of_sensor_unregister(struct device *dev,
        tzd->ops->get_temp = NULL;
        tzd->ops->get_trend = NULL;
        tzd->ops->set_emul_temp = NULL;
+       tzd->ops->change_mode = NULL;
 
        tz->ops = NULL;
        tz->sensor_data = NULL;
index ad13532e92fe25bc11124ee6171df9633263341f..9e8ccb8ed6d69474092e25ffa52ccc35b87ebf50 100644 (file)
@@ -61,13 +61,13 @@ static void do_rw_io(struct goldfish_tty *qtty,
        spin_lock_irqsave(&qtty->lock, irq_flags);
        gf_write_ptr((void *)address, base + GOLDFISH_TTY_REG_DATA_PTR,
                     base + GOLDFISH_TTY_REG_DATA_PTR_HIGH);
-       __raw_writel(count, base + GOLDFISH_TTY_REG_DATA_LEN);
+       gf_iowrite32(count, base + GOLDFISH_TTY_REG_DATA_LEN);
 
        if (is_write)
-               __raw_writel(GOLDFISH_TTY_CMD_WRITE_BUFFER,
+               gf_iowrite32(GOLDFISH_TTY_CMD_WRITE_BUFFER,
                       base + GOLDFISH_TTY_REG_CMD);
        else
-               __raw_writel(GOLDFISH_TTY_CMD_READ_BUFFER,
+               gf_iowrite32(GOLDFISH_TTY_CMD_READ_BUFFER,
                       base + GOLDFISH_TTY_REG_CMD);
 
        spin_unlock_irqrestore(&qtty->lock, irq_flags);
@@ -142,7 +142,7 @@ static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id)
        unsigned char *buf;
        u32 count;
 
-       count = __raw_readl(base + GOLDFISH_TTY_REG_BYTES_READY);
+       count = gf_ioread32(base + GOLDFISH_TTY_REG_BYTES_READY);
        if (count == 0)
                return IRQ_NONE;
 
@@ -159,7 +159,7 @@ static int goldfish_tty_activate(struct tty_port *port, struct tty_struct *tty)
 {
        struct goldfish_tty *qtty = container_of(port, struct goldfish_tty,
                                                                        port);
-       __raw_writel(GOLDFISH_TTY_CMD_INT_ENABLE, qtty->base + GOLDFISH_TTY_REG_CMD);
+       gf_iowrite32(GOLDFISH_TTY_CMD_INT_ENABLE, qtty->base + GOLDFISH_TTY_REG_CMD);
        return 0;
 }
 
@@ -167,7 +167,7 @@ static void goldfish_tty_shutdown(struct tty_port *port)
 {
        struct goldfish_tty *qtty = container_of(port, struct goldfish_tty,
                                                                        port);
-       __raw_writel(GOLDFISH_TTY_CMD_INT_DISABLE, qtty->base + GOLDFISH_TTY_REG_CMD);
+       gf_iowrite32(GOLDFISH_TTY_CMD_INT_DISABLE, qtty->base + GOLDFISH_TTY_REG_CMD);
 }
 
 static int goldfish_tty_open(struct tty_struct *tty, struct file *filp)
@@ -202,7 +202,7 @@ static unsigned int goldfish_tty_chars_in_buffer(struct tty_struct *tty)
 {
        struct goldfish_tty *qtty = &goldfish_ttys[tty->index];
        void __iomem *base = qtty->base;
-       return __raw_readl(base + GOLDFISH_TTY_REG_BYTES_READY);
+       return gf_ioread32(base + GOLDFISH_TTY_REG_BYTES_READY);
 }
 
 static void goldfish_tty_console_write(struct console *co, const char *b,
@@ -355,7 +355,7 @@ static int goldfish_tty_probe(struct platform_device *pdev)
         * on Ranchu emulator (qemu2) returns 1 here and
         * driver will use physical addresses.
         */
-       qtty->version = __raw_readl(base + GOLDFISH_TTY_REG_VERSION);
+       qtty->version = gf_ioread32(base + GOLDFISH_TTY_REG_VERSION);
 
        /*
         * Goldfish TTY device on Ranchu emulator (qemu2)
@@ -374,7 +374,7 @@ static int goldfish_tty_probe(struct platform_device *pdev)
                }
        }
 
-       __raw_writel(GOLDFISH_TTY_CMD_INT_DISABLE, base + GOLDFISH_TTY_REG_CMD);
+       gf_iowrite32(GOLDFISH_TTY_CMD_INT_DISABLE, base + GOLDFISH_TTY_REG_CMD);
 
        ret = request_irq(irq, goldfish_tty_interrupt, IRQF_SHARED,
                          "goldfish_tty", qtty);
@@ -436,7 +436,7 @@ static int goldfish_tty_remove(struct platform_device *pdev)
 #ifdef CONFIG_GOLDFISH_TTY_EARLY_CONSOLE
 static void gf_early_console_putchar(struct uart_port *port, unsigned char ch)
 {
-       __raw_writel(ch, port->membase);
+       gf_iowrite32(ch, port->membase);
 }
 
 static void gf_early_write(struct console *con, const char *s, unsigned int n)
index a38b922bcbc10724f58c321513d746858c26079b..fd8b86dde525507ee09d0961492f786e5c291a4a 100644 (file)
@@ -137,6 +137,7 @@ struct gsm_dlci {
        int retries;
        /* Uplink tty if active */
        struct tty_port port;   /* The tty bound to this DLCI if there is one */
+#define TX_SIZE                4096    /* Must be power of 2. */
        struct kfifo fifo;      /* Queue fifo for the DLCI */
        int adaption;           /* Adaption layer in use */
        int prev_adaption;
@@ -1658,6 +1659,7 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen)
                        if (len == 0)
                                return;
                }
+               len--;
                slen++;
                tty = tty_port_tty_get(port);
                if (tty) {
@@ -1730,7 +1732,7 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr)
                return NULL;
        spin_lock_init(&dlci->lock);
        mutex_init(&dlci->mutex);
-       if (kfifo_alloc(&dlci->fifo, 4096, GFP_KERNEL) < 0) {
+       if (kfifo_alloc(&dlci->fifo, TX_SIZE, GFP_KERNEL) < 0) {
                kfree(dlci);
                return NULL;
        }
@@ -2351,6 +2353,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm,
 
 static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
 {
+       int ret = 0;
        int need_close = 0;
        int need_restart = 0;
 
@@ -2418,10 +2421,13 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
         * FIXME: We need to separate activation/deactivation from adding
         * and removing from the mux array
         */
-       if (need_restart)
-               gsm_activate_mux(gsm);
-       if (gsm->initiator && need_close)
-               gsm_dlci_begin_open(gsm->dlci[0]);
+       if (gsm->dead) {
+               ret = gsm_activate_mux(gsm);
+               if (ret)
+                       return ret;
+               if (gsm->initiator)
+                       gsm_dlci_begin_open(gsm->dlci[0]);
+       }
        return 0;
 }
 
@@ -2971,8 +2977,6 @@ static struct tty_ldisc_ops tty_ldisc_packet = {
  *     Virtual tty side
  */
 
-#define TX_SIZE                512
-
 /**
  *     gsm_modem_upd_via_data  -       send modem bits via convergence layer
  *     @dlci: channel
@@ -3212,7 +3216,7 @@ static unsigned int gsmtty_write_room(struct tty_struct *tty)
        struct gsm_dlci *dlci = tty->driver_data;
        if (dlci->state == DLCI_CLOSED)
                return 0;
-       return TX_SIZE - kfifo_len(&dlci->fifo);
+       return kfifo_avail(&dlci->fifo);
 }
 
 static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty)
index f4a0caa56f84ace5cb1b251dc1518fd502ad5e05..21053db93ff1e5ad546a21e79b9485dee2213b89 100644 (file)
@@ -37,6 +37,7 @@
 #define MTK_UART_IER_RTSI      0x40    /* Enable RTS Modem status interrupt */
 #define MTK_UART_IER_CTSI      0x80    /* Enable CTS Modem status interrupt */
 
+#define MTK_UART_EFR           38      /* I/O: Extended Features Register */
 #define MTK_UART_EFR_EN                0x10    /* Enable enhancement feature */
 #define MTK_UART_EFR_RTS       0x40    /* Enable hardware rx flow control */
 #define MTK_UART_EFR_CTS       0x80    /* Enable hardware tx flow control */
 #define MTK_UART_TX_TRIGGER    1
 #define MTK_UART_RX_TRIGGER    MTK_UART_RX_SIZE
 
+#define MTK_UART_FEATURE_SEL   39      /* Feature Selection register */
+#define MTK_UART_FEAT_NEWRMAP  BIT(0)  /* Use new register map */
+
+#define MTK_UART_XON1          40      /* I/O: Xon character 1 */
+#define MTK_UART_XOFF1         42      /* I/O: Xoff character 1 */
+
 #ifdef CONFIG_SERIAL_8250_DMA
 enum dma_rx_status {
        DMA_RX_START = 0,
@@ -169,7 +176,7 @@ static void mtk8250_dma_enable(struct uart_8250_port *up)
                   MTK_UART_DMA_EN_RX | MTK_UART_DMA_EN_TX);
 
        serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
-       serial_out(up, UART_EFR, UART_EFR_ECB);
+       serial_out(up, MTK_UART_EFR, UART_EFR_ECB);
        serial_out(up, UART_LCR, lcr);
 
        if (dmaengine_slave_config(dma->rxchan, &dma->rxconf) != 0)
@@ -232,7 +239,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
        int lcr = serial_in(up, UART_LCR);
 
        serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
-       serial_out(up, UART_EFR, UART_EFR_ECB);
+       serial_out(up, MTK_UART_EFR, UART_EFR_ECB);
        serial_out(up, UART_LCR, lcr);
        lcr = serial_in(up, UART_LCR);
 
@@ -241,7 +248,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
                serial_out(up, MTK_UART_ESCAPE_DAT, MTK_UART_ESCAPE_CHAR);
                serial_out(up, MTK_UART_ESCAPE_EN, 0x00);
                serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
-               serial_out(up, UART_EFR, serial_in(up, UART_EFR) &
+               serial_out(up, MTK_UART_EFR, serial_in(up, MTK_UART_EFR) &
                        (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)));
                serial_out(up, UART_LCR, lcr);
                mtk8250_disable_intrs(up, MTK_UART_IER_XOFFI |
@@ -255,8 +262,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
                serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
                /*enable hw flow control*/
-               serial_out(up, UART_EFR, MTK_UART_EFR_HW_FC |
-                       (serial_in(up, UART_EFR) &
+               serial_out(up, MTK_UART_EFR, MTK_UART_EFR_HW_FC |
+                       (serial_in(up, MTK_UART_EFR) &
                        (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))));
 
                serial_out(up, UART_LCR, lcr);
@@ -270,12 +277,12 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
                serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
                /*enable sw flow control */
-               serial_out(up, UART_EFR, MTK_UART_EFR_XON1_XOFF1 |
-                       (serial_in(up, UART_EFR) &
+               serial_out(up, MTK_UART_EFR, MTK_UART_EFR_XON1_XOFF1 |
+                       (serial_in(up, MTK_UART_EFR) &
                        (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))));
 
-               serial_out(up, UART_XON1, START_CHAR(port->state->port.tty));
-               serial_out(up, UART_XOFF1, STOP_CHAR(port->state->port.tty));
+               serial_out(up, MTK_UART_XON1, START_CHAR(port->state->port.tty));
+               serial_out(up, MTK_UART_XOFF1, STOP_CHAR(port->state->port.tty));
                serial_out(up, UART_LCR, lcr);
                mtk8250_disable_intrs(up, MTK_UART_IER_CTSI|MTK_UART_IER_RTSI);
                mtk8250_enable_intrs(up, MTK_UART_IER_XOFFI);
@@ -568,6 +575,10 @@ static int mtk8250_probe(struct platform_device *pdev)
                uart.dma = data->dma;
 #endif
 
+       /* Switch the AP UART to the new register map */
+       writel(MTK_UART_FEAT_NEWRMAP, uart.port.membase +
+              (MTK_UART_FEATURE_SEL << uart.port.regshift));
+
        /* Disable Rate Fix function */
        writel(0x0, uart.port.membase +
                        (MTK_UART_RATE_FIX << uart.port.regshift));
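
On these MediaTek SoCs the Enhanced Feature Register is not at the standard 16550A UART_EFR offset: once MTK_UART_FEATURE_SEL selects the new register map, it sits at vendor offset 38, next to the vendor XON1/XOFF1 registers. A minimal read-modify-write sketch under configuration mode B, reusing the definitions above (the demo function is hypothetical):

static void demo_update_efr(struct uart_8250_port *up, u8 set, u8 clear)
{
        int lcr = serial_in(up, UART_LCR);

        serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
        serial_out(up, MTK_UART_EFR,
                   (serial_in(up, MTK_UART_EFR) & ~clear) | set);
        serial_out(up, UART_LCR, lcr);
}
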
index 6d70fea76bb3e55a39a7838d10d63a29645beb05..e37a917b9dbbc77f6d69045f8d851fe5a79c7c65 100644 (file)
@@ -471,11 +471,10 @@ static int digicolor_uart_probe(struct platform_device *pdev)
        if (IS_ERR(uart_clk))
                return PTR_ERR(uart_clk);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       dp->port.mapbase = res->start;
-       dp->port.membase = devm_ioremap_resource(&pdev->dev, res);
+       dp->port.membase = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(dp->port.membase))
                return PTR_ERR(dp->port.membase);
+       dp->port.mapbase = res->start;
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
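
devm_platform_get_and_ioremap_resource() folds platform_get_resource() and devm_ioremap_resource() into one call and validates the resource itself; the old code dereferenced res->start before any check that the resource exists. A minimal probe sketch with hypothetical names:

static int demo_probe(struct platform_device *pdev)
{
        struct resource *res;
        void __iomem *base;

        base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(base))
                return PTR_ERR(base);

        /* res is known-valid here, so res->start is safe to use */
        return 0;
}
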
index 87789872f400660cc917331703d787662e35a490..be12fee94db55a8ee1db85836da3f576a5818b7d 100644 (file)
@@ -2664,6 +2664,7 @@ static int lpuart_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct lpuart_port *sport;
        struct resource *res;
+       irq_handler_t handler;
        int ret;
 
        sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL);
@@ -2741,17 +2742,11 @@ static int lpuart_probe(struct platform_device *pdev)
 
        if (lpuart_is_32(sport)) {
                lpuart_reg.cons = LPUART32_CONSOLE;
-               ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart32_int, 0,
-                                       DRIVER_NAME, sport);
+               handler = lpuart32_int;
        } else {
                lpuart_reg.cons = LPUART_CONSOLE;
-               ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart_int, 0,
-                                       DRIVER_NAME, sport);
+               handler = lpuart_int;
        }
-
-       if (ret)
-               goto failed_irq_request;
-
        ret = uart_add_one_port(&lpuart_reg, &sport->port);
        if (ret)
                goto failed_attach_port;
@@ -2773,13 +2768,18 @@ static int lpuart_probe(struct platform_device *pdev)
 
        sport->port.rs485_config(&sport->port, &sport->port.rs485);
 
+       ret = devm_request_irq(&pdev->dev, sport->port.irq, handler, 0,
+                               DRIVER_NAME, sport);
+       if (ret)
+               goto failed_irq_request;
+
        return 0;
 
+failed_irq_request:
 failed_get_rs485:
 failed_reset:
        uart_remove_one_port(&lpuart_reg, &sport->port);
 failed_attach_port:
-failed_irq_request:
        lpuart_disable_clks(sport);
 failed_clock_enable:
 failed_out_of_range:
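
Requesting the IRQ only after uart_add_one_port() and the rs485 setup have succeeded means the handler can never observe a half-initialized port, and the error labels unwind in strict reverse order of acquisition. The general shape, with hypothetical names (including demo_handler):

static int demo_attach(struct device *dev, struct uart_driver *reg,
                       struct uart_port *port, int irq, void *data)
{
        int ret;

        ret = uart_add_one_port(reg, port);
        if (ret)
                return ret;

        /* take the IRQ last: the handler may fire as soon as this succeeds */
        ret = devm_request_irq(dev, irq, demo_handler, 0, "demo", data);
        if (ret)
                uart_remove_one_port(reg, port);

        return ret;
}
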
index 7f2c83f299d3283db6ee3a8f9958b6e33d6c5c5f..eebe782380fb9b55f1efb199615ea07524ca43f2 100644 (file)
@@ -774,6 +774,7 @@ static int wdm_release(struct inode *inode, struct file *file)
                        poison_urbs(desc);
                        spin_lock_irq(&desc->iuspin);
                        desc->resp_count = 0;
+                       clear_bit(WDM_RESPONDING, &desc->flags);
                        spin_unlock_irq(&desc->iuspin);
                        desc->manage_power(desc->intf, 0);
                        unpoison_urbs(desc);
index 71bb5e477dbad77d638dfda9bd0b507c0ae41743..d37965867b230ebfb5e10bfa1e0cc2bd43b132c6 100644 (file)
@@ -890,13 +890,37 @@ static void uvc_function_unbind(struct usb_configuration *c,
 {
        struct usb_composite_dev *cdev = c->cdev;
        struct uvc_device *uvc = to_uvc(f);
+       long wait_ret = 1;
 
        uvcg_info(f, "%s()\n", __func__);
 
+       /* If we know we're connected via v4l2, then there should be a cleanup
+        * of the device from userspace either via UVC_EVENT_DISCONNECT or
+        * through the video device removal uevent. Allow some time for the
+        * application to close out before things get deleted.
+        */
+       if (uvc->func_connected) {
+               uvcg_dbg(f, "waiting for clean disconnect\n");
+               wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue,
+                               uvc->func_connected == false, msecs_to_jiffies(500));
+               uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret);
+       }
+
        device_remove_file(&uvc->vdev.dev, &dev_attr_function_name);
        video_unregister_device(&uvc->vdev);
        v4l2_device_unregister(&uvc->v4l2_dev);
 
+       if (uvc->func_connected) {
+               /* Wait for the release to occur to ensure there are no longer any
+                * pending operations that may cause panics when resources are cleaned
+                * up.
+                */
+               uvcg_warn(f, "%s no clean disconnect, wait for release\n", __func__);
+               wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue,
+                               uvc->func_connected == false, msecs_to_jiffies(1000));
+               uvcg_dbg(f, "done waiting for release with ret: %ld\n", wait_ret);
+       }
+
        usb_ep_free_request(cdev->gadget->ep0, uvc->control_req);
        kfree(uvc->control_buf);
 
@@ -915,6 +939,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi)
 
        mutex_init(&uvc->video.mutex);
        uvc->state = UVC_STATE_DISCONNECTED;
+       init_waitqueue_head(&uvc->func_connected_queue);
        opts = fi_to_f_uvc_opts(fi);
 
        mutex_lock(&opts->lock);
index c3607a32b986247ff56a22191eab5f0382d9bb39..886103a1fe9b7d21651da2ecd28c5fd041beb291 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/usb/composite.h>
 #include <linux/videodev2.h>
+#include <linux/wait.h>
 
 #include <media/v4l2-device.h>
 #include <media/v4l2-dev.h>
@@ -129,6 +130,7 @@ struct uvc_device {
        struct usb_function func;
        struct uvc_video video;
        bool func_connected;
+       wait_queue_head_t func_connected_queue;
 
        /* Descriptors */
        struct {
index a2c78690c5c288a3229aa0a87e79ab04e9f50b9e..fd8f73bb726dd1589f885b096562934af511df1f 100644 (file)
@@ -253,10 +253,11 @@ uvc_v4l2_subscribe_event(struct v4l2_fh *fh,
 
 static void uvc_v4l2_disable(struct uvc_device *uvc)
 {
-       uvc->func_connected = false;
        uvc_function_disconnect(uvc);
        uvcg_video_enable(&uvc->video, 0);
        uvcg_free_buffers(&uvc->video.queue);
+       uvc->func_connected = false;
+       wake_up_interruptible(&uvc->func_connected_queue);
 }
 
 static int
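
The disconnect handshake added here is the standard condition-flag-plus-waitqueue pattern: the waker updates the condition before calling wake_up_interruptible(), and wait_event_interruptible_timeout() returns the remaining jiffies on success, 0 on timeout, or -ERESTARTSYS on a signal. A standalone sketch with hypothetical names:

#include <linux/wait.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static bool demo_connected = true;

static long demo_wait_for_disconnect(void)
{
        /* >0: condition met, 0: timed out, <0: interrupted by a signal */
        return wait_event_interruptible_timeout(demo_wq, !demo_connected,
                                                msecs_to_jiffies(500));
}

static void demo_disconnect(void)
{
        demo_connected = false;                 /* update the condition first */
        wake_up_interruptible(&demo_wq);        /* then wake any waiter */
}
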
index 8d40a1f2ec57d343b0c7f1f68ad3dfe0f377467c..e9440f7bf019dbe0552a80efc4922c6c7b66b199 100644 (file)
@@ -145,6 +145,7 @@ enum dev_state {
        STATE_DEV_INVALID = 0,
        STATE_DEV_OPENED,
        STATE_DEV_INITIALIZED,
+       STATE_DEV_REGISTERING,
        STATE_DEV_RUNNING,
        STATE_DEV_CLOSED,
        STATE_DEV_FAILED
@@ -508,6 +509,7 @@ static int raw_ioctl_run(struct raw_dev *dev, unsigned long value)
                ret = -EINVAL;
                goto out_unlock;
        }
+       dev->state = STATE_DEV_REGISTERING;
        spin_unlock_irqrestore(&dev->lock, flags);
 
        ret = usb_gadget_probe_driver(&dev->driver);
index 3e487baf84224bacea4cc57cf1eeb960262c20ed..de1b091583183f5c86a90fab95e11eec97bebb4f 100644 (file)
@@ -87,8 +87,6 @@ struct xenhcd_info {
        bool error;
 };
 
-#define GRANT_INVALID_REF 0
-
 #define XENHCD_RING_JIFFIES (HZ/200)
 #define XENHCD_SCAN_JIFFIES 1
 
@@ -1100,19 +1098,10 @@ static void xenhcd_destroy_rings(struct xenhcd_info *info)
                unbind_from_irqhandler(info->irq, info);
        info->irq = 0;
 
-       if (info->urb_ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->urb_ring_ref,
-                                         (unsigned long)info->urb_ring.sring);
-               info->urb_ring_ref = GRANT_INVALID_REF;
-       }
-       info->urb_ring.sring = NULL;
-
-       if (info->conn_ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->conn_ring_ref,
-                                         (unsigned long)info->conn_ring.sring);
-               info->conn_ring_ref = GRANT_INVALID_REF;
-       }
-       info->conn_ring.sring = NULL;
+       xenbus_teardown_ring((void **)&info->urb_ring.sring, 1,
+                            &info->urb_ring_ref);
+       xenbus_teardown_ring((void **)&info->conn_ring.sring, 1,
+                            &info->conn_ring_ref);
 }
 
 static int xenhcd_setup_rings(struct xenbus_device *dev,
@@ -1120,46 +1109,24 @@ static int xenhcd_setup_rings(struct xenbus_device *dev,
 {
        struct xenusb_urb_sring *urb_sring;
        struct xenusb_conn_sring *conn_sring;
-       grant_ref_t gref;
        int err;
 
-       info->urb_ring_ref = GRANT_INVALID_REF;
-       info->conn_ring_ref = GRANT_INVALID_REF;
-
-       urb_sring = (struct xenusb_urb_sring *)get_zeroed_page(
-                                                       GFP_NOIO | __GFP_HIGH);
-       if (!urb_sring) {
-               xenbus_dev_fatal(dev, -ENOMEM, "allocating urb ring");
-               return -ENOMEM;
-       }
-       SHARED_RING_INIT(urb_sring);
-       FRONT_RING_INIT(&info->urb_ring, urb_sring, PAGE_SIZE);
-
-       err = xenbus_grant_ring(dev, urb_sring, 1, &gref);
-       if (err < 0) {
-               free_page((unsigned long)urb_sring);
-               info->urb_ring.sring = NULL;
-               goto fail;
-       }
-       info->urb_ring_ref = gref;
-
-       conn_sring = (struct xenusb_conn_sring *)get_zeroed_page(
-                                                       GFP_NOIO | __GFP_HIGH);
-       if (!conn_sring) {
-               xenbus_dev_fatal(dev, -ENOMEM, "allocating conn ring");
-               err = -ENOMEM;
-               goto fail;
+       info->conn_ring_ref = INVALID_GRANT_REF;
+       err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH,
+                               (void **)&urb_sring, 1, &info->urb_ring_ref);
+       if (err) {
+               xenbus_dev_fatal(dev, err, "allocating urb ring");
+               return err;
        }
-       SHARED_RING_INIT(conn_sring);
-       FRONT_RING_INIT(&info->conn_ring, conn_sring, PAGE_SIZE);
+       XEN_FRONT_RING_INIT(&info->urb_ring, urb_sring, PAGE_SIZE);
 
-       err = xenbus_grant_ring(dev, conn_sring, 1, &gref);
-       if (err < 0) {
-               free_page((unsigned long)conn_sring);
-               info->conn_ring.sring = NULL;
+       err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH,
+                               (void **)&conn_sring, 1, &info->conn_ring_ref);
+       if (err) {
+               xenbus_dev_fatal(dev, err, "allocating conn ring");
                goto fail;
        }
-       info->conn_ring_ref = gref;
+       XEN_FRONT_RING_INIT(&info->conn_ring, conn_sring, PAGE_SIZE);
 
        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err) {
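
xenbus_setup_ring() replaces the open-coded get_zeroed_page() + SHARED_RING_INIT() + xenbus_grant_ring() sequence and its partial-failure cleanup, while xenbus_teardown_ring() ends foreign access, frees the page, and resets the pointer and grant reference. A minimal sketch for one ring, using the xenusb types above (function names are hypothetical):

static int demo_ring_setup(struct xenbus_device *dev,
                           struct xenusb_urb_front_ring *ring,
                           grant_ref_t *ref)
{
        struct xenusb_urb_sring *sring;
        int err;

        err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH,
                                (void **)&sring, 1, ref);
        if (err)
                return err;

        XEN_FRONT_RING_INIT(ring, sring, PAGE_SIZE);
        return 0;
}

static void demo_ring_teardown(struct xenusb_urb_front_ring *ring,
                               grant_ref_t *ref)
{
        xenbus_teardown_ring((void **)&ring->sring, 1, ref);
}
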
index f3139ce7b0a93df35d7bae8145247fbc15abb507..06a6b19acaae6869f045197aca926d744c645d5b 100644 (file)
 #define HS_BW_BOUNDARY 6144
 /* usb2 spec section11.18.1: at most 188 FS bytes per microframe */
 #define FS_PAYLOAD_MAX 188
-/*
- * max number of microframes for split transfer,
- * for fs isoc in : 1 ss + 1 idle + 7 cs
- */
-#define TT_MICROFRAMES_MAX 9
 
 #define DBG_BUF_EN     64
 
@@ -242,28 +237,17 @@ static void drop_tt(struct usb_device *udev)
 
 static struct mu3h_sch_ep_info *
 create_sch_ep(struct xhci_hcd_mtk *mtk, struct usb_device *udev,
-             struct usb_host_endpoint *ep, struct xhci_ep_ctx *ep_ctx)
+             struct usb_host_endpoint *ep)
 {
        struct mu3h_sch_ep_info *sch_ep;
        struct mu3h_sch_bw_info *bw_info;
        struct mu3h_sch_tt *tt = NULL;
-       u32 len_bw_budget_table;
 
        bw_info = get_bw_info(mtk, udev, ep);
        if (!bw_info)
                return ERR_PTR(-ENODEV);
 
-       if (is_fs_or_ls(udev->speed))
-               len_bw_budget_table = TT_MICROFRAMES_MAX;
-       else if ((udev->speed >= USB_SPEED_SUPER)
-                       && usb_endpoint_xfer_isoc(&ep->desc))
-               len_bw_budget_table = get_esit(ep_ctx);
-       else
-               len_bw_budget_table = 1;
-
-       sch_ep = kzalloc(struct_size(sch_ep, bw_budget_table,
-                                    len_bw_budget_table),
-                        GFP_KERNEL);
+       sch_ep = kzalloc(sizeof(*sch_ep), GFP_KERNEL);
        if (!sch_ep)
                return ERR_PTR(-ENOMEM);
 
@@ -295,8 +279,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
        u32 mult;
        u32 esit_pkts;
        u32 max_esit_payload;
-       u32 *bwb_table = sch_ep->bw_budget_table;
-       int i;
 
        ep_type = CTX_TO_EP_TYPE(le32_to_cpu(ep_ctx->ep_info2));
        maxpkt = MAX_PACKET_DECODED(le32_to_cpu(ep_ctx->ep_info2));
@@ -332,7 +314,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
                 */
                sch_ep->pkts = max_burst + 1;
                sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
-               bwb_table[0] = sch_ep->bw_cost_per_microframe;
        } else if (sch_ep->speed >= USB_SPEED_SUPER) {
                /* usb3_r1 spec section4.4.7 & 4.4.8 */
                sch_ep->cs_count = 0;
@@ -349,7 +330,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
                if (ep_type == INT_IN_EP || ep_type == INT_OUT_EP) {
                        sch_ep->pkts = esit_pkts;
                        sch_ep->num_budget_microframes = 1;
-                       bwb_table[0] = maxpkt * sch_ep->pkts;
                }
 
                if (ep_type == ISOC_IN_EP || ep_type == ISOC_OUT_EP) {
@@ -366,15 +346,8 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
                                DIV_ROUND_UP(esit_pkts, sch_ep->pkts);
 
                        sch_ep->repeat = !!(sch_ep->num_budget_microframes > 1);
-                       sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
-
-                       for (i = 0; i < sch_ep->num_budget_microframes - 1; i++)
-                               bwb_table[i] = sch_ep->bw_cost_per_microframe;
-
-                       /* last one <= bw_cost_per_microframe */
-                       bwb_table[i] = maxpkt * esit_pkts
-                                      - i * sch_ep->bw_cost_per_microframe;
                }
+               sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
        } else if (is_fs_or_ls(sch_ep->speed)) {
                sch_ep->pkts = 1; /* at most one packet for each microframe */
 
@@ -384,28 +357,7 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
                 */
                sch_ep->cs_count = DIV_ROUND_UP(maxpkt, FS_PAYLOAD_MAX);
                sch_ep->num_budget_microframes = sch_ep->cs_count;
-               sch_ep->bw_cost_per_microframe =
-                       (maxpkt < FS_PAYLOAD_MAX) ? maxpkt : FS_PAYLOAD_MAX;
-
-               /* init budget table */
-               if (ep_type == ISOC_OUT_EP) {
-                       for (i = 0; i < sch_ep->num_budget_microframes; i++)
-                               bwb_table[i] =  sch_ep->bw_cost_per_microframe;
-               } else if (ep_type == INT_OUT_EP) {
-                       /* only first one consumes bandwidth, others as zero */
-                       bwb_table[0] = sch_ep->bw_cost_per_microframe;
-               } else { /* INT_IN_EP or ISOC_IN_EP */
-                       bwb_table[0] = 0; /* start split */
-                       bwb_table[1] = 0; /* idle */
-                       /*
-                        * due to cs_count will be updated according to cs
-                        * position, assign all remainder budget array
-                        * elements as @bw_cost_per_microframe, but only first
-                        * @num_budget_microframes elements will be used later
-                        */
-                       for (i = 2; i < TT_MICROFRAMES_MAX; i++)
-                               bwb_table[i] =  sch_ep->bw_cost_per_microframe;
-               }
+               sch_ep->bw_cost_per_microframe = min_t(u32, maxpkt, FS_PAYLOAD_MAX);
        }
 }
 
@@ -422,7 +374,7 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw,
 
                for (j = 0; j < sch_ep->num_budget_microframes; j++) {
                        k = XHCI_MTK_BW_INDEX(base + j);
-                       bw = sch_bw->bus_bw[k] + sch_ep->bw_budget_table[j];
+                       bw = sch_bw->bus_bw[k] + sch_ep->bw_cost_per_microframe;
                        if (bw > max_bw)
                                max_bw = bw;
                }
@@ -433,18 +385,16 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw,
 static void update_bus_bw(struct mu3h_sch_bw_info *sch_bw,
        struct mu3h_sch_ep_info *sch_ep, bool used)
 {
+       int bw_updated;
        u32 base;
-       int i, j, k;
+       int i, j;
+
+       bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1);
 
        for (i = 0; i < sch_ep->num_esit; i++) {
                base = sch_ep->offset + i * sch_ep->esit;
-               for (j = 0; j < sch_ep->num_budget_microframes; j++) {
-                       k = XHCI_MTK_BW_INDEX(base + j);
-                       if (used)
-                               sch_bw->bus_bw[k] += sch_ep->bw_budget_table[j];
-                       else
-                               sch_bw->bus_bw[k] -= sch_ep->bw_budget_table[j];
-               }
+               for (j = 0; j < sch_ep->num_budget_microframes; j++)
+                       sch_bw->bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated;
        }
 }
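
Because every budgeted microframe of an endpoint now carries the same cost, the per-slot budget table collapses into the single bw_cost_per_microframe value, and the add/subtract branches above reduce to one signed delta hoisted out of the loop. A standalone demo of the accounting (hypothetical names):

#include <stdint.h>

static void demo_update_bus_bw(uint32_t *bus_bw, unsigned int slots,
                               uint32_t cost, int used)
{
        int delta = used ? (int)cost : -(int)cost;
        unsigned int i;

        /* the used/unused decision is made once, not per slot */
        for (i = 0; i < slots; i++)
                bus_bw[i] += delta;
}
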
 
@@ -464,7 +414,7 @@ static int check_fs_bus_bw(struct mu3h_sch_ep_info *sch_ep, int offset)
                 */
                for (j = 0; j < sch_ep->num_budget_microframes; j++) {
                        k = XHCI_MTK_BW_INDEX(base + j);
-                       tmp = tt->fs_bus_bw[k] + sch_ep->bw_budget_table[j];
+                       tmp = tt->fs_bus_bw[k] + sch_ep->bw_cost_per_microframe;
                        if (tmp > FS_PAYLOAD_MAX)
                                return -ESCH_BW_OVERFLOW;
                }
@@ -538,19 +488,17 @@ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset)
 static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used)
 {
        struct mu3h_sch_tt *tt = sch_ep->sch_tt;
+       int bw_updated;
        u32 base;
-       int i, j, k;
+       int i, j;
+
+       bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1);
 
        for (i = 0; i < sch_ep->num_esit; i++) {
                base = sch_ep->offset + i * sch_ep->esit;
 
-               for (j = 0; j < sch_ep->num_budget_microframes; j++) {
-                       k = XHCI_MTK_BW_INDEX(base + j);
-                       if (used)
-                               tt->fs_bus_bw[k] += sch_ep->bw_budget_table[j];
-                       else
-                               tt->fs_bus_bw[k] -= sch_ep->bw_budget_table[j];
-               }
+               for (j = 0; j < sch_ep->num_budget_microframes; j++)
+                       tt->fs_bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated;
        }
 
        if (used)
@@ -710,7 +658,7 @@ static int add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev,
 
        xhci_dbg(xhci, "%s %s\n", __func__, decode_ep(ep, udev->speed));
 
-       sch_ep = create_sch_ep(mtk, udev, ep, ep_ctx);
+       sch_ep = create_sch_ep(mtk, udev, ep);
        if (IS_ERR_OR_NULL(sch_ep))
                return -ENOMEM;
 
index ffd4b493b4ba74e6802bde3be624cf064d61c3d2..1174a510dd38810fd7403a909024d21cfc8b8713 100644 (file)
@@ -83,7 +83,6 @@ struct mu3h_sch_bw_info {
  *             times; 1: distribute the (bMaxBurst+1)*(Mult+1) packets
  *             according to @pkts and @repeat. normal mode is used by
  *             default
- * @bw_budget_table: table to record bandwidth budget per microframe
  */
 struct mu3h_sch_ep_info {
        u32 esit;
@@ -109,7 +108,6 @@ struct mu3h_sch_ep_info {
        u32 pkts;
        u32 cs_count;
        u32 burst_mode;
-       u32 bw_budget_table[];
 };
 
 #define MU3C_U3_PORT_MAX 4
index 1364ce7f0abf09369c5cc8dfe60121e9cfddbc8a..152ad882657d7dcc23a8b46da1a4aca764451382 100644 (file)
@@ -2123,10 +2123,14 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = RSVD(3) },
        { USB_DEVICE(0x1508, 0x1001),                                           /* Fibocom NL668 (IOT version) */
          .driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
+       { USB_DEVICE(0x1782, 0x4d10) },                                         /* Fibocom L610 (AT mode) */
+       { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) },                   /* Fibocom L610 (ECM/RNDIS mode) */
        { USB_DEVICE(0x2cb7, 0x0104),                                           /* Fibocom NL678 series */
          .driver_info = RSVD(4) | RSVD(5) },
        { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff),                     /* Fibocom NL678 series */
          .driver_info = RSVD(6) },
+       { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0106, 0xff) },                   /* Fibocom MA510 (ECM mode w/ diag intf.) */
+       { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) },                   /* Fibocom MA510 (ECM mode) */
        { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) },    /* Fibocom FG150 Diag */
        { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) },          /* Fibocom FG150 AT */
        { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) },                   /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
index 88b284d61681ae76d0eec83f32d7eca0c53d662d..1d878d05a6584d473ab143fba8d39ce9269baaaa 100644 (file)
@@ -106,6 +106,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) },
        { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) },
        { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) },
+       { USB_DEVICE(HP_VENDOR_ID, HP_LM930_PRODUCT_ID) },
        { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) },
        { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) },
        { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) },
index c5406452b774ef9dc6cdc986f0aa4ce6ead1b866..732f9b13ad5d59cfbff2edb9c6e4dc266eeb63df 100644 (file)
 #define HP_TD620_PRODUCT_ID    0x0956
 #define HP_LD960_PRODUCT_ID    0x0b39
 #define HP_LD381_PRODUCT_ID    0x0f7f
+#define HP_LM930_PRODUCT_ID    0x0f9b
 #define HP_LCM220_PRODUCT_ID   0x3139
 #define HP_LCM960_PRODUCT_ID   0x3239
 #define HP_LD220_PRODUCT_ID    0x3524
index c18bf8164bc2e9676247f36e672f2c9cea489e30..586ef5551e76e41e5ac0a552429e3979bb03185b 100644 (file)
@@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = {
        {DEVICE_SWI(0x1199, 0x9090)},   /* Sierra Wireless EM7565 QDL */
        {DEVICE_SWI(0x1199, 0x9091)},   /* Sierra Wireless EM7565 */
        {DEVICE_SWI(0x1199, 0x90d2)},   /* Sierra Wireless EM9191 QDL */
+       {DEVICE_SWI(0x1199, 0xc080)},   /* Sierra Wireless EM7590 QDL */
+       {DEVICE_SWI(0x1199, 0xc081)},   /* Sierra Wireless EM7590 */
        {DEVICE_SWI(0x413c, 0x81a2)},   /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
        {DEVICE_SWI(0x413c, 0x81a3)},   /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
        {DEVICE_SWI(0x413c, 0x81a4)},   /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
index e07d26a3cd8e1d5d75bbdb50cd11243137439409..f33e08eb767094cad85b499e0f283cbd803c6848 100644 (file)
@@ -877,7 +877,7 @@ static int tcpci_remove(struct i2c_client *client)
        /* Disable chip interrupts before unregistering port */
        err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0);
        if (err < 0)
-               return err;
+               dev_warn(&client->dev, "Failed to disable irqs (%pe)\n", ERR_PTR(err));
 
        tcpci_unregister_port(chip->tcpci);
 
index f1bd9e09bc87f191962c65b43b904d32ffabe796..8a952eaf9016323aa95af33151e6776d0340c827 100644 (file)
@@ -15,6 +15,9 @@
 
 #include "tcpci.h"
 
+#define MT6360_REG_PHYCTRL1    0x80
+#define MT6360_REG_PHYCTRL3    0x82
+#define MT6360_REG_PHYCTRL7    0x86
 #define MT6360_REG_VCONNCTRL1  0x8C
 #define MT6360_REG_MODECTRL2   0x8F
 #define MT6360_REG_SWRESET     0xA0
@@ -22,6 +25,8 @@
 #define MT6360_REG_DRPCTRL1    0xA2
 #define MT6360_REG_DRPCTRL2    0xA3
 #define MT6360_REG_I2CTORST    0xBF
+#define MT6360_REG_PHYCTRL11   0xCA
+#define MT6360_REG_RXCTRL1     0xCE
 #define MT6360_REG_RXCTRL2     0xCF
 #define MT6360_REG_CTDCTRL2    0xEC
 
@@ -106,6 +111,27 @@ static int mt6360_tcpc_init(struct tcpci *tcpci, struct tcpci_data *tdata)
        if (ret)
                return ret;
 
+       /* BMC PHY */
+       ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL1, 0x3A70);
+       if (ret)
+               return ret;
+
+       ret = regmap_write(regmap, MT6360_REG_PHYCTRL3,  0x82);
+       if (ret)
+               return ret;
+
+       ret = regmap_write(regmap, MT6360_REG_PHYCTRL7, 0x36);
+       if (ret)
+               return ret;
+
+       ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL11, 0x3C60);
+       if (ret)
+               return ret;
+
+       ret = regmap_write(regmap, MT6360_REG_RXCTRL1, 0xE8);
+       if (ret)
+               return ret;
+
        /* Set shipping mode off, AUTOIDLE on */
        return regmap_write(regmap, MT6360_REG_MODECTRL2, 0x7A);
 }
index 6771f05e32c290cb12f033b31ce49002a0f5b1c6..8873c1644a2953d008f52f69f5435cc28d7778eb 100644 (file)
@@ -19,7 +19,7 @@
 struct ucsi_acpi {
        struct device *dev;
        struct ucsi *ucsi;
-       void __iomem *base;
+       void *base;
        struct completion complete;
        unsigned long flags;
        guid_t guid;
@@ -51,7 +51,7 @@ static int ucsi_acpi_read(struct ucsi *ucsi, unsigned int offset,
        if (ret)
                return ret;
 
-       memcpy(val, (const void __force *)(ua->base + offset), val_len);
+       memcpy(val, ua->base + offset, val_len);
 
        return 0;
 }
@@ -61,7 +61,7 @@ static int ucsi_acpi_async_write(struct ucsi *ucsi, unsigned int offset,
 {
        struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
 
-       memcpy((void __force *)(ua->base + offset), val, val_len);
+       memcpy(ua->base + offset, val, val_len);
 
        return ucsi_acpi_dsm(ua, UCSI_DSM_FUNC_WRITE);
 }
@@ -132,20 +132,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev)
                return -ENODEV;
        }
 
-       /* This will make sure we can use ioremap() */
-       status = acpi_release_memory(ACPI_HANDLE(&pdev->dev), res, 1);
-       if (ACPI_FAILURE(status))
-               return -ENOMEM;
-
-       /*
-        * NOTE: The memory region for the data structures is used also in an
-        * operation region, which means ACPI has already reserved it. Therefore
-        * it can not be requested here, and we can not use
-        * devm_ioremap_resource().
-        */
-       ua->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
-       if (!ua->base)
-               return -ENOMEM;
+       ua->base = devm_memremap(&pdev->dev, res->start, resource_size(res), MEMREMAP_WB);
+       if (IS_ERR(ua->base))
+               return PTR_ERR(ua->base);
 
        ret = guid_parse(UCSI_DSM_UUID, &ua->guid);
        if (ret)
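
The UCSI mailbox is ordinary system RAM described by ACPI rather than MMIO, so devm_memremap(..., MEMREMAP_WB) gives a cacheable mapping that plain memcpy() may touch, and the __iomem/__force annotations disappear. Unlike devm_ioremap(), it reports failure via ERR_PTR(), not NULL. A minimal sketch with hypothetical names:

static int demo_map(struct device *dev, struct resource *res, void **out)
{
        void *base;

        base = devm_memremap(dev, res->start, resource_size(res),
                             MEMREMAP_WB);
        if (IS_ERR(base))
                return PTR_ERR(base);   /* ERR_PTR, not NULL, on failure */

        *out = base;
        return 0;
}
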
index 79001301b383218b990008714e8613de4903018a..e0de44000d92d177a8155174b49061a73b13002f 100644 (file)
@@ -161,6 +161,7 @@ struct mlx5_vdpa_net {
        struct mlx5_flow_handle *rx_rule_mcast;
        bool setup;
        u32 cur_num_vqs;
+       u32 rqt_size;
        struct notifier_block nb;
        struct vdpa_callback config_cb;
        struct mlx5_vdpa_wq_ent cvq_ent;
@@ -204,17 +205,12 @@ static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
        return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
 }
 
-static inline u32 mlx5_vdpa_max_qps(int max_vqs)
-{
-       return max_vqs / 2;
-}
-
 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
 {
        if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
                return 2;
 
-       return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
+       return mvdev->max_vqs;
 }
 
 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
@@ -1236,25 +1232,13 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
 static int create_rqt(struct mlx5_vdpa_net *ndev)
 {
        __be32 *list;
-       int max_rqt;
        void *rqtc;
        int inlen;
        void *in;
        int i, j;
        int err;
-       int num;
-
-       if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
-               num = 1;
-       else
-               num = ndev->cur_num_vqs / 2;
 
-       max_rqt = min_t(int, roundup_pow_of_two(num),
-                       1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
-       if (max_rqt < 1)
-               return -EOPNOTSUPP;
-
-       inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
+       inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;
@@ -1263,12 +1247,12 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
 
        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
-       MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
+       MLX5_SET(rqtc, rqtc, rqt_max_size, ndev->rqt_size);
        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
-       for (i = 0, j = 0; i < max_rqt; i++, j += 2)
-               list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
+       for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
+               list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
 
-       MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
+       MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
        kfree(in);
        if (err)
@@ -1282,19 +1266,13 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
 {
        __be32 *list;
-       int max_rqt;
        void *rqtc;
        int inlen;
        void *in;
        int i, j;
        int err;
 
-       max_rqt = min_t(int, roundup_pow_of_two(ndev->cur_num_vqs / 2),
-                       1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
-       if (max_rqt < 1)
-               return -EOPNOTSUPP;
-
-       inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
+       inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;
@@ -1305,10 +1283,10 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
 
        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
-       for (i = 0, j = 0; i < max_rqt; i++, j += 2)
+       for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
                list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
 
-       MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
+       MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
        err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
        kfree(in);
        if (err)
@@ -1625,7 +1603,7 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
 
                newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
                if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
-                   newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
+                   newqps > ndev->rqt_size)
                        break;
 
                if (ndev->cur_num_vqs == 2 * newqps) {
@@ -1989,7 +1967,7 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
        int err;
        int i;
 
-       for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
+       for (i = 0; i < mvdev->max_vqs; i++) {
                err = setup_vq(ndev, &ndev->vqs[i]);
                if (err)
                        goto err_vq;
@@ -2060,9 +2038,11 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
 
        ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
        if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
-               ndev->cur_num_vqs = 2 * mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
+               ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
        else
-               ndev->cur_num_vqs = 2;
+               ndev->rqt_size = 1;
+
+       ndev->cur_num_vqs = 2 * ndev->rqt_size;
 
        update_cvq_info(mvdev);
        return err;
@@ -2529,7 +2509,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
        struct mlx5_vdpa_virtqueue *mvq;
        int i;
 
-       for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
+       for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
                mvq = &ndev->vqs[i];
                memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
                mvq->index = i;
@@ -2671,7 +2651,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
                return -EOPNOTSUPP;
        }
 
-       max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
+       max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
+                       1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
        if (max_vqs < 2) {
                dev_warn(mdev->device,
                         "%d virtqueues are supported. At least 2 are required\n",
@@ -2742,7 +2723,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
                ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
        }
 
-       config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
+       config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
        mvdev->vdev.dma_dev = &mdev->pdev->dev;
        err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
        if (err)
@@ -2769,7 +2750,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
        ndev->nb.notifier_call = event_handler;
        mlx5_notifier_register(mdev, &ndev->nb);
        mvdev->vdev.mdev = &mgtdev->mgtdev;
-       err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs) + 1);
+       err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
        if (err)
                goto err_reg;
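
With rqt_size computed once at feature negotiation and max_vqs clamped against log_max_rqt_size at probe, create_rqt() and modify_rqt() fill the table identically: receive queues sit at even virtqueue indices and wrap over the currently active queues. A standalone demo of that indexing (byte-order conversion omitted; names hypothetical):

#include <stdint.h>

static void demo_fill_rqt(uint32_t *list, const uint32_t *virtq_id,
                          unsigned int rqt_size, unsigned int cur_num_vqs)
{
        unsigned int i, j;

        /* even indices are RX queues; wrap modulo the active VQ count */
        for (i = 0, j = 0; i < rqt_size; i++, j += 2)
                list[i] = virtq_id[j % cur_num_vqs];
}
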
 
index 792ab5f2364713117914e8e42ccbcb326a194d08..297b5db474545e37619181549618c845a19ba51a 100644 (file)
@@ -1450,13 +1450,9 @@ err:
        return ERR_PTR(r);
 }
 
-static struct ptr_ring *get_tap_ptr_ring(int fd)
+static struct ptr_ring *get_tap_ptr_ring(struct file *file)
 {
        struct ptr_ring *ring;
-       struct file *file = fget(fd);
-
-       if (!file)
-               return NULL;
        ring = tun_get_tx_ring(file);
        if (!IS_ERR(ring))
                goto out;
@@ -1465,7 +1461,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd)
                goto out;
        ring = NULL;
 out:
-       fput(file);
        return ring;
 }
 
@@ -1552,8 +1547,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
                r = vhost_net_enable_vq(n, vq);
                if (r)
                        goto err_used;
-               if (index == VHOST_NET_VQ_RX)
-                       nvq->rx_ring = get_tap_ptr_ring(fd);
+               if (index == VHOST_NET_VQ_RX) {
+                       if (sock)
+                               nvq->rx_ring = get_tap_ptr_ring(sock->file);
+                       else
+                               nvq->rx_ring = NULL;
+               }
 
                oldubufs = nvq->ubufs;
                nvq->ubufs = ubufs;
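
Passing sock->file into get_tap_ptr_ring() lets it borrow the reference that the virtqueue's socket already holds, removing the fget()/fput() pair and the lookup-failure path of the old fd-based version. A minimal sketch, assuming the caller keeps the socket alive for the duration (name hypothetical):

static struct ptr_ring *demo_rx_ring(struct socket *sock)
{
        struct ptr_ring *ring;

        if (!sock)
                return NULL;

        /* borrowed reference: the socket pins sock->file for us */
        ring = tun_get_tx_ring(sock->file);
        return IS_ERR(ring) ? NULL : ring;
}
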
index 97eb0dee411cf578a7971222299210cf0ab8f41a..a6bb0e4382167e121c49bfeb0445197eab5ecea4 100644 (file)
@@ -1434,10 +1434,7 @@ fb_release(struct inode *inode, struct file *file)
 __acquires(&info->lock)
 __releases(&info->lock)
 {
-       struct fb_info * const info = file_fb_info(file);
-
-       if (!info)
-               return -ENODEV;
+       struct fb_info * const info = file->private_data;
 
        lock_fb_info(info);
        if (info->fbops->fb_release)
index 26892940c21369bb3f62eb1ae863960f7d348d45..82e31a2d845e1925eca50a26c25d474001a57c18 100644 (file)
@@ -80,6 +80,10 @@ void framebuffer_release(struct fb_info *info)
 {
        if (!info)
                return;
+
+       if (WARN_ON(refcount_read(&info->count)))
+               return;
+
        kfree(info->apertures);
        kfree(info);
 }
index ea42ba6445b2ddaad7cd1a699ff793dec1bceb4c..b3d5f884c5445b68759b8f18bc7ef8047e10ec30 100644 (file)
@@ -243,6 +243,10 @@ error:
 static inline void efifb_show_boot_graphics(struct fb_info *info) {}
 #endif
 
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
+ */
 static void efifb_destroy(struct fb_info *info)
 {
        if (efifb_pci_dev)
@@ -254,10 +258,13 @@ static void efifb_destroy(struct fb_info *info)
                else
                        memunmap(info->screen_base);
        }
+
        if (request_mem_succeeded)
                release_mem_region(info->apertures->ranges[0].base,
                                   info->apertures->ranges[0].size);
        fb_dealloc_cmap(&info->cmap);
+
+       framebuffer_release(info);
 }
 
 static const struct fb_ops efifb_ops = {
@@ -620,9 +627,9 @@ static int efifb_remove(struct platform_device *pdev)
 {
        struct fb_info *info = platform_get_drvdata(pdev);
 
+       /* efifb_destroy takes care of info cleanup */
        unregister_framebuffer(info);
        sysfs_remove_groups(&pdev->dev.kobj, efifb_groups);
-       framebuffer_release(info);
 
        return 0;
 }
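
The rule these fbdev changes converge on: the last put_fb_info(), at the end of unregister_framebuffer() or fb_release(), invokes fb_ops.fb_destroy, and only there may the driver call framebuffer_release(); the new WARN_ON in framebuffer_release() catches callers that free a still-referenced fb_info. A minimal skeleton with hypothetical names:

static void demo_fb_destroy(struct fb_info *info)
{
        /* unmap registers, release regions, etc., then free info */
        framebuffer_release(info);
}

static int demo_remove(struct platform_device *pdev)
{
        struct fb_info *info = platform_get_drvdata(pdev);

        /* demo_fb_destroy runs when the last reference is dropped */
        unregister_framebuffer(info);
        return 0;
}
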
index 94fc9c6d04113661e57a7f5f7b43f88f17beb5b8..2c198561c338f74fbb2e34a85aa22f70b36faa3b 100644 (file)
@@ -84,6 +84,10 @@ struct simplefb_par {
 static void simplefb_clocks_destroy(struct simplefb_par *par);
 static void simplefb_regulators_destroy(struct simplefb_par *par);
 
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
+ */
 static void simplefb_destroy(struct fb_info *info)
 {
        struct simplefb_par *par = info->par;
@@ -94,6 +98,8 @@ static void simplefb_destroy(struct fb_info *info)
        if (info->screen_base)
                iounmap(info->screen_base);
 
+       framebuffer_release(info);
+
        if (mem)
                release_mem_region(mem->start, resource_size(mem));
 }
@@ -545,8 +551,8 @@ static int simplefb_remove(struct platform_device *pdev)
 {
        struct fb_info *info = platform_get_drvdata(pdev);
 
+       /* simplefb_destroy takes care of info cleanup */
        unregister_framebuffer(info);
-       framebuffer_release(info);
 
        return 0;
 }
index df6de5a9dd4cd9981ac11cc70e38e4309dde7cc0..e25e8de5ff672de983837762f39ef9f8034a7a6f 100644 (file)
@@ -179,6 +179,10 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
        return err;
 }
 
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
+ */
 static void vesafb_destroy(struct fb_info *info)
 {
        struct vesafb_par *par = info->par;
@@ -188,6 +192,8 @@ static void vesafb_destroy(struct fb_info *info)
        if (info->screen_base)
                iounmap(info->screen_base);
        release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
+
+       framebuffer_release(info);
 }
 
 static struct fb_ops vesafb_ops = {
@@ -484,10 +490,10 @@ static int vesafb_remove(struct platform_device *pdev)
 {
        struct fb_info *info = platform_get_drvdata(pdev);
 
+       /* vesafb_destroy takes care of info cleanup */
        unregister_framebuffer(info);
        if (((struct vesafb_par *)(info->par))->region)
                release_region(0x3c0, 32);
-       framebuffer_release(info);
 
        return 0;
 }
index 121b9293c7375f963f8f85cab6086c2e5688628e..87ef258cec64839802079305a79bcd167f2c0df3 100644 (file)
@@ -47,4 +47,9 @@ source "drivers/virt/vboxguest/Kconfig"
 source "drivers/virt/nitro_enclaves/Kconfig"
 
 source "drivers/virt/acrn/Kconfig"
+
+source "drivers/virt/coco/efi_secret/Kconfig"
+
+source "drivers/virt/coco/sev-guest/Kconfig"
+
 endif
index 108d0ffcc9aa98ea5de1f6095f6cf6f41b9f007e..093674e05c40f2b5ba5fd9629e8c6a306d678cd1 100644 (file)
@@ -9,3 +9,5 @@ obj-y                           += vboxguest/
 
 obj-$(CONFIG_NITRO_ENCLAVES)   += nitro_enclaves/
 obj-$(CONFIG_ACRN_HSM)         += acrn/
+obj-$(CONFIG_EFI_SECRET)       += coco/efi_secret/
+obj-$(CONFIG_SEV_GUEST)                += coco/sev-guest/
diff --git a/drivers/virt/coco/efi_secret/Kconfig b/drivers/virt/coco/efi_secret/Kconfig
new file mode 100644 (file)
index 0000000..4404d19
--- /dev/null
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config EFI_SECRET
+       tristate "EFI secret area securityfs support"
+       depends on EFI && X86_64
+       select EFI_COCO_SECRET
+       select SECURITYFS
+       help
+         This is a driver for accessing the EFI secret area via securityfs.
+         The EFI secret area is a memory area designated by the firmware for
+         confidential computing secret injection (for example for AMD SEV
+         guests).  The driver exposes the secrets as files in
+         <securityfs>/secrets/coco.  Files can be read and deleted (deleting
+         a file wipes the secret from memory).
+
+         To compile this driver as a module, choose M here.
+         The module will be called efi_secret.
diff --git a/drivers/virt/coco/efi_secret/Makefile b/drivers/virt/coco/efi_secret/Makefile
new file mode 100644 (file)
index 0000000..c7047ce
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_EFI_SECRET) += efi_secret.o
diff --git a/drivers/virt/coco/efi_secret/efi_secret.c b/drivers/virt/coco/efi_secret/efi_secret.c
new file mode 100644 (file)
index 0000000..e700a5e
--- /dev/null
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * efi_secret module
+ *
+ * Copyright (C) 2022 IBM Corporation
+ * Author: Dov Murik <dovmurik@linux.ibm.com>
+ */
+
+/**
+ * DOC: efi_secret: Allow reading the EFI confidential computing (coco)
+ * secret area via the securityfs interface.
+ *
+ * When the module is loaded (and securityfs is mounted, typically under
+ * /sys/kernel/security), a "secrets/coco" directory is created in securityfs.
+ * In it, a file is created for each secret entry.  The name of each such file
+ * is the GUID of the secret entry, and its content is the secret data.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/security.h>
+#include <linux/efi.h>
+#include <linux/cacheflush.h>
+
+#define EFI_SECRET_NUM_FILES 64
+
+struct efi_secret {
+       struct dentry *secrets_dir;
+       struct dentry *fs_dir;
+       struct dentry *fs_files[EFI_SECRET_NUM_FILES];
+       void __iomem *secret_data;
+       u64 secret_data_len;
+};
+
+/*
+ * Structure of the EFI secret area
+ *
+ * Offset   Length
+ * (bytes)  (bytes)  Usage
+ * -------  -------  -----
+ *       0       16  Secret table header GUID (must be 1e74f542-71dd-4d66-963e-ef4287ff173b)
+ *      16        4  Length of bytes of the entire secret area
+ *
+ *      20       16  First secret entry's GUID
+ *      36        4  First secret entry's length in bytes (= 16 + 4 + x)
+ *      40        x  First secret entry's data
+ *
+ *    40+x       16  Second secret entry's GUID
+ *    56+x        4  Second secret entry's length in bytes (= 16 + 4 + y)
+ *    60+x        y  Second secret entry's data
+ *
+ * (... and so on for additional entries)
+ *
+ * The GUID of each secret entry designates the usage of the secret data.
+ */
+
+/**
+ * struct secret_header - Header of entire secret area; this should be followed
+ * by instances of struct secret_entry.
+ * @guid:      Must be EFI_SECRET_TABLE_HEADER_GUID
+ * @len:       Length in bytes of entire secret area, including header
+ */
+struct secret_header {
+       efi_guid_t guid;
+       u32 len;
+} __attribute((packed));
+
+/**
+ * struct secret_entry - Holds one secret entry
+ * @guid:      Secret-specific GUID (or NULL_GUID if this secret entry was deleted)
+ * @len:       Length of secret entry, including its guid and len fields
+ * @data:      The secret data (full of zeros if this secret entry was deleted)
+ */
+struct secret_entry {
+       efi_guid_t guid;
+       u32 len;
+       u8 data[];
+} __attribute((packed));
+
+static size_t secret_entry_data_len(struct secret_entry *e)
+{
+       return e->len - sizeof(*e);
+}
+
+static struct efi_secret the_efi_secret;
+
+static inline struct efi_secret *efi_secret_get(void)
+{
+       return &the_efi_secret;
+}
+
+static int efi_secret_bin_file_show(struct seq_file *file, void *data)
+{
+       struct secret_entry *e = file->private;
+
+       if (e)
+               seq_write(file, e->data, secret_entry_data_len(e));
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(efi_secret_bin_file);
+
+/*
+ * Overwrite memory content with zeroes, and ensure that dirty cache lines are
+ * actually written back to memory, to clear out the secret.
+ */
+static void wipe_memory(void *addr, size_t size)
+{
+       memzero_explicit(addr, size);
+#ifdef CONFIG_X86
+       clflush_cache_range(addr, size);
+#endif
+}
+
+static int efi_secret_unlink(struct inode *dir, struct dentry *dentry)
+{
+       struct efi_secret *s = efi_secret_get();
+       struct inode *inode = d_inode(dentry);
+       struct secret_entry *e = (struct secret_entry *)inode->i_private;
+       int i;
+
+       if (e) {
+               /* Zero out the secret data */
+               wipe_memory(e->data, secret_entry_data_len(e));
+               e->guid = NULL_GUID;
+       }
+
+       inode->i_private = NULL;
+
+       for (i = 0; i < EFI_SECRET_NUM_FILES; i++)
+               if (s->fs_files[i] == dentry)
+                       s->fs_files[i] = NULL;
+
+       /*
+        * securityfs_remove tries to lock the directory's inode, but we reach
+        * the unlink callback when it's already locked
+        */
+       inode_unlock(dir);
+       securityfs_remove(dentry);
+       inode_lock(dir);
+
+       return 0;
+}
+
+static const struct inode_operations efi_secret_dir_inode_operations = {
+       .lookup         = simple_lookup,
+       .unlink         = efi_secret_unlink,
+};
+
+static int efi_secret_map_area(struct platform_device *dev)
+{
+       int ret;
+       struct efi_secret *s = efi_secret_get();
+       struct linux_efi_coco_secret_area *secret_area;
+
+       if (efi.coco_secret == EFI_INVALID_TABLE_ADDR) {
+               dev_err(&dev->dev, "Secret area address is not available\n");
+               return -EINVAL;
+       }
+
+       secret_area = memremap(efi.coco_secret, sizeof(*secret_area), MEMREMAP_WB);
+       if (secret_area == NULL) {
+               dev_err(&dev->dev, "Could not map secret area EFI config entry\n");
+               return -ENOMEM;
+       }
+       if (!secret_area->base_pa || secret_area->size < sizeof(struct secret_header)) {
+               dev_err(&dev->dev,
+                       "Invalid secret area memory location (base_pa=0x%llx size=0x%llx)\n",
+                       secret_area->base_pa, secret_area->size);
+               ret = -EINVAL;
+               goto unmap;
+       }
+
+       s->secret_data = ioremap_encrypted(secret_area->base_pa, secret_area->size);
+       if (s->secret_data == NULL) {
+               dev_err(&dev->dev, "Could not map secret area\n");
+               ret = -ENOMEM;
+               goto unmap;
+       }
+
+       s->secret_data_len = secret_area->size;
+       ret = 0;
+
+unmap:
+       memunmap(secret_area);
+       return ret;
+}
+
+static void efi_secret_securityfs_teardown(struct platform_device *dev)
+{
+       struct efi_secret *s = efi_secret_get();
+       int i;
+
+       for (i = (EFI_SECRET_NUM_FILES - 1); i >= 0; i--) {
+               securityfs_remove(s->fs_files[i]);
+               s->fs_files[i] = NULL;
+       }
+
+       securityfs_remove(s->fs_dir);
+       s->fs_dir = NULL;
+
+       securityfs_remove(s->secrets_dir);
+       s->secrets_dir = NULL;
+
+       dev_dbg(&dev->dev, "Removed securityfs entries\n");
+}
+
+static int efi_secret_securityfs_setup(struct platform_device *dev)
+{
+       struct efi_secret *s = efi_secret_get();
+       int ret = 0, i = 0, bytes_left;
+       unsigned char *ptr;
+       struct secret_header *h;
+       struct secret_entry *e;
+       struct dentry *dent;
+       char guid_str[EFI_VARIABLE_GUID_LEN + 1];
+
+       ptr = (void __force *)s->secret_data;
+       h = (struct secret_header *)ptr;
+       if (efi_guidcmp(h->guid, EFI_SECRET_TABLE_HEADER_GUID)) {
+               /*
+                * This is not an error: it just means that EFI defines a
+                * secret area but it was not populated by the Guest Owner.
+                */
+               dev_dbg(&dev->dev, "EFI secret area does not start with correct GUID\n");
+               return -ENODEV;
+       }
+       if (h->len < sizeof(*h)) {
+               dev_err(&dev->dev, "EFI secret area reported length is too small\n");
+               return -EINVAL;
+       }
+       if (h->len > s->secret_data_len) {
+               dev_err(&dev->dev, "EFI secret area reported length is too big\n");
+               return -EINVAL;
+       }
+
+       s->secrets_dir = NULL;
+       s->fs_dir = NULL;
+       memset(s->fs_files, 0, sizeof(s->fs_files));
+
+       dent = securityfs_create_dir("secrets", NULL);
+       if (IS_ERR(dent)) {
+               dev_err(&dev->dev, "Error creating secrets securityfs directory entry err=%ld\n",
+                       PTR_ERR(dent));
+               return PTR_ERR(dent);
+       }
+       s->secrets_dir = dent;
+
+       dent = securityfs_create_dir("coco", s->secrets_dir);
+       if (IS_ERR(dent)) {
+               dev_err(&dev->dev, "Error creating coco securityfs directory entry err=%ld\n",
+                       PTR_ERR(dent));
+               return PTR_ERR(dent);
+       }
+       d_inode(dent)->i_op = &efi_secret_dir_inode_operations;
+       s->fs_dir = dent;
+
+       bytes_left = h->len - sizeof(*h);
+       ptr += sizeof(*h);
+       while (bytes_left >= (int)sizeof(*e) && i < EFI_SECRET_NUM_FILES) {
+               e = (struct secret_entry *)ptr;
+               if (e->len < sizeof(*e) || e->len > (unsigned int)bytes_left) {
+                       dev_err(&dev->dev, "EFI secret area is corrupted\n");
+                       ret = -EINVAL;
+                       goto err_cleanup;
+               }
+
+               /* Skip deleted entries (which will have NULL_GUID) */
+               if (efi_guidcmp(e->guid, NULL_GUID)) {
+                       efi_guid_to_str(&e->guid, guid_str);
+
+                       dent = securityfs_create_file(guid_str, 0440, s->fs_dir, (void *)e,
+                                                     &efi_secret_bin_file_fops);
+                       if (IS_ERR(dent)) {
+                               dev_err(&dev->dev, "Error creating efi_secret securityfs entry\n");
+                               ret = PTR_ERR(dent);
+                               goto err_cleanup;
+                       }
+
+                       s->fs_files[i++] = dent;
+               }
+               ptr += e->len;
+               bytes_left -= e->len;
+       }
+
+       dev_info(&dev->dev, "Created %d entries in securityfs secrets/coco\n", i);
+       return 0;
+
+err_cleanup:
+       efi_secret_securityfs_teardown(dev);
+       return ret;
+}
+
+static void efi_secret_unmap_area(void)
+{
+       struct efi_secret *s = efi_secret_get();
+
+       if (s->secret_data) {
+               iounmap(s->secret_data);
+               s->secret_data = NULL;
+               s->secret_data_len = 0;
+       }
+}
+
+static int efi_secret_probe(struct platform_device *dev)
+{
+       int ret;
+
+       ret = efi_secret_map_area(dev);
+       if (ret)
+               return ret;
+
+       ret = efi_secret_securityfs_setup(dev);
+       if (ret)
+               goto err_unmap;
+
+       return ret;
+
+err_unmap:
+       efi_secret_unmap_area();
+       return ret;
+}
+
+static int efi_secret_remove(struct platform_device *dev)
+{
+       efi_secret_securityfs_teardown(dev);
+       efi_secret_unmap_area();
+       return 0;
+}
+
+static struct platform_driver efi_secret_driver = {
+       .probe = efi_secret_probe,
+       .remove = efi_secret_remove,
+       .driver = {
+               .name = "efi_secret",
+       },
+};
+
+module_platform_driver(efi_secret_driver);
+
+MODULE_DESCRIPTION("Confidential computing EFI secret area access");
+MODULE_AUTHOR("IBM");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:efi_secret");
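
For illustration, userspace consumes this interface with ordinary file operations: read a secret's file, then unlink it to have the driver wipe the entry from memory. A hypothetical sketch (the GUID in the path is an example, not a defined constant):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/sys/kernel/security/secrets/coco/"
                           "736869e5-84f0-4973-92ec-06879ce3da0b";
        char buf[4096];
        ssize_t n;
        int fd;

        fd = open(path, O_RDONLY);
        if (fd < 0)
                return 1;

        n = read(fd, buf, sizeof(buf));         /* the secret bytes */
        close(fd);

        if (unlink(path) == 0)                  /* wipes the entry */
                fprintf(stderr, "secret wiped\n");

        return n < 0;
}
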
diff --git a/drivers/virt/coco/sev-guest/Kconfig b/drivers/virt/coco/sev-guest/Kconfig
new file mode 100644 (file)
index 0000000..f9db079
--- /dev/null
@@ -0,0 +1,14 @@
+config SEV_GUEST
+       tristate "AMD SEV Guest driver"
+       default m
+       depends on AMD_MEM_ENCRYPT
+       select CRYPTO_AEAD2
+       select CRYPTO_GCM
+       help
+         SEV-SNP firmware provides the guest with a mechanism to communicate
+         with the PSP without risk from a malicious hypervisor who wishes to
+         read, alter, drop or replay the messages sent. The driver provides a
+         userspace interface for communicating with the PSP to request the
+         attestation report and more.
+
+         If you choose 'M' here, this module will be called sev-guest.
similarity index 50%
rename from arch/s390/boot/compressed/.gitignore
rename to drivers/virt/coco/sev-guest/Makefile
index 01d93832cf4acc9f81cf68618f4ce4205b51831a..63d67c27723a6060a329d2aea5794ba118df6c11 100644 (file)
@@ -1,4 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-vmlinux
-vmlinux.lds
-vmlinux.syms
+obj-$(CONFIG_SEV_GUEST) += sev-guest.o
diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
new file mode 100644 (file)
index 0000000..90ce16b
--- /dev/null
@@ -0,0 +1,743 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure Encrypted Virtualization (SEV) guest driver interface
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/set_memory.h>
+#include <linux/fs.h>
+#include <crypto/aead.h>
+#include <linux/scatterlist.h>
+#include <linux/psp-sev.h>
+#include <uapi/linux/sev-guest.h>
+#include <uapi/linux/psp-sev.h>
+
+#include <asm/svm.h>
+#include <asm/sev.h>
+
+#include "sev-guest.h"
+
+#define DEVICE_NAME    "sev-guest"
+#define AAD_LEN                48
+#define MSG_HDR_VER    1
+
+struct snp_guest_crypto {
+       struct crypto_aead *tfm;
+       u8 *iv, *authtag;
+       int iv_len, a_len;
+};
+
+struct snp_guest_dev {
+       struct device *dev;
+       struct miscdevice misc;
+
+       void *certs_data;
+       struct snp_guest_crypto *crypto;
+       struct snp_guest_msg *request, *response;
+       struct snp_secrets_page_layout *layout;
+       struct snp_req_data input;
+       u32 *os_area_msg_seqno;
+       u8 *vmpck;
+};
+
+static u32 vmpck_id;
+module_param(vmpck_id, uint, 0444);
+MODULE_PARM_DESC(vmpck_id, "The VMPCK ID to use when communicating with the PSP.");
+
+/* Mutex to serialize the shared buffer access and command handling. */
+static DEFINE_MUTEX(snp_cmd_mutex);
+
+static bool is_vmpck_empty(struct snp_guest_dev *snp_dev)
+{
+       char zero_key[VMPCK_KEY_LEN] = {0};
+
+       if (snp_dev->vmpck)
+               return !memcmp(snp_dev->vmpck, zero_key, VMPCK_KEY_LEN);
+
+       return true;
+}
+
+static void snp_disable_vmpck(struct snp_guest_dev *snp_dev)
+{
+       memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN);
+       snp_dev->vmpck = NULL;
+}
+
+static inline u64 __snp_get_msg_seqno(struct snp_guest_dev *snp_dev)
+{
+       u64 count;
+
+       lockdep_assert_held(&snp_cmd_mutex);
+
+       /* Read the current message sequence counter from the secrets page */
+       count = *snp_dev->os_area_msg_seqno;
+
+       return count + 1;
+}
+
+/* Return a non-zero message sequence number on success */
+static u64 snp_get_msg_seqno(struct snp_guest_dev *snp_dev)
+{
+       u64 count = __snp_get_msg_seqno(snp_dev);
+
+       /*
+        * The message sequence counter for the SNP guest request is a 64-bit
+        * value, but version 2 of the GHCB specification defines only 32-bit
+        * storage for it. If the counter exceeds the 32-bit range, return zero.
+        * The caller should check the return value, but if the caller happens
+        * not to check it and uses the value anyway, the firmware treats zero
+        * as an invalid sequence number and will fail the message request.
+        */
+       if (count >= UINT_MAX) {
+               dev_err(snp_dev->dev, "request message sequence counter overflow\n");
+               return 0;
+       }
+
+       return count;
+}
+
+static void snp_inc_msg_seqno(struct snp_guest_dev *snp_dev)
+{
+       /*
+        * The counter is also incremented by the PSP, so increment it by 2
+        * and save it in the secrets page.
+        */
+       *snp_dev->os_area_msg_seqno += 2;
+}
+
+static inline struct snp_guest_dev *to_snp_dev(struct file *file)
+{
+       struct miscdevice *dev = file->private_data;
+
+       return container_of(dev, struct snp_guest_dev, misc);
+}
+
+static struct snp_guest_crypto *init_crypto(struct snp_guest_dev *snp_dev, u8 *key, size_t keylen)
+{
+       struct snp_guest_crypto *crypto;
+
+       crypto = kzalloc(sizeof(*crypto), GFP_KERNEL_ACCOUNT);
+       if (!crypto)
+               return NULL;
+
+       crypto->tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
+       if (IS_ERR(crypto->tfm))
+               goto e_free;
+
+       if (crypto_aead_setkey(crypto->tfm, key, keylen))
+               goto e_free_crypto;
+
+       crypto->iv_len = crypto_aead_ivsize(crypto->tfm);
+       crypto->iv = kmalloc(crypto->iv_len, GFP_KERNEL_ACCOUNT);
+       if (!crypto->iv)
+               goto e_free_crypto;
+
+       if (crypto_aead_authsize(crypto->tfm) > MAX_AUTHTAG_LEN) {
+               if (crypto_aead_setauthsize(crypto->tfm, MAX_AUTHTAG_LEN)) {
+                       dev_err(snp_dev->dev, "failed to set authsize to %d\n", MAX_AUTHTAG_LEN);
+                       goto e_free_iv;
+               }
+       }
+
+       crypto->a_len = crypto_aead_authsize(crypto->tfm);
+       crypto->authtag = kmalloc(crypto->a_len, GFP_KERNEL_ACCOUNT);
+       if (!crypto->authtag)
+               goto e_free_auth;
+
+       return crypto;
+
+e_free_auth:
+       kfree(crypto->authtag);
+e_free_iv:
+       kfree(crypto->iv);
+e_free_crypto:
+       crypto_free_aead(crypto->tfm);
+e_free:
+       kfree(crypto);
+
+       return NULL;
+}
+
+static void deinit_crypto(struct snp_guest_crypto *crypto)
+{
+       crypto_free_aead(crypto->tfm);
+       kfree(crypto->iv);
+       kfree(crypto->authtag);
+       kfree(crypto);
+}
+
+static int enc_dec_message(struct snp_guest_crypto *crypto, struct snp_guest_msg *msg,
+                          u8 *src_buf, u8 *dst_buf, size_t len, bool enc)
+{
+       struct snp_guest_msg_hdr *hdr = &msg->hdr;
+       struct scatterlist src[3], dst[3];
+       DECLARE_CRYPTO_WAIT(wait);
+       struct aead_request *req;
+       int ret;
+
+       req = aead_request_alloc(crypto->tfm, GFP_KERNEL);
+       if (!req)
+               return -ENOMEM;
+
+       /*
+        * AEAD memory operations:
+        * +------ AAD -------+------- DATA -----+---- AUTHTAG----+
+        * |  msg header      |  plaintext       |  hdr->authtag  |
+        * | bytes 30h - 5Fh  |    or            |                |
+        * |                  |   cipher         |                |
+        * +------------------+------------------+----------------+
+        */
+       sg_init_table(src, 3);
+       sg_set_buf(&src[0], &hdr->algo, AAD_LEN);
+       sg_set_buf(&src[1], src_buf, hdr->msg_sz);
+       sg_set_buf(&src[2], hdr->authtag, crypto->a_len);
+
+       sg_init_table(dst, 3);
+       sg_set_buf(&dst[0], &hdr->algo, AAD_LEN);
+       sg_set_buf(&dst[1], dst_buf, hdr->msg_sz);
+       sg_set_buf(&dst[2], hdr->authtag, crypto->a_len);
+
+       aead_request_set_ad(req, AAD_LEN);
+       aead_request_set_tfm(req, crypto->tfm);
+       aead_request_set_callback(req, 0, crypto_req_done, &wait);
+
+       aead_request_set_crypt(req, src, dst, len, crypto->iv);
+       ret = crypto_wait_req(enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req), &wait);
+
+       aead_request_free(req);
+       return ret;
+}
+
+static int __enc_payload(struct snp_guest_dev *snp_dev, struct snp_guest_msg *msg,
+                        void *plaintext, size_t len)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_guest_msg_hdr *hdr = &msg->hdr;
+
+       memset(crypto->iv, 0, crypto->iv_len);
+       memcpy(crypto->iv, &hdr->msg_seqno, sizeof(hdr->msg_seqno));
+
+       return enc_dec_message(crypto, msg, plaintext, msg->payload, len, true);
+}
+
+static int dec_payload(struct snp_guest_dev *snp_dev, struct snp_guest_msg *msg,
+                      void *plaintext, size_t len)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_guest_msg_hdr *hdr = &msg->hdr;
+
+       /* Build IV with response buffer sequence number */
+       memset(crypto->iv, 0, crypto->iv_len);
+       memcpy(crypto->iv, &hdr->msg_seqno, sizeof(hdr->msg_seqno));
+
+       return enc_dec_message(crypto, msg, msg->payload, plaintext, len, false);
+}
+
+static int verify_and_dec_payload(struct snp_guest_dev *snp_dev, void *payload, u32 sz)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_guest_msg *resp = snp_dev->response;
+       struct snp_guest_msg *req = snp_dev->request;
+       struct snp_guest_msg_hdr *req_hdr = &req->hdr;
+       struct snp_guest_msg_hdr *resp_hdr = &resp->hdr;
+
+       dev_dbg(snp_dev->dev, "response [seqno %lld type %d version %d sz %d]\n",
+               resp_hdr->msg_seqno, resp_hdr->msg_type, resp_hdr->msg_version, resp_hdr->msg_sz);
+
+       /* Verify that the sequence counter is incremented by 1 */
+       if (unlikely(resp_hdr->msg_seqno != (req_hdr->msg_seqno + 1)))
+               return -EBADMSG;
+
+       /* Verify response message type and version number. */
+       if (resp_hdr->msg_type != (req_hdr->msg_type + 1) ||
+           resp_hdr->msg_version != req_hdr->msg_version)
+               return -EBADMSG;
+
+       /*
+        * If the message size is greater than our buffer length then return
+        * an error.
+        */
+       if (unlikely((resp_hdr->msg_sz + crypto->a_len) > sz))
+               return -EBADMSG;
+
+       /* Decrypt the payload */
+       return dec_payload(snp_dev, resp, payload, resp_hdr->msg_sz + crypto->a_len);
+}
+
+static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 type,
+                       void *payload, size_t sz)
+{
+       struct snp_guest_msg *req = snp_dev->request;
+       struct snp_guest_msg_hdr *hdr = &req->hdr;
+
+       memset(req, 0, sizeof(*req));
+
+       hdr->algo = SNP_AEAD_AES_256_GCM;
+       hdr->hdr_version = MSG_HDR_VER;
+       hdr->hdr_sz = sizeof(*hdr);
+       hdr->msg_type = type;
+       hdr->msg_version = version;
+       hdr->msg_seqno = seqno;
+       hdr->msg_vmpck = vmpck_id;
+       hdr->msg_sz = sz;
+
+       /* Verify the sequence number is non-zero */
+       if (!hdr->msg_seqno)
+               return -ENOSR;
+
+       dev_dbg(snp_dev->dev, "request [seqno %lld type %d version %d sz %d]\n",
+               hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);
+
+       return __enc_payload(snp_dev, req, payload, sz);
+}
+
+static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, int msg_ver,
+                               u8 type, void *req_buf, size_t req_sz, void *resp_buf,
+                               u32 resp_sz, __u64 *fw_err)
+{
+       unsigned long err;
+       u64 seqno;
+       int rc;
+
+       /* Get the message sequence counter and verify that it is non-zero */
+       seqno = snp_get_msg_seqno(snp_dev);
+       if (!seqno)
+               return -EIO;
+
+       memset(snp_dev->response, 0, sizeof(struct snp_guest_msg));
+
+       /* Encrypt the userspace provided payload */
+       rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz);
+       if (rc)
+               return rc;
+
+       /* Call firmware to process the request */
+       rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err);
+       if (fw_err)
+               *fw_err = err;
+
+       if (rc)
+               return rc;
+
+       /*
+        * verify_and_dec_payload() will fail only if the hypervisor is
+        * actively modifying the message header or corrupting the encrypted
+        * payload, which hints that it is acting in bad faith. Disable the
+        * VMPCK so the key cannot be used for any further communication,
+        * ensuring that AES-GCM never reuses an IV to encrypt a request.
+        */
+       rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz);
+       if (rc) {
+               dev_alert(snp_dev->dev,
+                         "Detected unexpected decode failure, disabling the vmpck_id %d\n",
+                         vmpck_id);
+               snp_disable_vmpck(snp_dev);
+               return rc;
+       }
+
+       /* Increment the message sequence number after successful payload decryption. */
+       snp_inc_msg_seqno(snp_dev);
+
+       return 0;
+}
+
+static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_report_resp *resp;
+       struct snp_report_req req;
+       int rc, resp_len;
+
+       lockdep_assert_held(&snp_cmd_mutex);
+
+       if (!arg->req_data || !arg->resp_data)
+               return -EINVAL;
+
+       if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req)))
+               return -EFAULT;
+
+       /*
+        * The intermediate response buffer is used while decrypting the
+        * response payload. Make sure that it has enough space to cover the
+        * authtag.
+        */
+       resp_len = sizeof(resp->data) + crypto->a_len;
+       resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT);
+       if (!resp)
+               return -ENOMEM;
+
+       rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version,
+                                 SNP_MSG_REPORT_REQ, &req, sizeof(req), resp->data,
+                                 resp_len, &arg->fw_err);
+       if (rc)
+               goto e_free;
+
+       if (copy_to_user((void __user *)arg->resp_data, resp, sizeof(*resp)))
+               rc = -EFAULT;
+
+e_free:
+       kfree(resp);
+       return rc;
+}
+
+static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_derived_key_resp resp = {0};
+       struct snp_derived_key_req req;
+       int rc, resp_len;
+       /* Response data is 64 bytes and max authsize for GCM is 16 bytes. */
+       u8 buf[64 + 16];
+
+       lockdep_assert_held(&snp_cmd_mutex);
+
+       if (!arg->req_data || !arg->resp_data)
+               return -EINVAL;
+
+       /*
+        * The intermediate response buffer is used while decrypting the
+        * response payload. Make sure that it has enough space to cover the
+        * authtag.
+        */
+       resp_len = sizeof(resp.data) + crypto->a_len;
+       if (sizeof(buf) < resp_len)
+               return -ENOMEM;
+
+       if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req)))
+               return -EFAULT;
+
+       rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version,
+                                 SNP_MSG_KEY_REQ, &req, sizeof(req), buf, resp_len,
+                                 &arg->fw_err);
+       if (rc)
+               return rc;
+
+       memcpy(resp.data, buf, sizeof(resp.data));
+       if (copy_to_user((void __user *)arg->resp_data, &resp, sizeof(resp)))
+               rc = -EFAULT;
+
+       /* The response buffer contains sensitive data; explicitly clear it. */
+       memzero_explicit(buf, sizeof(buf));
+       memzero_explicit(&resp, sizeof(resp));
+       return rc;
+}
+
+static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_ext_report_req req;
+       struct snp_report_resp *resp;
+       int ret, npages = 0, resp_len;
+
+       lockdep_assert_held(&snp_cmd_mutex);
+
+       if (!arg->req_data || !arg->resp_data)
+               return -EINVAL;
+
+       if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req)))
+               return -EFAULT;
+
+       /* userspace does not want certificate data */
+       if (!req.certs_len || !req.certs_address)
+               goto cmd;
+
+       if (req.certs_len > SEV_FW_BLOB_MAX_SIZE ||
+           !IS_ALIGNED(req.certs_len, PAGE_SIZE))
+               return -EINVAL;
+
+       if (!access_ok((const void __user *)req.certs_address, req.certs_len))
+               return -EFAULT;
+
+       /*
+        * Initialize the intermediate buffer with all zeros. This buffer
+        * is used in the guest request message to get the certs blob from
+        * the host. If the host does not place any certs in it, the zeros
+        * are copied back to indicate that certificate data was not provided.
+        */
+       memset(snp_dev->certs_data, 0, req.certs_len);
+       npages = req.certs_len >> PAGE_SHIFT;
+cmd:
+       /*
+        * The intermediate response buffer is used while decrypting the
+        * response payload. Make sure that it has enough space to cover the
+        * authtag.
+        */
+       resp_len = sizeof(resp->data) + crypto->a_len;
+       resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT);
+       if (!resp)
+               return -ENOMEM;
+
+       snp_dev->input.data_npages = npages;
+       ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version,
+                                  SNP_MSG_REPORT_REQ, &req.data,
+                                  sizeof(req.data), resp->data, resp_len, &arg->fw_err);
+
+       /* If the certs length was invalid, copy back the length reported by firmware */
+       if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) {
+               req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT;
+
+               if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req)))
+                       ret = -EFAULT;
+       }
+
+       if (ret)
+               goto e_free;
+
+       if (npages &&
+           copy_to_user((void __user *)req.certs_address, snp_dev->certs_data,
+                        req.certs_len)) {
+               ret = -EFAULT;
+               goto e_free;
+       }
+
+       if (copy_to_user((void __user *)arg->resp_data, resp, sizeof(*resp)))
+               ret = -EFAULT;
+
+e_free:
+       kfree(resp);
+       return ret;
+}
+
+static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
+{
+       struct snp_guest_dev *snp_dev = to_snp_dev(file);
+       void __user *argp = (void __user *)arg;
+       struct snp_guest_request_ioctl input;
+       int ret = -ENOTTY;
+
+       if (copy_from_user(&input, argp, sizeof(input)))
+               return -EFAULT;
+
+       input.fw_err = 0xff;
+
+       /* Message version must be non-zero */
+       if (!input.msg_version)
+               return -EINVAL;
+
+       mutex_lock(&snp_cmd_mutex);
+
+       /* Bail out if the VMPCK is empty (zeroed or disabled) */
+       if (is_vmpck_empty(snp_dev)) {
+               dev_err_ratelimited(snp_dev->dev, "VMPCK is disabled\n");
+               mutex_unlock(&snp_cmd_mutex);
+               return -ENOTTY;
+       }
+
+       switch (ioctl) {
+       case SNP_GET_REPORT:
+               ret = get_report(snp_dev, &input);
+               break;
+       case SNP_GET_DERIVED_KEY:
+               ret = get_derived_key(snp_dev, &input);
+               break;
+       case SNP_GET_EXT_REPORT:
+               ret = get_ext_report(snp_dev, &input);
+               break;
+       default:
+               break;
+       }
+
+       mutex_unlock(&snp_cmd_mutex);
+
+       if (input.fw_err && copy_to_user(argp, &input, sizeof(input)))
+               return -EFAULT;
+
+       return ret;
+}
+
+static void free_shared_pages(void *buf, size_t sz)
+{
+       unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+       int ret;
+
+       if (!buf)
+               return;
+
+       ret = set_memory_encrypted((unsigned long)buf, npages);
+       if (ret) {
+               WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
+               return;
+       }
+
+       __free_pages(virt_to_page(buf), get_order(sz));
+}
+
+static void *alloc_shared_pages(struct device *dev, size_t sz)
+{
+       unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+       struct page *page;
+       int ret;
+
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
+       if (!page)
+               return NULL;
+
+       ret = set_memory_decrypted((unsigned long)page_address(page), npages);
+       if (ret) {
+               dev_err(dev, "failed to mark page shared, ret=%d\n", ret);
+               __free_pages(page, get_order(sz));
+               return NULL;
+       }
+
+       return page_address(page);
+}
+
+static const struct file_operations snp_guest_fops = {
+       .owner  = THIS_MODULE,
+       .unlocked_ioctl = snp_guest_ioctl,
+};
+
+static u8 *get_vmpck(int id, struct snp_secrets_page_layout *layout, u32 **seqno)
+{
+       u8 *key = NULL;
+
+       switch (id) {
+       case 0:
+               *seqno = &layout->os_area.msg_seqno_0;
+               key = layout->vmpck0;
+               break;
+       case 1:
+               *seqno = &layout->os_area.msg_seqno_1;
+               key = layout->vmpck1;
+               break;
+       case 2:
+               *seqno = &layout->os_area.msg_seqno_2;
+               key = layout->vmpck2;
+               break;
+       case 3:
+               *seqno = &layout->os_area.msg_seqno_3;
+               key = layout->vmpck3;
+               break;
+       default:
+               break;
+       }
+
+       return key;
+}
+
+static int __init sev_guest_probe(struct platform_device *pdev)
+{
+       struct snp_secrets_page_layout *layout;
+       struct sev_guest_platform_data *data;
+       struct device *dev = &pdev->dev;
+       struct snp_guest_dev *snp_dev;
+       struct miscdevice *misc;
+       int ret;
+
+       if (!dev->platform_data)
+               return -ENODEV;
+
+       data = (struct sev_guest_platform_data *)dev->platform_data;
+       layout = (__force void *)ioremap_encrypted(data->secrets_gpa, PAGE_SIZE);
+       if (!layout)
+               return -ENODEV;
+
+       ret = -ENOMEM;
+       snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL);
+       if (!snp_dev)
+               goto e_unmap;
+
+       ret = -EINVAL;
+       snp_dev->vmpck = get_vmpck(vmpck_id, layout, &snp_dev->os_area_msg_seqno);
+       if (!snp_dev->vmpck) {
+               dev_err(dev, "invalid vmpck id %d\n", vmpck_id);
+               goto e_unmap;
+       }
+
+       /* Verify that VMPCK is not zero. */
+       if (is_vmpck_empty(snp_dev)) {
+               dev_err(dev, "vmpck id %d is null\n", vmpck_id);
+               goto e_unmap;
+       }
+
+       platform_set_drvdata(pdev, snp_dev);
+       snp_dev->dev = dev;
+       snp_dev->layout = layout;
+
+       /* Allocate the shared page used for the request and response message. */
+       snp_dev->request = alloc_shared_pages(dev, sizeof(struct snp_guest_msg));
+       if (!snp_dev->request)
+               goto e_unmap;
+
+       snp_dev->response = alloc_shared_pages(dev, sizeof(struct snp_guest_msg));
+       if (!snp_dev->response)
+               goto e_free_request;
+
+       snp_dev->certs_data = alloc_shared_pages(dev, SEV_FW_BLOB_MAX_SIZE);
+       if (!snp_dev->certs_data)
+               goto e_free_response;
+
+       ret = -EIO;
+       snp_dev->crypto = init_crypto(snp_dev, snp_dev->vmpck, VMPCK_KEY_LEN);
+       if (!snp_dev->crypto)
+               goto e_free_cert_data;
+
+       misc = &snp_dev->misc;
+       misc->minor = MISC_DYNAMIC_MINOR;
+       misc->name = DEVICE_NAME;
+       misc->fops = &snp_guest_fops;
+
+       /* Initialize the input addresses for the guest request */
+       snp_dev->input.req_gpa = __pa(snp_dev->request);
+       snp_dev->input.resp_gpa = __pa(snp_dev->response);
+       snp_dev->input.data_gpa = __pa(snp_dev->certs_data);
+
+       ret = misc_register(misc);
+       if (ret)
+               goto e_free_cert_data;
+
+       dev_info(dev, "Initialized SEV guest driver (using vmpck_id %d)\n", vmpck_id);
+       return 0;
+
+e_free_cert_data:
+       free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
+e_free_response:
+       free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
+e_free_request:
+       free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
+e_unmap:
+       iounmap(layout);
+       return ret;
+}
+
+static int __exit sev_guest_remove(struct platform_device *pdev)
+{
+       struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev);
+
+       free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
+       free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
+       free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
+       deinit_crypto(snp_dev->crypto);
+       misc_deregister(&snp_dev->misc);
+
+       return 0;
+}
+
+/*
+ * This driver is meant to be a common SEV guest interface driver and to
+ * support any SEV guest API. As such, even though it has been introduced
+ * with the SEV-SNP support, it is named "sev-guest".
+ */
+static struct platform_driver sev_guest_driver = {
+       .remove         = __exit_p(sev_guest_remove),
+       .driver         = {
+               .name = "sev-guest",
+       },
+};
+
+module_platform_driver_probe(sev_guest_driver, sev_guest_probe);
+
+MODULE_AUTHOR("Brijesh Singh <brijesh.singh@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+MODULE_DESCRIPTION("AMD SEV Guest Driver");
diff --git a/drivers/virt/coco/sev-guest/sev-guest.h b/drivers/virt/coco/sev-guest/sev-guest.h
new file mode 100644 (file)
index 0000000..21bda26
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * SEV-SNP API spec is available at https://developer.amd.com/sev
+ */
+
+#ifndef __VIRT_SEVGUEST_H__
+#define __VIRT_SEVGUEST_H__
+
+#include <linux/types.h>
+
+#define MAX_AUTHTAG_LEN                32
+
+/* See SNP spec SNP_GUEST_REQUEST section for the structure */
+enum msg_type {
+       SNP_MSG_TYPE_INVALID = 0,
+       SNP_MSG_CPUID_REQ,
+       SNP_MSG_CPUID_RSP,
+       SNP_MSG_KEY_REQ,
+       SNP_MSG_KEY_RSP,
+       SNP_MSG_REPORT_REQ,
+       SNP_MSG_REPORT_RSP,
+       SNP_MSG_EXPORT_REQ,
+       SNP_MSG_EXPORT_RSP,
+       SNP_MSG_IMPORT_REQ,
+       SNP_MSG_IMPORT_RSP,
+       SNP_MSG_ABSORB_REQ,
+       SNP_MSG_ABSORB_RSP,
+       SNP_MSG_VMRK_REQ,
+       SNP_MSG_VMRK_RSP,
+
+       SNP_MSG_TYPE_MAX
+};
+
+enum aead_algo {
+       SNP_AEAD_INVALID,
+       SNP_AEAD_AES_256_GCM,
+};
+
+struct snp_guest_msg_hdr {
+       u8 authtag[MAX_AUTHTAG_LEN];
+       u64 msg_seqno;
+       u8 rsvd1[8];
+       u8 algo;
+       u8 hdr_version;
+       u16 hdr_sz;
+       u8 msg_type;
+       u8 msg_version;
+       u16 msg_sz;
+       u32 rsvd2;
+       u8 msg_vmpck;
+       u8 rsvd3[35];
+} __packed;
+
+struct snp_guest_msg {
+       struct snp_guest_msg_hdr hdr;
+       u8 payload[4000];
+} __packed;
+
+#endif /* __VIRT_SEVGUEST_H__ */
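
The __packed header above is 0x60 (96) bytes: the fields from authtag through
rsvd3 add up so that hdr->algo sits at offset 0x30, which is exactly where the
AAD region "bytes 30h - 5Fh" in enc_dec_message() begins, and the 4000-byte
payload rounds the whole message up to one 4 KiB page. A standalone
compile-time check of that arithmetic (a sketch mirroring the definitions
above, not part of the driver):

/* Mirror of the __packed layout above, with the offsets made explicit. */
#include <stddef.h>
#include <stdint.h>

struct snp_guest_msg_hdr {
	uint8_t authtag[32];		/* MAX_AUTHTAG_LEN */
	uint64_t msg_seqno;
	uint8_t rsvd1[8];
	uint8_t algo;			/* AAD starts here, offset 0x30 */
	uint8_t hdr_version;
	uint16_t hdr_sz;
	uint8_t msg_type;
	uint8_t msg_version;
	uint16_t msg_sz;
	uint32_t rsvd2;
	uint8_t msg_vmpck;
	uint8_t rsvd3[35];
} __attribute__((packed));

struct snp_guest_msg {
	struct snp_guest_msg_hdr hdr;
	uint8_t payload[4000];
} __attribute__((packed));

_Static_assert(offsetof(struct snp_guest_msg_hdr, algo) == 0x30,
	       "AAD region starts at byte 0x30 of the header");
_Static_assert(sizeof(struct snp_guest_msg_hdr) == 0x60,
	       "96-byte header, so AAD covers bytes 0x30-0x5f (AAD_LEN == 48)");
_Static_assert(sizeof(struct snp_guest_msg) == 4096,
	       "header plus 4000-byte payload fills exactly one 4 KiB page");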
index d5bfd7b867fca12d5fb92bc2e3d400d5c572b9b6..91073b4e4a202b68b5155205c2f7294a74916b6e 100644 (file)
 
 MODULE_IMPORT_NS(DMA_BUF);
 
-#ifndef GRANT_INVALID_REF
-/*
- * Note on usage of grant reference 0 as invalid grant reference:
- * grant reference 0 is valid, but never exposed to a driver,
- * because of the fact it is already in use/reserved by the PV console.
- */
-#define GRANT_INVALID_REF      0
-#endif
-
 struct gntdev_dmabuf {
        struct gntdev_dmabuf_priv *priv;
        struct dma_buf *dmabuf;
@@ -532,7 +523,7 @@ static void dmabuf_imp_end_foreign_access(u32 *refs, int count)
        int i;
 
        for (i = 0; i < count; i++)
-               if (refs[i] != GRANT_INVALID_REF)
+               if (refs[i] != INVALID_GRANT_REF)
                        gnttab_end_foreign_access(refs[i], 0UL);
 }
 
@@ -567,7 +558,7 @@ static struct gntdev_dmabuf *dmabuf_imp_alloc_storage(int count)
        gntdev_dmabuf->nr_pages = count;
 
        for (i = 0; i < count; i++)
-               gntdev_dmabuf->u.imp.refs[i] = GRANT_INVALID_REF;
+               gntdev_dmabuf->u.imp.refs[i] = INVALID_GRANT_REF;
 
        return gntdev_dmabuf;
 
index 8ccccace2a4f11aa8dbd3640b17f3fd61a6e18e7..1a1aec0a88a11a07db6e8ebaa017eee4b51cd7e3 100644 (file)
@@ -66,8 +66,6 @@
 
 #include <asm/sync_bitops.h>
 
-/* External tools reserve first few grant table entries. */
-#define NR_RESERVED_ENTRIES 8
 #define GNTTAB_LIST_END 0xffffffff
 
 static grant_ref_t **gnttab_list;
@@ -209,6 +207,10 @@ static inline void check_free_callbacks(void)
 static void put_free_entry(grant_ref_t ref)
 {
        unsigned long flags;
+
+       if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES))
+               return;
+
        spin_lock_irqsave(&gnttab_list_lock, flags);
        gnttab_entry(ref) = gnttab_free_head;
        gnttab_free_head = ref;
@@ -1465,12 +1467,12 @@ int gnttab_init(void)
        nr_init_grefs = nr_grant_frames *
                        gnttab_interface->grefs_per_grant_frame;
 
-       for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
+       for (i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
                gnttab_entry(i) = i + 1;
 
        gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
-       gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
-       gnttab_free_head  = NR_RESERVED_ENTRIES;
+       gnttab_free_count = nr_init_grefs - GNTTAB_NR_RESERVED_ENTRIES;
+       gnttab_free_head  = GNTTAB_NR_RESERVED_ENTRIES;
 
        printk("Grant table initialized\n");
        return 0;
index a959dee21134cea6d60af5374e0c769fe3d93cd5..b6433761d42c09d6ea0b0413485b7ee5fc04534a 100644 (file)
 
 #include <xen/xen-front-pgdir-shbuf.h>
 
-#ifndef GRANT_INVALID_REF
-/*
- * FIXME: usage of grant reference 0 as invalid grant reference:
- * grant reference 0 is valid, but never exposed to a PV driver,
- * because of the fact it is already in use/reserved by the PV console.
- */
-#define GRANT_INVALID_REF      0
-#endif
-
 /**
  * This structure represents the structure of a shared page
  * that contains grant references to the pages of the shared
@@ -38,6 +29,7 @@
  */
 struct xen_page_directory {
        grant_ref_t gref_dir_next_page;
+#define XEN_GREF_LIST_END      0
        grant_ref_t gref[1]; /* Variable length */
 };
 
@@ -83,7 +75,7 @@ grant_ref_t
 xen_front_pgdir_shbuf_get_dir_start(struct xen_front_pgdir_shbuf *buf)
 {
        if (!buf->grefs)
-               return GRANT_INVALID_REF;
+               return INVALID_GRANT_REF;
 
        return buf->grefs[0];
 }
@@ -142,7 +134,7 @@ void xen_front_pgdir_shbuf_free(struct xen_front_pgdir_shbuf *buf)
                int i;
 
                for (i = 0; i < buf->num_grefs; i++)
-                       if (buf->grefs[i] != GRANT_INVALID_REF)
+                       if (buf->grefs[i] != INVALID_GRANT_REF)
                                gnttab_end_foreign_access(buf->grefs[i], 0UL);
        }
        kfree(buf->grefs);
@@ -355,7 +347,7 @@ static void backend_fill_page_dir(struct xen_front_pgdir_shbuf *buf)
        }
        /* Last page must say there is no more pages. */
        page_dir = (struct xen_page_directory *)ptr;
-       page_dir->gref_dir_next_page = GRANT_INVALID_REF;
+       page_dir->gref_dir_next_page = XEN_GREF_LIST_END;
 }
 
 /**
@@ -384,7 +376,7 @@ static void guest_fill_page_dir(struct xen_front_pgdir_shbuf *buf)
 
                if (grefs_left <= XEN_NUM_GREFS_PER_PAGE) {
                        to_copy = grefs_left;
-                       page_dir->gref_dir_next_page = GRANT_INVALID_REF;
+                       page_dir->gref_dir_next_page = XEN_GREF_LIST_END;
                } else {
                        to_copy = XEN_NUM_GREFS_PER_PAGE;
                        page_dir->gref_dir_next_page = buf->grefs[i + 1];
index 0c5e565aa8cffd33382cf48791447c7c5a37ca68..7a0c93acc2c5760335c072d929c7d59d4102b357 100644 (file)
@@ -280,6 +280,82 @@ static void scsiback_free_translation_entry(struct kref *kref)
        kfree(entry);
 }
 
+static int32_t scsiback_result(int32_t result)
+{
+       int32_t host_status;
+
+       switch (XEN_VSCSIIF_RSLT_HOST(result)) {
+       case DID_OK:
+               host_status = XEN_VSCSIIF_RSLT_HOST_OK;
+               break;
+       case DID_NO_CONNECT:
+               host_status = XEN_VSCSIIF_RSLT_HOST_NO_CONNECT;
+               break;
+       case DID_BUS_BUSY:
+               host_status = XEN_VSCSIIF_RSLT_HOST_BUS_BUSY;
+               break;
+       case DID_TIME_OUT:
+               host_status = XEN_VSCSIIF_RSLT_HOST_TIME_OUT;
+               break;
+       case DID_BAD_TARGET:
+               host_status = XEN_VSCSIIF_RSLT_HOST_BAD_TARGET;
+               break;
+       case DID_ABORT:
+               host_status = XEN_VSCSIIF_RSLT_HOST_ABORT;
+               break;
+       case DID_PARITY:
+               host_status = XEN_VSCSIIF_RSLT_HOST_PARITY;
+               break;
+       case DID_ERROR:
+               host_status = XEN_VSCSIIF_RSLT_HOST_ERROR;
+               break;
+       case DID_RESET:
+               host_status = XEN_VSCSIIF_RSLT_HOST_RESET;
+               break;
+       case DID_BAD_INTR:
+               host_status = XEN_VSCSIIF_RSLT_HOST_BAD_INTR;
+               break;
+       case DID_PASSTHROUGH:
+               host_status = XEN_VSCSIIF_RSLT_HOST_PASSTHROUGH;
+               break;
+       case DID_SOFT_ERROR:
+               host_status = XEN_VSCSIIF_RSLT_HOST_SOFT_ERROR;
+               break;
+       case DID_IMM_RETRY:
+               host_status = XEN_VSCSIIF_RSLT_HOST_IMM_RETRY;
+               break;
+       case DID_REQUEUE:
+               host_status = XEN_VSCSIIF_RSLT_HOST_REQUEUE;
+               break;
+       case DID_TRANSPORT_DISRUPTED:
+               host_status = XEN_VSCSIIF_RSLT_HOST_TRANSPORT_DISRUPTED;
+               break;
+       case DID_TRANSPORT_FAILFAST:
+               host_status = XEN_VSCSIIF_RSLT_HOST_TRANSPORT_FAILFAST;
+               break;
+       case DID_TARGET_FAILURE:
+               host_status = XEN_VSCSIIF_RSLT_HOST_TARGET_FAILURE;
+               break;
+       case DID_NEXUS_FAILURE:
+               host_status = XEN_VSCSIIF_RSLT_HOST_NEXUS_FAILURE;
+               break;
+       case DID_ALLOC_FAILURE:
+               host_status = XEN_VSCSIIF_RSLT_HOST_ALLOC_FAILURE;
+               break;
+       case DID_MEDIUM_ERROR:
+               host_status = XEN_VSCSIIF_RSLT_HOST_MEDIUM_ERROR;
+               break;
+       case DID_TRANSPORT_MARGINAL:
+               host_status = XEN_VSCSIIF_RSLT_HOST_TRANSPORT_MARGINAL;
+               break;
+       default:
+               host_status = XEN_VSCSIIF_RSLT_HOST_ERROR;
+               break;
+       }
+
+       return (host_status << 16) | (result & 0x00ffff);
+}
+
 static void scsiback_send_response(struct vscsibk_info *info,
                        char *sense_buffer, int32_t result, uint32_t resid,
                        uint16_t rqid)
@@ -295,7 +371,7 @@ static void scsiback_send_response(struct vscsibk_info *info,
        ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
        info->ring.rsp_prod_pvt++;
 
-       ring_res->rslt   = result;
+       ring_res->rslt   = scsiback_result(result);
        ring_res->rqid   = rqid;
 
        if (sense_buffer != NULL &&
@@ -555,7 +631,7 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
        struct scsiback_nexus *nexus = tpg->tpg_nexus;
        struct se_cmd *se_cmd = &pending_req->se_cmd;
        u64 unpacked_lun = pending_req->v2p->lun;
-       int rc, err = FAILED;
+       int rc, err = XEN_VSCSIIF_RSLT_RESET_FAILED;
 
        init_completion(&pending_req->tmr_done);
 
@@ -569,7 +645,7 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
        wait_for_completion(&pending_req->tmr_done);
 
        err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
-               SUCCESS : FAILED;
+               XEN_VSCSIIF_RSLT_RESET_SUCCESS : XEN_VSCSIIF_RSLT_RESET_FAILED;
 
        scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
        transport_generic_free_cmd(&pending_req->se_cmd, 0);
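
With this change the rslt field carries the translated host code in its upper
16 bits and the SCSI status in the low 16 bits, per the final
"(host_status << 16) | (result & 0x00ffff)" above. A small decode sketch of
that wire format, as a frontend would consume it (the helper names here are
illustrative, not taken from the vscsiif header):

#include <stdint.h>

/* Illustrative decode of the rslt value built by scsiback_result(). */
static inline uint32_t vscsiif_rslt_host(int32_t rslt)
{
	return ((uint32_t)rslt >> 16) & 0xff;	/* XEN_VSCSIIF_RSLT_HOST_* */
}

static inline uint32_t vscsiif_rslt_status(int32_t rslt)
{
	return (uint32_t)rslt & 0xffff;		/* SCSI status */
}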
index df689068123157bfee0c0b2dfec1dd29cdb1c21a..d6fdd2d209d32260c222571da17b8efeffadbf7b 100644 (file)
@@ -363,50 +363,92 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
                __xenbus_switch_state(dev, XenbusStateClosing, 1);
 }
 
-/**
- * xenbus_grant_ring
+/*
+ * xenbus_setup_ring
  * @dev: xenbus device
- * @vaddr: starting virtual address of the ring
+ * @vaddr: pointer to starting virtual address of the ring
  * @nr_pages: number of pages to be granted
  * @grefs: grant reference array to be filled in
  *
- * Grant access to the given @vaddr to the peer of the given device.
- * Then fill in @grefs with grant references.  Return 0 on success, or
- * -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * Allocate physically contiguous pages for a shared ring buffer and grant it
+ * to the peer of the given device. The ring buffer is initially filled with
+ * zeroes. The virtual address of the ring is stored at @vaddr and the
+ * grant references are stored in the @grefs array. In case of error @vaddr
+ * will be set to NULL and @grefs will be filled with INVALID_GRANT_REF.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr,
                      unsigned int nr_pages, grant_ref_t *grefs)
 {
-       int err;
-       unsigned int i;
+       unsigned long ring_size = nr_pages * XEN_PAGE_SIZE;
        grant_ref_t gref_head;
+       unsigned int i;
+       int ret;
 
-       err = gnttab_alloc_grant_references(nr_pages, &gref_head);
-       if (err) {
-               xenbus_dev_fatal(dev, err, "granting access to ring page");
-               return err;
+       *vaddr = alloc_pages_exact(ring_size, gfp | __GFP_ZERO);
+       if (!*vaddr) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       ret = gnttab_alloc_grant_references(nr_pages, &gref_head);
+       if (ret) {
+               xenbus_dev_fatal(dev, ret, "granting access to %u ring pages",
+                                nr_pages);
+               goto err;
        }
 
        for (i = 0; i < nr_pages; i++) {
                unsigned long gfn;
 
-               if (is_vmalloc_addr(vaddr))
-                       gfn = pfn_to_gfn(vmalloc_to_pfn(vaddr));
+               if (is_vmalloc_addr(*vaddr))
+                       gfn = pfn_to_gfn(vmalloc_to_pfn(*vaddr + i * XEN_PAGE_SIZE));
                else
-                       gfn = virt_to_gfn(vaddr);
+                       gfn = virt_to_gfn(*vaddr + i * XEN_PAGE_SIZE);
 
                grefs[i] = gnttab_claim_grant_reference(&gref_head);
                gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id,
                                                gfn, 0);
-
-               vaddr = vaddr + XEN_PAGE_SIZE;
        }
 
        return 0;
+
+ err:
+       if (*vaddr)
+               free_pages_exact(*vaddr, ring_size);
+       for (i = 0; i < nr_pages; i++)
+               grefs[i] = INVALID_GRANT_REF;
+       *vaddr = NULL;
+
+       return ret;
 }
-EXPORT_SYMBOL_GPL(xenbus_grant_ring);
+EXPORT_SYMBOL_GPL(xenbus_setup_ring);
 
+/*
+ * xenbus_teardown_ring
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages
+ * @grefs: grant reference array
+ *
+ * Remove grants for the shared ring buffer and free the associated memory.
+ * On return the grant reference array is filled with INVALID_GRANT_REF.
+ */
+void xenbus_teardown_ring(void **vaddr, unsigned int nr_pages,
+                         grant_ref_t *grefs)
+{
+       unsigned int i;
+
+       for (i = 0; i < nr_pages; i++) {
+               if (grefs[i] != INVALID_GRANT_REF) {
+                       gnttab_end_foreign_access(grefs[i], 0);
+                       grefs[i] = INVALID_GRANT_REF;
+               }
+       }
+
+       if (*vaddr)
+               free_pages_exact(*vaddr, nr_pages * XEN_PAGE_SIZE);
+       *vaddr = NULL;
+}
+EXPORT_SYMBOL_GPL(xenbus_teardown_ring);
 
 /**
  * Allocate an event channel for the given xenbus_device, assigning the newly
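
A sketch of how a frontend would pair the two new helpers, assuming a
single-page ring; the my_* names are illustrative and the FRONT_RING_INIT
style ring initialization is left out:

#include <linux/gfp.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>

/* Illustrative frontend usage of the helpers added above. */
static int my_front_alloc_ring(struct xenbus_device *dev, void **ring,
			       grant_ref_t *gref)
{
	int err;

	/* Allocates a zeroed page, grants it to the backend, fills *gref. */
	err = xenbus_setup_ring(dev, GFP_KERNEL, ring, 1, gref);
	if (err)
		return err;	/* *ring is NULL, *gref is INVALID_GRANT_REF */

	return 0;
}

static void my_front_free_ring(void **ring, grant_ref_t *gref)
{
	/* Safe even after a failed setup: INVALID_GRANT_REF entries are skipped. */
	xenbus_teardown_ring(ring, 1, gref);
}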
index fe360c33ce717fcedd4c8494d903888059005919..d367f2bd2b93a961629eba30fe8d42a0dd0ff7ad 100644 (file)
@@ -65,6 +65,7 @@
 #include "xenbus.h"
 
 
+static int xs_init_irq;
 int xen_store_evtchn;
 EXPORT_SYMBOL_GPL(xen_store_evtchn);
 
@@ -750,6 +751,20 @@ static void xenbus_probe(void)
 {
        xenstored_ready = 1;
 
+       if (!xen_store_interface) {
+               xen_store_interface = xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
+                                               XEN_PAGE_SIZE);
+               /*
+                * Now it is safe to free the IRQ used for xenstore late
+                * initialization. No need to unbind: it is about to be
+                * bound again from xb_init_comms. Note that calling
+                * unbind_from_irqhandler now would result in xen_evtchn_close()
+                * being called and the event channel not being enabled again
+                * afterwards, resulting in missed event notifications.
+                */
+               free_irq(xs_init_irq, &xb_waitq);
+       }
+
        /*
         * In the HVM case, xenbus_init() deferred its call to
         * xs_init() in case callbacks were not operational yet.
@@ -798,20 +813,22 @@ static int __init xenbus_probe_initcall(void)
 {
        /*
         * Probe XenBus here in the XS_PV case, and also XS_HVM unless we
-        * need to wait for the platform PCI device to come up.
+        * need to wait for the platform PCI device to come up or
+        * xen_store_interface is not ready.
         */
        if (xen_store_domain_type == XS_PV ||
            (xen_store_domain_type == XS_HVM &&
-            !xs_hvm_defer_init_for_callback()))
+            !xs_hvm_defer_init_for_callback() &&
+            xen_store_interface != NULL))
                xenbus_probe();
 
        /*
-        * For XS_LOCAL, spawn a thread which will wait for xenstored
-        * or a xenstore-stubdom to be started, then probe. It will be
-        * triggered when communication starts happening, by waiting
-        * on xb_waitq.
+        * For XS_LOCAL or when xen_store_interface is not ready, spawn a
+        * thread which will wait for xenstored or a xenstore-stubdom to be
+        * started, then probe.  It will be triggered when communication
+        * starts happening, by waiting on xb_waitq.
         */
-       if (xen_store_domain_type == XS_LOCAL) {
+       if (xen_store_domain_type == XS_LOCAL || xen_store_interface == NULL) {
                struct task_struct *probe_task;
 
                probe_task = kthread_run(xenbus_probe_thread, NULL,
@@ -907,10 +924,25 @@ static struct notifier_block xenbus_resume_nb = {
        .notifier_call = xenbus_resume_cb,
 };
 
+static irqreturn_t xenbus_late_init(int irq, void *unused)
+{
+       int err;
+       uint64_t v = 0;
+
+       err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+       if (err || !v || !~v)
+               return IRQ_HANDLED;
+       xen_store_gfn = (unsigned long)v;
+
+       wake_up(&xb_waitq);
+       return IRQ_HANDLED;
+}
+
 static int __init xenbus_init(void)
 {
        int err;
        uint64_t v = 0;
+       bool wait = false;
        xen_store_domain_type = XS_UNKNOWN;
 
        if (!xen_domain())
@@ -957,25 +989,44 @@ static int __init xenbus_init(void)
                 * been properly initialized. Instead of attempting to map a
                 * wrong guest physical address return error.
                 *
-                * Also recognize all bits set as an invalid value.
+                * Also recognize all bits set as an invalid/uninitialized value.
                 */
-               if (!v || !~v) {
+               if (!v) {
                        err = -ENOENT;
                        goto out_error;
                }
-               /* Avoid truncation on 32-bit. */
+               if (v == ~0ULL) {
+                       wait = true;
+               } else {
+                       /* Avoid truncation on 32-bit. */
 #if BITS_PER_LONG == 32
-               if (v > ULONG_MAX) {
-                       pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n",
-                              __func__, v);
-                       err = -EINVAL;
-                       goto out_error;
-               }
+                       if (v > ULONG_MAX) {
+                               pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n",
+                                      __func__, v);
+                               err = -EINVAL;
+                               goto out_error;
+                       }
 #endif
-               xen_store_gfn = (unsigned long)v;
-               xen_store_interface =
-                       xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
-                                 XEN_PAGE_SIZE);
+                       xen_store_gfn = (unsigned long)v;
+                       xen_store_interface =
+                               xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
+                                         XEN_PAGE_SIZE);
+                       if (xen_store_interface->connection != XENSTORE_CONNECTED)
+                               wait = true;
+               }
+               if (wait) {
+                       err = bind_evtchn_to_irqhandler(xen_store_evtchn,
+                                                       xenbus_late_init,
+                                                       0, "xenstore_late_init",
+                                                       &xb_waitq);
+                       if (err < 0) {
+                               pr_err("xenstore_late_init couldn't bind irq err=%d\n",
+                                      err);
+                               return err;
+                       }
+
+                       xs_init_irq = err;
+               }
                break;
        default:
                pr_warn("Xenstore state unknown\n");
index 21c6332fa78503aab716a17e036e707649593c15..32dff7ba3ddaaf12ff34505c679c62fda5a2d738 100644 (file)
@@ -142,12 +142,6 @@ config BINFMT_ZFLAT
        help
          Support FLAT format compressed binaries
 
-config BINFMT_SHARED_FLAT
-       bool "Enable shared FLAT support"
-       depends on BINFMT_FLAT
-       help
-         Support FLAT shared libraries
-
 config HAVE_AOUT
        def_bool n
 
index 2fe402483ad5bbc59750e2096f7127a3ba2152c0..30b066299d39f70a450539823dbe75602427e81a 100644 (file)
@@ -740,10 +740,22 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 {
        struct inode *inode = d_inode(path->dentry);
        struct afs_vnode *vnode = AFS_FS_I(inode);
-       int seq = 0;
+       struct key *key;
+       int ret, seq = 0;
 
        _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
 
+       if (!(query_flags & AT_STATX_DONT_SYNC) &&
+           !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+               key = afs_request_key(vnode->volume->cell);
+               if (IS_ERR(key))
+                       return PTR_ERR(key);
+               ret = afs_validate(vnode, key);
+               key_put(key);
+               if (ret < 0)
+                       return ret;
+       }
+
        do {
                read_seqbegin_or_lock(&vnode->cb_lock, &seq);
                generic_fillattr(&init_user_ns, inode, stat);
index 6268981500112fd3cc20350649ceb9c4c878d3b5..c26545d71d39a3a548925c1fa65da7c356a3d81c 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/flat.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
-#include <linux/coredump.h>
 
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 #define RELOC_FAILED 0xff00ff01                /* Relocation incorrect somewhere */
 #define UNLOADED_LIB 0x7ff000ff                /* Placeholder for unused library */
 
-#ifdef CONFIG_BINFMT_SHARED_FLAT
-#define        MAX_SHARED_LIBS                 (4)
-#else
-#define        MAX_SHARED_LIBS                 (1)
-#endif
+#define MAX_SHARED_LIBS                        (1)
 
 #ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET
 #define DATA_START_OFFSET_WORDS                (0)
@@ -93,38 +88,13 @@ struct lib_info {
        } lib_list[MAX_SHARED_LIBS];
 };
 
-#ifdef CONFIG_BINFMT_SHARED_FLAT
-static int load_flat_shared_library(int id, struct lib_info *p);
-#endif
-
 static int load_flat_binary(struct linux_binprm *);
-#ifdef CONFIG_COREDUMP
-static int flat_core_dump(struct coredump_params *cprm);
-#endif
 
 static struct linux_binfmt flat_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_flat_binary,
-#ifdef CONFIG_COREDUMP
-       .core_dump      = flat_core_dump,
-       .min_coredump   = PAGE_SIZE
-#endif
 };
 
-/****************************************************************************/
-/*
- * Routine writes a core dump image in the current directory.
- * Currently only a stub-function.
- */
-
-#ifdef CONFIG_COREDUMP
-static int flat_core_dump(struct coredump_params *cprm)
-{
-       pr_warn("Process %s:%d received signr %d and should have core dumped\n",
-               current->comm, current->pid, cprm->siginfo->si_signo);
-       return 1;
-}
-#endif
 
 /****************************************************************************/
 /*
@@ -329,51 +299,18 @@ out_free:
 /****************************************************************************/
 
 static unsigned long
-calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
+calc_reloc(unsigned long r, struct lib_info *p)
 {
        unsigned long addr;
-       int id;
        unsigned long start_brk;
        unsigned long start_data;
        unsigned long text_len;
        unsigned long start_code;
 
-#ifdef CONFIG_BINFMT_SHARED_FLAT
-       if (r == 0)
-               id = curid;     /* Relocs of 0 are always self referring */
-       else {
-               id = (r >> 24) & 0xff;  /* Find ID for this reloc */
-               r &= 0x00ffffff;        /* Trim ID off here */
-       }
-       if (id >= MAX_SHARED_LIBS) {
-               pr_err("reference 0x%lx to shared library %d", r, id);
-               goto failed;
-       }
-       if (curid != id) {
-               if (internalp) {
-                       pr_err("reloc address 0x%lx not in same module "
-                              "(%d != %d)", r, curid, id);
-                       goto failed;
-               } else if (!p->lib_list[id].loaded &&
-                          load_flat_shared_library(id, p) < 0) {
-                       pr_err("failed to load library %d", id);
-                       goto failed;
-               }
-               /* Check versioning information (i.e. time stamps) */
-               if (p->lib_list[id].build_date && p->lib_list[curid].build_date &&
-                               p->lib_list[curid].build_date < p->lib_list[id].build_date) {
-                       pr_err("library %d is younger than %d", id, curid);
-                       goto failed;
-               }
-       }
-#else
-       id = 0;
-#endif
-
-       start_brk = p->lib_list[id].start_brk;
-       start_data = p->lib_list[id].start_data;
-       start_code = p->lib_list[id].start_code;
-       text_len = p->lib_list[id].text_len;
+       start_brk = p->lib_list[0].start_brk;
+       start_data = p->lib_list[0].start_data;
+       start_code = p->lib_list[0].start_code;
+       text_len = p->lib_list[0].text_len;
 
        if (r > start_brk - start_data + text_len) {
                pr_err("reloc outside program 0x%lx (0 - 0x%lx/0x%lx)",
@@ -440,8 +377,32 @@ static void old_reloc(unsigned long rl)
 
 /****************************************************************************/
 
+static inline u32 __user *skip_got_header(u32 __user *rp)
+{
+       if (IS_ENABLED(CONFIG_RISCV)) {
+               /*
+                * RISC-V has a 16 byte GOT PLT header for elf64-riscv
+                * and 8 byte GOT PLT header for elf32-riscv.
+                * Skip the whole GOT PLT header, since it is reserved
+                * for the dynamic linker (ld.so).
+                */
+               u32 rp_val0, rp_val1;
+
+               if (get_user(rp_val0, rp))
+                       return rp;
+               if (get_user(rp_val1, rp + 1))
+                       return rp;
+
+               if (rp_val0 == 0xffffffff && rp_val1 == 0xffffffff)
+                       rp += 4;
+               else if (rp_val0 == 0xffffffff)
+                       rp += 2;
+       }
+       return rp;
+}
+
 static int load_flat_file(struct linux_binprm *bprm,
-               struct lib_info *libinfo, int id, unsigned long *extra_stack)
+               struct lib_info *libinfo, unsigned long *extra_stack)
 {
        struct flat_hdr *hdr;
        unsigned long textpos, datapos, realdatastart;
@@ -493,14 +454,6 @@ static int load_flat_file(struct linux_binprm *bprm,
                goto err;
        }
 
-       /* Don't allow old format executables to use shared libraries */
-       if (rev == OLD_FLAT_VERSION && id != 0) {
-               pr_err("shared libraries are not available before rev 0x%lx\n",
-                      FLAT_VERSION);
-               ret = -ENOEXEC;
-               goto err;
-       }
-
        /*
         * fix up the flags for the older format,  there were all kinds
         * of endian hacks,  this only works for the simple cases
@@ -551,15 +504,13 @@ static int load_flat_file(struct linux_binprm *bprm,
        }
 
        /* Flush all traces of the currently running executable */
-       if (id == 0) {
-               ret = begin_new_exec(bprm);
-               if (ret)
-                       goto err;
+       ret = begin_new_exec(bprm);
+       if (ret)
+               goto err;
 
-               /* OK, This is the point of no return */
-               set_personality(PER_LINUX_32BIT);
-               setup_new_exec(bprm);
-       }
+       /* OK, This is the point of no return */
+       set_personality(PER_LINUX_32BIT);
+       setup_new_exec(bprm);
 
        /*
         * calculate the extra space we need to map in
@@ -739,42 +690,40 @@ static int load_flat_file(struct linux_binprm *bprm,
        text_len -= sizeof(struct flat_hdr); /* the real code len */
 
        /* The main program needs a little extra setup in the task structure */
-       if (id == 0) {
-               current->mm->start_code = start_code;
-               current->mm->end_code = end_code;
-               current->mm->start_data = datapos;
-               current->mm->end_data = datapos + data_len;
-               /*
-                * set up the brk stuff, uses any slack left in data/bss/stack
-                * allocation.  We put the brk after the bss (between the bss
-                * and stack) like other platforms.
-                * Userspace code relies on the stack pointer starting out at
-                * an address right at the end of a page.
-                */
-               current->mm->start_brk = datapos + data_len + bss_len;
-               current->mm->brk = (current->mm->start_brk + 3) & ~3;
+       current->mm->start_code = start_code;
+       current->mm->end_code = end_code;
+       current->mm->start_data = datapos;
+       current->mm->end_data = datapos + data_len;
+       /*
+        * set up the brk stuff, uses any slack left in data/bss/stack
+        * allocation.  We put the brk after the bss (between the bss
+        * and stack) like other platforms.
+        * Userspace code relies on the stack pointer starting out at
+        * an address right at the end of a page.
+        */
+       current->mm->start_brk = datapos + data_len + bss_len;
+       current->mm->brk = (current->mm->start_brk + 3) & ~3;
 #ifndef CONFIG_MMU
-               current->mm->context.end_brk = memp + memp_size - stack_len;
+       current->mm->context.end_brk = memp + memp_size - stack_len;
 #endif
-       }
 
        if (flags & FLAT_FLAG_KTRACE) {
                pr_info("Mapping is %lx, Entry point is %x, data_start is %x\n",
                        textpos, 0x00ffffff&ntohl(hdr->entry), ntohl(hdr->data_start));
                pr_info("%s %s: TEXT=%lx-%lx DATA=%lx-%lx BSS=%lx-%lx\n",
-                       id ? "Lib" : "Load", bprm->filename,
+                       "Load", bprm->filename,
                        start_code, end_code, datapos, datapos + data_len,
                        datapos + data_len, (datapos + data_len + bss_len + 3) & ~3);
        }
 
        /* Store the current module values into the global library structure */
-       libinfo->lib_list[id].start_code = start_code;
-       libinfo->lib_list[id].start_data = datapos;
-       libinfo->lib_list[id].start_brk = datapos + data_len + bss_len;
-       libinfo->lib_list[id].text_len = text_len;
-       libinfo->lib_list[id].loaded = 1;
-       libinfo->lib_list[id].entry = (0x00ffffff & ntohl(hdr->entry)) + textpos;
-       libinfo->lib_list[id].build_date = ntohl(hdr->build_date);
+       libinfo->lib_list[0].start_code = start_code;
+       libinfo->lib_list[0].start_data = datapos;
+       libinfo->lib_list[0].start_brk = datapos + data_len + bss_len;
+       libinfo->lib_list[0].text_len = text_len;
+       libinfo->lib_list[0].loaded = 1;
+       libinfo->lib_list[0].entry = (0x00ffffff & ntohl(hdr->entry)) + textpos;
+       libinfo->lib_list[0].build_date = ntohl(hdr->build_date);
 
        /*
         * We just load the allocations into some temporary memory to
@@ -789,14 +738,15 @@ static int load_flat_file(struct linux_binprm *bprm,
         * image.
         */
        if (flags & FLAT_FLAG_GOTPIC) {
-               for (rp = (u32 __user *)datapos; ; rp++) {
+               rp = skip_got_header((u32 __user *) datapos);
+               for (; ; rp++) {
                        u32 addr, rp_val;
                        if (get_user(rp_val, rp))
                                return -EFAULT;
                        if (rp_val == 0xffffffff)
                                break;
                        if (rp_val) {
-                               addr = calc_reloc(rp_val, libinfo, id, 0);
+                               addr = calc_reloc(rp_val, libinfo);
                                if (addr == RELOC_FAILED) {
                                        ret = -ENOEXEC;
                                        goto err;
@@ -832,7 +782,7 @@ static int load_flat_file(struct linux_binprm *bprm,
                                return -EFAULT;
                        relval = ntohl(tmp);
                        addr = flat_get_relocate_addr(relval);
-                       rp = (u32 __user *)calc_reloc(addr, libinfo, id, 1);
+                       rp = (u32 __user *)calc_reloc(addr, libinfo);
                        if (rp == (u32 __user *)RELOC_FAILED) {
                                ret = -ENOEXEC;
                                goto err;
@@ -855,7 +805,7 @@ static int load_flat_file(struct linux_binprm *bprm,
                                         */
                                        addr = ntohl((__force __be32)addr);
                                }
-                               addr = calc_reloc(addr, libinfo, id, 0);
+                               addr = calc_reloc(addr, libinfo);
                                if (addr == RELOC_FAILED) {
                                        ret = -ENOEXEC;
                                        goto err;
@@ -883,7 +833,7 @@ static int load_flat_file(struct linux_binprm *bprm,
        /* zero the BSS,  BRK and stack areas */
        if (clear_user((void __user *)(datapos + data_len), bss_len +
                       (memp + memp_size - stack_len -          /* end brk */
-                      libinfo->lib_list[id].start_brk) +       /* start brk */
+                      libinfo->lib_list[0].start_brk) +        /* start brk */
                       stack_len))
                return -EFAULT;
 
@@ -893,49 +843,6 @@ err:
 }
 
 
-/****************************************************************************/
-#ifdef CONFIG_BINFMT_SHARED_FLAT
-
-/*
- * Load a shared library into memory.  The library gets its own data
- * segment (including bss) but not argv/argc/environ.
- */
-
-static int load_flat_shared_library(int id, struct lib_info *libs)
-{
-       /*
-        * This is a fake bprm struct; only the members "buf", "file" and
-        * "filename" are actually used.
-        */
-       struct linux_binprm bprm;
-       int res;
-       char buf[16];
-       loff_t pos = 0;
-
-       memset(&bprm, 0, sizeof(bprm));
-
-       /* Create the file name */
-       sprintf(buf, "/lib/lib%d.so", id);
-
-       /* Open the file up */
-       bprm.filename = buf;
-       bprm.file = open_exec(bprm.filename);
-       res = PTR_ERR(bprm.file);
-       if (IS_ERR(bprm.file))
-               return res;
-
-       res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos);
-
-       if (res >= 0)
-               res = load_flat_file(&bprm, libs, id, NULL);
-
-       allow_write_access(bprm.file);
-       fput(bprm.file);
-
-       return res;
-}
-
-#endif /* CONFIG_BINFMT_SHARED_FLAT */
 /****************************************************************************/
 
 /*
@@ -968,7 +875,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
        stack_len += (bprm->envc + 1) * sizeof(char *);   /* the envp array */
        stack_len = ALIGN(stack_len, FLAT_STACK_ALIGN);
 
-       res = load_flat_file(bprm, &libinfo, 0, &stack_len);
+       res = load_flat_file(bprm, &libinfo, &stack_len);
        if (res < 0)
                return res;
 
@@ -1013,20 +920,6 @@ static int load_flat_binary(struct linux_binprm *bprm)
         */
        start_addr = libinfo.lib_list[0].entry;
 
-#ifdef CONFIG_BINFMT_SHARED_FLAT
-       for (i = MAX_SHARED_LIBS-1; i > 0; i--) {
-               if (libinfo.lib_list[i].loaded) {
-                       /* Push previous first to call address */
-                       unsigned long __user *sp;
-                       current->mm->start_stack -= sizeof(unsigned long);
-                       sp = (unsigned long __user *)current->mm->start_stack;
-                       if (put_user(start_addr, sp))
-                               return -EFAULT;
-                       start_addr = libinfo.lib_list[i].entry;
-               }
-       }
-#endif
-
 #ifdef FLAT_PLAT_INIT
        FLAT_PLAT_INIT(regs);
 #endif
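
The GOTPIC hunk above now skips the GOT header before walking the table, then patches each u32 entry in place until the 0xffffffff terminator. A minimal user-space model of that loop, with relocate() as a hypothetical stand-in for calc_reloc():

#include <stdint.h>

/* Hypothetical stand-in for calc_reloc(): turn an offset recorded in
 * the binary into the address the segment was actually loaded at. */
static uint32_t relocate(uint32_t off, uint32_t load_base)
{
        return off + load_base;
}

/* Model of the GOT fixup: 0xffffffff ends the table, zero entries are
 * left alone, every other entry is rewritten as a run-time address. */
static void fixup_got(uint32_t *rp, uint32_t load_base)
{
        for (; *rp != 0xffffffff; rp++)
                if (*rp)
                        *rp = relocate(*rp, load_base);
}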
index 31c3f592e5875ec8f10279e1c6a3a0b65e627dd4..84795d831282b3152440354db4a2ddc3e0b594e7 100644 (file)
@@ -4238,6 +4238,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
  */
 static void btrfs_end_empty_barrier(struct bio *bio)
 {
+       bio_uninit(bio);
        complete(bio->bi_private);
 }
 
@@ -4247,7 +4248,7 @@ static void btrfs_end_empty_barrier(struct bio *bio)
  */
 static void write_dev_flush(struct btrfs_device *device)
 {
-       struct bio *bio = device->flush_bio;
+       struct bio *bio = &device->flush_bio;
 
 #ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        /*
@@ -4260,12 +4261,12 @@ static void write_dev_flush(struct btrfs_device *device)
         * of simplicity, since this is a debug tool and not meant for use in
         * non-debug builds.
         */
-       struct request_queue *q = bdev_get_queue(device->bdev);
-       if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+       if (!bdev_write_cache(device->bdev))
                return;
 #endif
 
-       bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
+       bio_init(bio, device->bdev, NULL, 0,
+                REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
        bio->bi_end_io = btrfs_end_empty_barrier;
        init_completion(&device->flush_wait);
        bio->bi_private = &device->flush_wait;
@@ -4279,7 +4280,7 @@ static void write_dev_flush(struct btrfs_device *device)
  */
 static blk_status_t wait_dev_flush(struct btrfs_device *device)
 {
-       struct bio *bio = device->flush_bio;
+       struct bio *bio = &device->flush_bio;
 
        if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
                return BLK_STS_OK;
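
The two disk-io.c hunks above, together with the volumes.{c,h} changes further down, replace btrfs's separately allocated flush bio with one embedded in the device structure: bio_init() arms it before each flush and bio_uninit() tears it down in the completion. A condensed sketch of that pattern (names mirror the btrfs code; error handling omitted):

#include <linux/bio.h>
#include <linux/completion.h>

struct mydev {
        struct block_device *bdev;
        struct bio flush_bio;           /* embedded, no bio_kmalloc() */
        struct completion flush_wait;
};

static void flush_end_io(struct bio *bio)
{
        bio_uninit(bio);                /* pairs with bio_init() below */
        complete(bio->bi_private);
}

static void send_flush(struct mydev *dev)
{
        struct bio *bio = &dev->flush_bio;

        bio_init(bio, dev->bdev, NULL, 0,
                 REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
        bio->bi_end_io = flush_end_io;
        init_completion(&dev->flush_wait);
        bio->bi_private = &dev->flush_wait;
        submit_bio(bio);
}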
index 6aa92f84f46547f5144164778926ce420701772e..6260784e74b5ae66b7ef6559ac67ba936376d2c6 100644 (file)
@@ -1239,7 +1239,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
 
                if (size) {
                        ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
-                                                  GFP_NOFS, 0);
+                                                  GFP_NOFS);
                        if (!ret)
                                *discarded_bytes += size;
                        else if (ret != -EOPNOTSUPP)
@@ -1256,7 +1256,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
 
        if (bytes_left) {
                ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
-                                          GFP_NOFS, 0);
+                                          GFP_NOFS);
                if (!ret)
                        *discarded_bytes += bytes_left;
        }
@@ -1291,7 +1291,7 @@ static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
                ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
                                              &discarded);
                discarded += src_disc;
-       } else if (blk_queue_discard(bdev_get_queue(stripe->dev->bdev))) {
+       } else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
                ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
        } else {
                ret = 0;
@@ -5987,7 +5987,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
        *trimmed = 0;
 
        /* Discard not supported = nothing to do. */
-       if (!blk_queue_discard(bdev_get_queue(device->bdev)))
+       if (!bdev_max_discard_sectors(device->bdev))
                return 0;
 
        /* Not writable = nothing to do. */
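
These extent-tree.c hunks adopt the updated block-layer conventions: discard support is probed with bdev_max_discard_sectors() instead of a request_queue flag, and blkdev_issue_discard() has lost its flags argument. Reduced to a helper (a sketch, not code from this series):

#include <linux/blkdev.h>

static int discard_range(struct block_device *bdev, sector_t start,
                         sector_t nr)
{
        if (!bdev_max_discard_sectors(bdev))
                return -EOPNOTSUPP;     /* device cannot discard */
        return blkdev_issue_discard(bdev, start, nr, GFP_NOFS);
}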
index be6c24577dbe0672589a8a0eccf3b62b2f0ec0df..b2c692b2fd8d35d1e631ae5d1ea8b103acf2f31f 100644 (file)
@@ -468,7 +468,6 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
                                        void __user *arg)
 {
        struct btrfs_device *device;
-       struct request_queue *q;
        struct fstrim_range range;
        u64 minlen = ULLONG_MAX;
        u64 num_devices = 0;
@@ -498,14 +497,11 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
        rcu_read_lock();
        list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
                                dev_list) {
-               if (!device->bdev)
+               if (!device->bdev || !bdev_max_discard_sectors(device->bdev))
                        continue;
-               q = bdev_get_queue(device->bdev);
-               if (blk_queue_discard(q)) {
-                       num_devices++;
-                       minlen = min_t(u64, q->limits.discard_granularity,
-                                    minlen);
-               }
+               num_devices++;
+               minlen = min_t(u64, bdev_discard_granularity(device->bdev),
+                                   minlen);
        }
        rcu_read_unlock();
 
@@ -2565,7 +2561,12 @@ static noinline int search_ioctl(struct inode *inode,
 
        while (1) {
                ret = -EFAULT;
-               if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset))
+               /*
+                * Ensure that the whole user buffer is faulted in at sub-page
+                * granularity, otherwise the loop may live-lock.
+                */
+               if (fault_in_subpage_writeable(ubuf + sk_offset,
+                                              *buf_size - sk_offset))
                        break;
 
                ret = btrfs_search_forward(root, &key, path, sk->min_transid);
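
The comment added above is the heart of this hunk: fault-in at page granularity can succeed while the subsequent copy still faults on a sub-page boundary (e.g. an arm64 MTE tag mismatch), so the old loop could spin forever. A reduced model of the corrected loop, where copy_results() is a hypothetical stand-in for the btrfs_search_forward()/copy_to_sk() step:

#include <linux/pagemap.h>

extern int copy_results(char __user *ubuf, size_t len);

static int search_and_copy(char __user *ubuf, size_t len)
{
        int ret;

        for (;;) {
                /* Returns the number of bytes NOT faulted in; non-zero
                 * means the buffer is unusable, so bail out rather than
                 * live-lock. */
                if (fault_in_subpage_writeable(ubuf, len))
                        return -EFAULT;
                ret = copy_results(ubuf, len);  /* hypothetical worker */
                if (ret != -EFAULT)
                        return ret;
                /* -EFAULT: the page went away again between fault-in
                 * and copy; fault it back in and retry. */
        }
}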
index a8cc736731fdbed9e0b7539d781dacae25b29632..b6b00338037c49b56d27af50ed7668763b89bbcf 100644 (file)
@@ -405,7 +405,6 @@ void btrfs_free_device(struct btrfs_device *device)
        WARN_ON(!list_empty(&device->post_commit_list));
        rcu_string_free(device->name);
        extent_io_tree_release(&device->alloc_state);
-       bio_put(device->flush_bio);
        btrfs_destroy_dev_zone_info(device);
        kfree(device);
 }
@@ -643,7 +642,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
                        set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
        }
 
-       if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+       if (!bdev_nonrot(bdev))
                fs_devices->rotating = true;
 
        device->bdev = bdev;
@@ -2706,7 +2705,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
        atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
 
-       if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+       if (!bdev_nonrot(bdev))
                fs_devices->rotating = true;
 
        orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
@@ -6949,16 +6948,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
-       /*
-        * Preallocate a bio that's always going to be used for flushing device
-        * barriers and matches the device lifespan
-        */
-       dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
-       if (!dev->flush_bio) {
-               kfree(dev);
-               return ERR_PTR(-ENOMEM);
-       }
-
        INIT_LIST_HEAD(&dev->dev_list);
        INIT_LIST_HEAD(&dev->dev_alloc_list);
        INIT_LIST_HEAD(&dev->post_commit_list);
index f3e28f11cfb6e606ab4cdb14d8f45bb2352f85e8..b11c563d2025e52d1e1ca2ddffde4df55af08f98 100644 (file)
@@ -121,8 +121,8 @@ struct btrfs_device {
        /* bytes used on the current transaction */
        u64 commit_bytes_used;
 
-       /* for sending down flush barriers */
-       struct bio *flush_bio;
+       /* Bio used for flushing device barriers */
+       struct bio flush_bio;
        struct completion flush_wait;
 
        /* per-device scrub information */
index d31b0eda210f1eeb1a7c25667ab6a8a3816f1ef3..29b54fd9c128dffdb1bff294d8e41e102df466c4 100644 (file)
@@ -350,7 +350,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
        struct btrfs_fs_info *fs_info = device->fs_info;
        struct btrfs_zoned_device_info *zone_info = NULL;
        struct block_device *bdev = device->bdev;
-       struct request_queue *queue = bdev_get_queue(bdev);
        unsigned int max_active_zones;
        unsigned int nactive;
        sector_t nr_sectors;
@@ -410,7 +409,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
        if (!IS_ALIGNED(nr_sectors, zone_sectors))
                zone_info->nr_zones++;
 
-       max_active_zones = queue_max_active_zones(queue);
+       max_active_zones = bdev_max_active_zones(bdev);
        if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
                btrfs_err_in_rcu(fs_info,
 "zoned: %s: max active zones %u is too small, need at least %u active zones",
index aa25bffd48237afb3a0140f68e9824396be24c7b..b6edcf89a429f6107d4ee015a3c999482c74b0fd 100644 (file)
@@ -85,7 +85,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
        if (folio_test_dirty(folio)) {
                dout("%p dirty_folio %p idx %lu -- already dirty\n",
                     mapping->host, folio, folio->index);
-               BUG_ON(!folio_get_private(folio));
+               VM_BUG_ON_FOLIO(!folio_test_private(folio), folio);
                return false;
        }
 
@@ -122,7 +122,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
         * Reference snap context in folio->private.  Also set
         * PagePrivate so that we get invalidate_folio callback.
         */
-       BUG_ON(folio_get_private(folio));
+       VM_BUG_ON_FOLIO(folio_test_private(folio), folio);
        folio_attach_private(folio, snapc);
 
        return ceph_fscache_dirty_folio(mapping, folio);
@@ -150,7 +150,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset,
        }
 
        WARN_ON(!folio_test_locked(folio));
-       if (folio_get_private(folio)) {
+       if (folio_test_private(folio)) {
                dout("%p invalidate_folio idx %lu full dirty page\n",
                     inode, folio->index);
 
@@ -729,8 +729,11 @@ static void writepages_finish(struct ceph_osd_request *req)
 
        /* clean all pages */
        for (i = 0; i < req->r_num_ops; i++) {
-               if (req->r_ops[i].op != CEPH_OSD_OP_WRITE)
+               if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) {
+                       pr_warn("%s incorrect op %d req %p index %d tid %llu\n",
+                               __func__, req->r_ops[i].op, req, i, req->r_tid);
                        break;
+               }
 
                osd_data = osd_req_op_extent_osd_data(req, i);
                BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
index 6c9e837aa1d3d183d507d72c21f5a44aa1e54522..8c8226c0feaccce31395d349d2bc266b6497b033 100644 (file)
@@ -629,9 +629,15 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
        iinfo.change_attr = 1;
        ceph_encode_timespec64(&iinfo.btime, &now);
 
-       iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
-       iinfo.xattr_data = xattr_buf;
-       memset(iinfo.xattr_data, 0, iinfo.xattr_len);
+       if (req->r_pagelist) {
+               iinfo.xattr_len = req->r_pagelist->length;
+               iinfo.xattr_data = req->r_pagelist->mapped_tail;
+       } else {
+               /* fake it */
+               iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
+               iinfo.xattr_data = xattr_buf;
+               memset(iinfo.xattr_data, 0, iinfo.xattr_len);
+       }
 
        in.ino = cpu_to_le64(vino.ino);
        in.snapid = cpu_to_le64(CEPH_NOSNAP);
@@ -743,6 +749,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                err = ceph_security_init_secctx(dentry, mode, &as_ctx);
                if (err < 0)
                        goto out_ctx;
+               /* Async create can't handle more than a page of xattrs */
+               if (as_ctx.pagelist &&
+                   !list_is_singular(&as_ctx.pagelist->head))
+                       try_async = false;
        } else if (!d_in_lookup(dentry)) {
                /* If it's not being looked up, it's negative */
                return -ENOENT;
index 526a4c1bed9947b6931aae3a81de09c194229200..e78be66bbf01563cc2d18435ceb3d17a7f2da47c 100644 (file)
@@ -113,7 +113,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw,
 
        if (WARN_ON_ONCE(len <= 0))
                return -EINVAL;
-       if (WARN_ON_ONCE(len % FS_CRYPTO_BLOCK_SIZE != 0))
+       if (WARN_ON_ONCE(len % FSCRYPT_CONTENTS_ALIGNMENT != 0))
                return -EINVAL;
 
        fscrypt_generate_iv(&iv, lblk_num, ci);
@@ -213,8 +213,8 @@ EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks);
  * fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place
  * @inode:     The inode to which this block belongs
  * @page:      The page containing the block to encrypt
- * @len:       Size of block to encrypt.  Doesn't need to be a multiple of the
- *             fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
+ * @len:       Size of block to encrypt.  This must be a multiple of
+ *             FSCRYPT_CONTENTS_ALIGNMENT.
  * @offs:      Byte offset within @page at which the block to encrypt begins
  * @lblk_num:  Filesystem logical block number of the block, i.e. the 0-based
  *             number of the block within the file
@@ -283,8 +283,8 @@ EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks);
  * fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place
  * @inode:     The inode to which this block belongs
  * @page:      The page containing the block to decrypt
- * @len:       Size of block to decrypt.  Doesn't need to be a multiple of the
- *             fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
+ * @len:       Size of block to decrypt.  This must be a multiple of
+ *             FSCRYPT_CONTENTS_ALIGNMENT.
  * @offs:      Byte offset within @page at which the block to decrypt begins
  * @lblk_num:  Filesystem logical block number of the block, i.e. the 0-based
  *             number of the block within the file
index a9be4bc74a94a02342ce0e1309fb0567605379c1..14e0ef5e9a20ae53d1837edec374491fe02487bc 100644 (file)
 #include <crypto/skcipher.h>
 #include "fscrypt_private.h"
 
+/*
+ * The minimum message length (input and output length), in bytes, for all
+ * filenames encryption modes.  Filenames shorter than this will be zero-padded
+ * before being encrypted.
+ */
+#define FSCRYPT_FNAME_MIN_MSG_LEN 16
+
 /*
  * struct fscrypt_nokey_name - identifier for directory entry when key is absent
  *
@@ -267,7 +274,7 @@ bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
 
        if (orig_len > max_len)
                return false;
-       encrypted_len = max(orig_len, (u32)FS_CRYPTO_BLOCK_SIZE);
+       encrypted_len = max_t(u32, orig_len, FSCRYPT_FNAME_MIN_MSG_LEN);
        encrypted_len = round_up(encrypted_len, padding);
        *encrypted_len_ret = min(encrypted_len, max_len);
        return true;
@@ -350,7 +357,7 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
                return 0;
        }
 
-       if (iname->len < FS_CRYPTO_BLOCK_SIZE)
+       if (iname->len < FSCRYPT_FNAME_MIN_MSG_LEN)
                return -EUCLEAN;
 
        if (fscrypt_has_encryption_key(inode))
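
With the hunks above, the encrypted name length is computed as round_up(max(orig_len, FSCRYPT_FNAME_MIN_MSG_LEN), padding) rather than padding to FS_CRYPTO_BLOCK_SIZE. A stand-alone worked example of that rule:

#include <stdio.h>
#include <stdint.h>

#define MIN_MSG_LEN 16                  /* FSCRYPT_FNAME_MIN_MSG_LEN */
#define ROUND_UP(x, a) ((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
        uint32_t orig_len = 3, padding = 32;
        uint32_t len = orig_len > MIN_MSG_LEN ? orig_len : MIN_MSG_LEN;

        len = ROUND_UP(len, padding);
        printf("%u\n", len);            /* a 3-byte name encrypts to 32 */
        return 0;
}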
index 5b0a9e6478b5d4acb8cec249c92217831bdbbac1..6b4c8094cc7b084e22dfc2fca0a1fbfd0682951f 100644 (file)
@@ -545,8 +545,8 @@ struct key *
 fscrypt_find_master_key(struct super_block *sb,
                        const struct fscrypt_key_specifier *mk_spec);
 
-int fscrypt_add_test_dummy_key(struct super_block *sb,
-                              struct fscrypt_key_specifier *key_spec);
+int fscrypt_get_test_dummy_key_identifier(
+                         u8 key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]);
 
 int fscrypt_verify_key_added(struct super_block *sb,
                             const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]);
@@ -561,7 +561,9 @@ struct fscrypt_mode {
        int keysize;            /* key size in bytes */
        int security_strength;  /* security strength in bytes */
        int ivsize;             /* IV size in bytes */
-       int logged_impl_name;
+       int logged_cryptoapi_impl;
+       int logged_blk_crypto_native;
+       int logged_blk_crypto_fallback;
        enum blk_crypto_mode_num blk_crypto_mode;
 };
 
@@ -621,6 +623,8 @@ int fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_info *ci);
 
 bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
                            const union fscrypt_policy *policy2);
+int fscrypt_policy_to_key_spec(const union fscrypt_policy *policy,
+                              struct fscrypt_key_specifier *key_spec);
 bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
                              const struct inode *inode);
 int fscrypt_policy_from_context(union fscrypt_policy *policy_u,
index 93c2ca8580923dd5ea9f0e66e72d38eb57cb0999..90f3e68f166e393fe6b14f5e54554abd20ee0f52 100644 (file)
@@ -12,7 +12,7 @@
  * provides the key and IV to use.
  */
 
-#include <linux/blk-crypto.h>
+#include <linux/blk-crypto-profile.h>
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/sched/mm.h>
@@ -64,6 +64,35 @@ static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci)
        return DIV_ROUND_UP(lblk_bits, 8);
 }
 
+/*
+ * Log a message when starting to use blk-crypto (native) or blk-crypto-fallback
+ * for an encryption mode for the first time.  This is the blk-crypto
+ * counterpart to the message logged when starting to use the crypto API for the
+ * first time.  A limitation is that these messages don't convey which specific
+ * filesystems or files are using each implementation.  However, *usually*
+ * systems use just one implementation per mode, which makes these messages
+ * helpful for debugging problems where the "wrong" implementation is used.
+ */
+static void fscrypt_log_blk_crypto_impl(struct fscrypt_mode *mode,
+                                       struct request_queue **devs,
+                                       int num_devs,
+                                       const struct blk_crypto_config *cfg)
+{
+       int i;
+
+       for (i = 0; i < num_devs; i++) {
+               if (!IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) ||
+                   __blk_crypto_cfg_supported(devs[i]->crypto_profile, cfg)) {
+                       if (!xchg(&mode->logged_blk_crypto_native, 1))
+                               pr_info("fscrypt: %s using blk-crypto (native)\n",
+                                       mode->friendly_name);
+               } else if (!xchg(&mode->logged_blk_crypto_fallback, 1)) {
+                       pr_info("fscrypt: %s using blk-crypto-fallback\n",
+                               mode->friendly_name);
+               }
+       }
+}
+
 /* Enable inline encryption for this file if supported. */
 int fscrypt_select_encryption_impl(struct fscrypt_info *ci)
 {
@@ -117,6 +146,8 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci)
                        goto out_free_devs;
        }
 
+       fscrypt_log_blk_crypto_impl(ci->ci_mode, devs, num_devs, &crypto_cfg);
+
        ci->ci_inlinecrypt = true;
 out_free_devs:
        kfree(devs);
index 0b3ffbb4faf4ab0140c07208c2e6a0706e7e9cc7..caee9f8620dd9bb16f9a426d2c38c3c98471a854 100644 (file)
@@ -688,28 +688,68 @@ out_wipe_secret:
 }
 EXPORT_SYMBOL_GPL(fscrypt_ioctl_add_key);
 
-/*
- * Add the key for '-o test_dummy_encryption' to the filesystem keyring.
- *
- * Use a per-boot random key to prevent people from misusing this option.
- */
-int fscrypt_add_test_dummy_key(struct super_block *sb,
-                              struct fscrypt_key_specifier *key_spec)
+static void
+fscrypt_get_test_dummy_secret(struct fscrypt_master_key_secret *secret)
 {
        static u8 test_key[FSCRYPT_MAX_KEY_SIZE];
+
+       get_random_once(test_key, FSCRYPT_MAX_KEY_SIZE);
+
+       memset(secret, 0, sizeof(*secret));
+       secret->size = FSCRYPT_MAX_KEY_SIZE;
+       memcpy(secret->raw, test_key, FSCRYPT_MAX_KEY_SIZE);
+}
+
+int fscrypt_get_test_dummy_key_identifier(
+                               u8 key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
+{
        struct fscrypt_master_key_secret secret;
        int err;
 
-       get_random_once(test_key, FSCRYPT_MAX_KEY_SIZE);
+       fscrypt_get_test_dummy_secret(&secret);
 
-       memset(&secret, 0, sizeof(secret));
-       secret.size = FSCRYPT_MAX_KEY_SIZE;
-       memcpy(secret.raw, test_key, FSCRYPT_MAX_KEY_SIZE);
+       err = fscrypt_init_hkdf(&secret.hkdf, secret.raw, secret.size);
+       if (err)
+               goto out;
+       err = fscrypt_hkdf_expand(&secret.hkdf, HKDF_CONTEXT_KEY_IDENTIFIER,
+                                 NULL, 0, key_identifier,
+                                 FSCRYPT_KEY_IDENTIFIER_SIZE);
+out:
+       wipe_master_key_secret(&secret);
+       return err;
+}
+
+/**
+ * fscrypt_add_test_dummy_key() - add the test dummy encryption key
+ * @sb: the filesystem instance to add the key to
+ * @dummy_policy: the encryption policy for test_dummy_encryption
+ *
+ * If needed, add the key for the test_dummy_encryption mount option to the
+ * filesystem.  To prevent misuse of this mount option, a per-boot random key is
+ * used instead of a hardcoded one.  This makes it so that any encrypted files
+ * created using this option won't be accessible after a reboot.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fscrypt_add_test_dummy_key(struct super_block *sb,
+                              const struct fscrypt_dummy_policy *dummy_policy)
+{
+       const union fscrypt_policy *policy = dummy_policy->policy;
+       struct fscrypt_key_specifier key_spec;
+       struct fscrypt_master_key_secret secret;
+       int err;
 
-       err = add_master_key(sb, &secret, key_spec);
+       if (!policy)
+               return 0;
+       err = fscrypt_policy_to_key_spec(policy, &key_spec);
+       if (err)
+               return err;
+       fscrypt_get_test_dummy_secret(&secret);
+       err = add_master_key(sb, &secret, &key_spec);
        wipe_master_key_secret(&secret);
        return err;
 }
+EXPORT_SYMBOL_GPL(fscrypt_add_test_dummy_key);
 
 /*
  * Verify that the current user has added a master key with the given identifier
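
The refactor above keeps the per-boot key trick intact: a static buffer filled by get_random_once() yields one random key per boot, so files created under test_dummy_encryption become unreadable after a reboot. The trick in isolation (a sketch; 64 stands in for FSCRYPT_MAX_KEY_SIZE):

#include <linux/random.h>
#include <linux/string.h>

#define KEY_SIZE 64

static void get_boot_key(u8 *out)
{
        static u8 key[KEY_SIZE];

        get_random_once(key, KEY_SIZE); /* filled on first call only */
        memcpy(out, key, KEY_SIZE);
}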
index eede186b04ce3b274b8cb7b9d9afdf8e1e45c7cc..c35711896bd4fb3b37b617946dd34a1565c3fe10 100644 (file)
@@ -94,7 +94,7 @@ fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key,
                            mode->cipher_str, PTR_ERR(tfm));
                return tfm;
        }
-       if (!xchg(&mode->logged_impl_name, 1)) {
+       if (!xchg(&mode->logged_cryptoapi_impl, 1)) {
                /*
                 * fscrypt performance can vary greatly depending on which
                 * crypto algorithm implementation is used.  Help people debug
@@ -425,23 +425,9 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
        if (err)
                return err;
 
-       switch (ci->ci_policy.version) {
-       case FSCRYPT_POLICY_V1:
-               mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR;
-               memcpy(mk_spec.u.descriptor,
-                      ci->ci_policy.v1.master_key_descriptor,
-                      FSCRYPT_KEY_DESCRIPTOR_SIZE);
-               break;
-       case FSCRYPT_POLICY_V2:
-               mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
-               memcpy(mk_spec.u.identifier,
-                      ci->ci_policy.v2.master_key_identifier,
-                      FSCRYPT_KEY_IDENTIFIER_SIZE);
-               break;
-       default:
-               WARN_ON(1);
-               return -EINVAL;
-       }
+       err = fscrypt_policy_to_key_spec(&ci->ci_policy, &mk_spec);
+       if (err)
+               return err;
 
        key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec);
        if (IS_ERR(key)) {
index ed3d623724cddbc0cef5b06948db83726f87b574..5f858cee1e3b044312c90aebbeb83fad56a52f8c 100644 (file)
@@ -10,6 +10,7 @@
  * Modified by Eric Biggers, 2019 for v2 policy support.
  */
 
+#include <linux/fs_context.h>
 #include <linux/random.h>
 #include <linux/seq_file.h>
 #include <linux/string.h>
@@ -32,6 +33,26 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
        return !memcmp(policy1, policy2, fscrypt_policy_size(policy1));
 }
 
+int fscrypt_policy_to_key_spec(const union fscrypt_policy *policy,
+                              struct fscrypt_key_specifier *key_spec)
+{
+       switch (policy->version) {
+       case FSCRYPT_POLICY_V1:
+               key_spec->type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR;
+               memcpy(key_spec->u.descriptor, policy->v1.master_key_descriptor,
+                      FSCRYPT_KEY_DESCRIPTOR_SIZE);
+               return 0;
+       case FSCRYPT_POLICY_V2:
+               key_spec->type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
+               memcpy(key_spec->u.identifier, policy->v2.master_key_identifier,
+                      FSCRYPT_KEY_IDENTIFIER_SIZE);
+               return 0;
+       default:
+               WARN_ON(1);
+               return -EINVAL;
+       }
+}
+
 static const union fscrypt_policy *
 fscrypt_get_dummy_policy(struct super_block *sb)
 {
@@ -704,73 +725,45 @@ int fscrypt_set_context(struct inode *inode, void *fs_data)
 EXPORT_SYMBOL_GPL(fscrypt_set_context);
 
 /**
- * fscrypt_set_test_dummy_encryption() - handle '-o test_dummy_encryption'
- * @sb: the filesystem on which test_dummy_encryption is being specified
- * @arg: the argument to the test_dummy_encryption option.  May be NULL.
- * @dummy_policy: the filesystem's current dummy policy (input/output, see
- *               below)
- *
- * Handle the test_dummy_encryption mount option by creating a dummy encryption
- * policy, saving it in @dummy_policy, and adding the corresponding dummy
- * encryption key to the filesystem.  If the @dummy_policy is already set, then
- * instead validate that it matches @arg.  Don't support changing it via
- * remount, as that is difficult to do safely.
+ * fscrypt_parse_test_dummy_encryption() - parse the test_dummy_encryption mount option
+ * @param: the mount option
+ * @dummy_policy: (input/output) the place to write the dummy policy that will
+ *     result from parsing the option.  Zero-initialize this.  If a policy is
+ *     already set here (due to test_dummy_encryption being given multiple
+ *     times), then this function will verify that the policies are the same.
  *
- * Return: 0 on success (dummy policy set, or the same policy is already set);
- *         -EEXIST if a different dummy policy is already set;
- *         or another -errno value.
+ * Return: 0 on success; -EINVAL if the argument is invalid; -EEXIST if the
+ *        argument conflicts with one already specified; or -ENOMEM.
  */
-int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg,
-                                     struct fscrypt_dummy_policy *dummy_policy)
+int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param,
+                               struct fscrypt_dummy_policy *dummy_policy)
 {
-       struct fscrypt_key_specifier key_spec = { 0 };
-       int version;
-       union fscrypt_policy *policy = NULL;
+       const char *arg = "v2";
+       union fscrypt_policy *policy;
        int err;
 
-       if (!arg)
-               arg = "v2";
-
-       if (!strcmp(arg, "v1")) {
-               version = FSCRYPT_POLICY_V1;
-               key_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR;
-               memset(key_spec.u.descriptor, 0x42,
-                      FSCRYPT_KEY_DESCRIPTOR_SIZE);
-       } else if (!strcmp(arg, "v2")) {
-               version = FSCRYPT_POLICY_V2;
-               key_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
-               /* key_spec.u.identifier gets filled in when adding the key */
-       } else {
-               err = -EINVAL;
-               goto out;
-       }
+       if (param->type == fs_value_is_string && *param->string)
+               arg = param->string;
 
        policy = kzalloc(sizeof(*policy), GFP_KERNEL);
-       if (!policy) {
-               err = -ENOMEM;
-               goto out;
-       }
-
-       err = fscrypt_add_test_dummy_key(sb, &key_spec);
-       if (err)
-               goto out;
+       if (!policy)
+               return -ENOMEM;
 
-       policy->version = version;
-       switch (policy->version) {
-       case FSCRYPT_POLICY_V1:
+       if (!strcmp(arg, "v1")) {
+               policy->version = FSCRYPT_POLICY_V1;
                policy->v1.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
                policy->v1.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
-               memcpy(policy->v1.master_key_descriptor, key_spec.u.descriptor,
+               memset(policy->v1.master_key_descriptor, 0x42,
                       FSCRYPT_KEY_DESCRIPTOR_SIZE);
-               break;
-       case FSCRYPT_POLICY_V2:
+       } else if (!strcmp(arg, "v2")) {
+               policy->version = FSCRYPT_POLICY_V2;
                policy->v2.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
                policy->v2.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
-               memcpy(policy->v2.master_key_identifier, key_spec.u.identifier,
-                      FSCRYPT_KEY_IDENTIFIER_SIZE);
-               break;
-       default:
-               WARN_ON(1);
+               err = fscrypt_get_test_dummy_key_identifier(
+                               policy->v2.master_key_identifier);
+               if (err)
+                       goto out;
+       } else {
                err = -EINVAL;
                goto out;
        }
@@ -789,6 +782,37 @@ out:
        kfree(policy);
        return err;
 }
+EXPORT_SYMBOL_GPL(fscrypt_parse_test_dummy_encryption);
+
+/**
+ * fscrypt_dummy_policies_equal() - check whether two dummy policies are equal
+ * @p1: the first test dummy policy (may be unset)
+ * @p2: the second test dummy policy (may be unset)
+ *
+ * Return: %true if the dummy policies are both set and equal, or both unset.
+ */
+bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1,
+                                 const struct fscrypt_dummy_policy *p2)
+{
+       if (!p1->policy && !p2->policy)
+               return true;
+       if (!p1->policy || !p2->policy)
+               return false;
+       return fscrypt_policies_equal(p1->policy, p2->policy);
+}
+EXPORT_SYMBOL_GPL(fscrypt_dummy_policies_equal);
+
+/* Deprecated, do not use */
+int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg,
+                                     struct fscrypt_dummy_policy *dummy_policy)
+{
+       struct fs_parameter param = {
+               .type = fs_value_is_string,
+               .string = arg ? (char *)arg : "",
+       };
+       return fscrypt_parse_test_dummy_encryption(&param, dummy_policy) ?:
+               fscrypt_add_test_dummy_key(sb, dummy_policy);
+}
 EXPORT_SYMBOL_GPL(fscrypt_set_test_dummy_encryption);
 
 /**
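
The deprecated wrapper above documents the intended split: fscrypt_parse_test_dummy_encryption() runs while the fs_context is being built, and fscrypt_add_test_dummy_key() runs later, once a superblock exists. A sketch of a filesystem adopting the pair (callback names are illustrative, not from this series):

#include <linux/fs_context.h>
#include <linux/fscrypt.h>

/* In the filesystem's ->parse_param() handler: */
static int myfs_parse_dummy_enc(struct fs_parameter *param,
                                struct fscrypt_dummy_policy *dp)
{
        return fscrypt_parse_test_dummy_encryption(param, dp);
}

/* Later, in fill_super(), when @sb finally exists: */
static int myfs_apply_dummy_enc(struct super_block *sb,
                                struct fscrypt_dummy_policy *dp)
{
        return fscrypt_add_test_dummy_key(sb, dp);
}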
index aef06e607b4054073f47b8cc4f8f274f903fd1a2..840752006f601aab0c23924425da77ea0e8ae39f 100644 (file)
@@ -1115,11 +1115,10 @@ static inline int drop_refcount(struct dio *dio)
  * individual fields and will generate much worse code. This is important
  * for the whole file.
  */
-static inline ssize_t
-do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
-                     struct block_device *bdev, struct iov_iter *iter,
-                     get_block_t get_block, dio_iodone_t end_io,
-                     dio_submit_t submit_io, int flags)
+ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
+               struct block_device *bdev, struct iov_iter *iter,
+               get_block_t get_block, dio_iodone_t end_io,
+               dio_submit_t submit_io, int flags)
 {
        unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
        unsigned blkbits = i_blkbits;
@@ -1334,29 +1333,6 @@ fail_dio:
        kmem_cache_free(dio_cache, dio);
        return retval;
 }
-
-ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
-                            struct block_device *bdev, struct iov_iter *iter,
-                            get_block_t get_block,
-                            dio_iodone_t end_io, dio_submit_t submit_io,
-                            int flags)
-{
-       /*
-        * The block device state is needed in the end to finally
-        * submit everything.  Since it's likely to be cache cold
-        * prefetch it here as first thing to hide some of the
-        * latency.
-        *
-        * Attempt to prefetch the pieces we likely need later.
-        */
-       prefetch(&bdev->bd_disk->part_tbl);
-       prefetch(bdev->bd_disk->queue);
-       prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES);
-
-       return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block,
-                                    end_io, submit_io, flags);
-}
-
 EXPORT_SYMBOL(__blockdev_direct_IO);
 
 static __init int dio_init(void)
index 2f513005923661d4962817b7b6531f1b6548b7da..20d4e47f57ab2e27310f5da64df61eae95d3c8b3 100644 (file)
@@ -351,21 +351,20 @@ out:
 
 static int exfat_ioctl_fitrim(struct inode *inode, unsigned long arg)
 {
-       struct request_queue *q = bdev_get_queue(inode->i_sb->s_bdev);
        struct fstrim_range range;
        int ret = 0;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(inode->i_sb->s_bdev))
                return -EOPNOTSUPP;
 
        if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range)))
                return -EFAULT;
 
        range.minlen = max_t(unsigned int, range.minlen,
-                               q->limits.discard_granularity);
+                               bdev_discard_granularity(inode->i_sb->s_bdev));
 
        ret = exfat_trim_fs(inode, &range);
        if (ret < 0)
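
This is the first of several identical FITRIM conversions in this series; the ext4, f2fs and fat hunks below follow the same shape. The shared pattern as one helper (a sketch):

#include <linux/blkdev.h>
#include <linux/fs.h>

static int fitrim_prepare(struct block_device *bdev,
                          struct fstrim_range *range)
{
        if (!bdev_max_discard_sectors(bdev))
                return -EOPNOTSUPP;     /* no discard support */
        range->minlen = max_t(unsigned int, range->minlen,
                              bdev_discard_granularity(bdev));
        return 0;                       /* caller runs the fs trim */
}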
index 8ca21e7917d16a8806c4debb09b1a4e2197a4ac8..be0788ecaf20e894bf00fb7373e3e83f4a09a41e 100644 (file)
@@ -627,13 +627,9 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
        if (opts->allow_utime == (unsigned short)-1)
                opts->allow_utime = ~opts->fs_dmask & 0022;
 
-       if (opts->discard) {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
-
-               if (!blk_queue_discard(q)) {
-                       exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard");
-                       opts->discard = 0;
-               }
+       if (opts->discard && !bdev_max_discard_sectors(sb->s_bdev)) {
+               exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard");
+               opts->discard = 0;
        }
 
        sb->s_flags |= SB_NODIRATIME;
index ba44fa1be70aba8b8dfa30935ee2d9cae2af2e90..4d1d2326eee9a381ef095aea94d8f385ad64d262 100644 (file)
@@ -1044,7 +1044,6 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
        __u32 flags = 0;
        unsigned int flush_flags = 0;
        struct super_block *sb = file_inode(filp)->i_sb;
-       struct request_queue *q;
 
        if (copy_from_user(&flags, (__u32 __user *)arg,
                                sizeof(__u32)))
@@ -1065,10 +1064,8 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
        if (flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID)
                return -EINVAL;
 
-       q = bdev_get_queue(EXT4_SB(sb)->s_journal->j_dev);
-       if (!q)
-               return -ENXIO;
-       if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+       if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+           !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev))
                return -EOPNOTSUPP;
 
        if (flags & EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
@@ -1393,14 +1390,13 @@ resizefs_out:
 
        case FITRIM:
        {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
 
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
-               if (!blk_queue_discard(q))
+               if (!bdev_max_discard_sectors(sb->s_bdev))
                        return -EOPNOTSUPP;
 
                /*
index 252c168454c7fcfd83a89a7207351190395dba86..ea653d19f9ec76a1996770bade735713b5989ecc 100644 (file)
@@ -3498,7 +3498,7 @@ int ext4_mb_init(struct super_block *sb)
                spin_lock_init(&lg->lg_prealloc_lock);
        }
 
-       if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
+       if (bdev_nonrot(sb->s_bdev))
                sbi->s_mb_max_linear_groups = 0;
        else
                sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
@@ -3629,7 +3629,7 @@ static inline int ext4_issue_discard(struct super_block *sb,
                return __blkdev_issue_discard(sb->s_bdev,
                        (sector_t)discard_block << (sb->s_blocksize_bits - 9),
                        (sector_t)count << (sb->s_blocksize_bits - 9),
-                       GFP_NOFS, 0, biop);
+                       GFP_NOFS, biop);
        } else
                return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
 }
@@ -6455,7 +6455,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
  */
 int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 {
-       struct request_queue *q = bdev_get_queue(sb->s_bdev);
+       unsigned int discard_granularity = bdev_discard_granularity(sb->s_bdev);
        struct ext4_group_info *grp;
        ext4_group_t group, first_group, last_group;
        ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
@@ -6475,9 +6475,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
            range->len < sb->s_blocksize)
                return -EINVAL;
        /* No point to try to trim less than discard granularity */
-       if (range->minlen < q->limits.discard_granularity) {
+       if (range->minlen < discard_granularity) {
                minlen = EXT4_NUM_B2C(EXT4_SB(sb),
-                       q->limits.discard_granularity >> sb->s_blocksize_bits);
+                               discard_granularity >> sb->s_blocksize_bits);
                if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
                        goto out;
        }
index 1466fbdbc8e345974b07c48c05aee1de79f0a1de..6900da973ce2872739571d512b12d21e98d1ebb9 100644 (file)
@@ -5474,13 +5474,9 @@ no_journal:
                        goto failed_mount9;
        }
 
-       if (test_opt(sb, DISCARD)) {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
-               if (!blk_queue_discard(q))
-                       ext4_msg(sb, KERN_WARNING,
-                                "mounting with \"discard\" option, but "
-                                "the device does not support discard");
-       }
+       if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
+               ext4_msg(sb, KERN_WARNING,
+                        "mounting with \"discard\" option, but the device does not support discard");
 
        if (es->s_error_count)
                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
index 8c570de21ed5aaa0b98212cb44b9f584c68ee052..2b2b3c87e45e05c5d00cb2d52acf33ad2dc925bd 100644 (file)
@@ -4372,8 +4372,7 @@ static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
 
 static inline bool f2fs_bdev_support_discard(struct block_device *bdev)
 {
-       return blk_queue_discard(bdev_get_queue(bdev)) ||
-              bdev_is_zoned(bdev);
+       return bdev_max_discard_sectors(bdev) || bdev_is_zoned(bdev);
 }
 
 static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
index 5b89af0f27f053265672a30e595ad4db26a2905c..35b6c720c2bc155211ea024ee1c05a4175350e1d 100644 (file)
@@ -2285,7 +2285,6 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
        struct super_block *sb = inode->i_sb;
-       struct request_queue *q = bdev_get_queue(sb->s_bdev);
        struct fstrim_range range;
        int ret;
 
@@ -2304,7 +2303,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
                return ret;
 
        range.minlen = max((unsigned int)range.minlen,
-                               q->limits.discard_granularity);
+                          bdev_discard_granularity(sb->s_bdev));
        ret = f2fs_trim_fs(F2FS_SB(sb), &range);
        mnt_drop_write_file(filp);
        if (ret < 0)
@@ -3686,18 +3685,18 @@ out:
 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
                pgoff_t off, block_t block, block_t len, u32 flags)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
        sector_t sector = SECTOR_FROM_BLOCK(block);
        sector_t nr_sects = SECTOR_FROM_BLOCK(len);
        int ret = 0;
 
-       if (!q)
-               return -ENXIO;
-
-       if (flags & F2FS_TRIM_FILE_DISCARD)
-               ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
-                                               blk_queue_secure_erase(q) ?
-                                               BLKDEV_DISCARD_SECURE : 0);
+       if (flags & F2FS_TRIM_FILE_DISCARD) {
+               if (bdev_max_secure_erase_sectors(bdev))
+                       ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
+                                       GFP_NOFS);
+               else
+                       ret = blkdev_issue_discard(bdev, sector, nr_sects,
+                                       GFP_NOFS);
+       }
 
        if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
                if (IS_ENCRYPTED(inode))
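
With the hunk above, secure erase is its own operation with its own capability check rather than a BLKDEV_DISCARD_SECURE flag on blkdev_issue_discard(). The new branch, condensed (sketch):

#include <linux/blkdev.h>

static int discard_or_secure_erase(struct block_device *bdev,
                                   sector_t sector, sector_t nr_sects)
{
        if (bdev_max_secure_erase_sectors(bdev))
                return blkdev_issue_secure_erase(bdev, sector, nr_sects,
                                                 GFP_NOFS);
        return blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS);
}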
index bd9731cdec565132d659dc31f4752e5c5fbcf5f2..7225ce09f3ab9a4931f9e272a26e30066efefd40 100644 (file)
@@ -1196,9 +1196,8 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
                                                unsigned int *issued)
 {
        struct block_device *bdev = dc->bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
        unsigned int max_discard_blocks =
-                       SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
+                       SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
                                        &(dcc->fstrim_list) : &(dcc->wait_list);
@@ -1245,7 +1244,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
                err = __blkdev_issue_discard(bdev,
                                        SECTOR_FROM_BLOCK(start),
                                        SECTOR_FROM_BLOCK(len),
-                                       GFP_NOFS, 0, &bio);
+                                       GFP_NOFS, &bio);
 submit:
                if (err) {
                        spin_lock_irqsave(&dc->lock, flags);
@@ -1375,9 +1374,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
        struct discard_cmd *dc;
        struct discard_info di = {0};
        struct rb_node **insert_p = NULL, *insert_parent = NULL;
-       struct request_queue *q = bdev_get_queue(bdev);
        unsigned int max_discard_blocks =
-                       SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
+                       SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
        block_t end = lstart + len;
 
        dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
index a5a309fcc7faf6dbdffa07243208e3d53e6c9462..bf91f977debea1968f1b0de7d45d2a3364b71834 100644 (file)
@@ -127,13 +127,12 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
        struct super_block *sb = inode->i_sb;
        struct fstrim_range __user *user_range;
        struct fstrim_range range;
-       struct request_queue *q = bdev_get_queue(sb->s_bdev);
        int err;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(sb->s_bdev))
                return -EOPNOTSUPP;
 
        user_range = (struct fstrim_range __user *)arg;
@@ -141,7 +140,7 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
                return -EFAULT;
 
        range.minlen = max_t(unsigned int, range.minlen,
-                            q->limits.discard_granularity);
+                            bdev_discard_granularity(sb->s_bdev));
 
        err = fat_trim_fs(inode, &range);
        if (err < 0)
index bf6051bdf1d1d98ccc0ce64dd720734080f28a94..3d1afb95a925a609b797666abb55cd21035f27df 100644 (file)
@@ -1872,13 +1872,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
                goto out_fail;
        }
 
-       if (sbi->options.discard) {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
-               if (!blk_queue_discard(q))
-                       fat_msg(sb, KERN_WARNING,
-                                       "mounting with \"discard\" option, but "
-                                       "the device does not support discard");
-       }
+       if (sbi->options.discard && !bdev_max_discard_sectors(sb->s_bdev))
+               fat_msg(sb, KERN_WARNING,
+                       "mounting with \"discard\" option, but the device does not support discard");
 
        fat_set_state(sb, 1, 0);
        return 0;
index 591fe9cf1659301372abe7efff2c68a5f76d2d0c..a1074a26e784d227922fcf558048790a87380330 100644 (file)
@@ -1712,6 +1712,10 @@ static int writeback_single_inode(struct inode *inode,
         */
        if (!(inode->i_state & I_DIRTY_ALL))
                inode_cgwb_move_to_attached(inode, wb);
+       else if (!(inode->i_state & I_SYNC_QUEUED) &&
+                (inode->i_state & I_DIRTY))
+               redirty_tail_locked(inode, wb);
+
        spin_unlock(&wb->list_lock);
        inode_sync_complete(inode);
 out:
@@ -1775,11 +1779,12 @@ static long writeback_sb_inodes(struct super_block *sb,
        };
        unsigned long start_time = jiffies;
        long write_chunk;
-       long wrote = 0;  /* count both pages and inodes */
+       long total_wrote = 0;  /* count both pages and inodes */
 
        while (!list_empty(&wb->b_io)) {
                struct inode *inode = wb_inode(wb->b_io.prev);
                struct bdi_writeback *tmp_wb;
+               long wrote;
 
                if (inode->i_sb != sb) {
                        if (work->sb) {
@@ -1855,7 +1860,9 @@ static long writeback_sb_inodes(struct super_block *sb,
 
                wbc_detach_inode(&wbc);
                work->nr_pages -= write_chunk - wbc.nr_to_write;
-               wrote += write_chunk - wbc.nr_to_write;
+               wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;
+               wrote = wrote < 0 ? 0 : wrote;
+               total_wrote += wrote;
 
                if (need_resched()) {
                        /*
@@ -1877,7 +1884,7 @@ static long writeback_sb_inodes(struct super_block *sb,
                tmp_wb = inode_to_wb_and_lock_list(inode);
                spin_lock(&inode->i_lock);
                if (!(inode->i_state & I_DIRTY_ALL))
-                       wrote++;
+                       total_wrote++;
                requeue_inode(inode, tmp_wb, &wbc);
                inode_sync_complete(inode);
                spin_unlock(&inode->i_lock);
@@ -1891,14 +1898,14 @@ static long writeback_sb_inodes(struct super_block *sb,
                 * bail out to wb_writeback() often enough to check
                 * background threshold and other termination conditions.
                 */
-               if (wrote) {
+               if (total_wrote) {
                        if (time_is_before_jiffies(start_time + HZ / 10UL))
                                break;
                        if (work->nr_pages <= 0)
                                break;
                }
        }
-       return wrote;
+       return total_wrote;
 }
 
 static long __writeback_inodes_wb(struct bdi_writeback *wb,
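
The writeback_sb_inodes() hunks above subtract wbc.pages_skipped from the per-inode progress estimate and clamp at zero, so skipped pages no longer count as progress and a negative result cannot leak into the total. A tiny stand-alone demo of the arithmetic:

#include <stdio.h>

int main(void)
{
        long write_chunk = 1024, nr_to_write = 1000, pages_skipped = 30;
        long wrote = write_chunk - nr_to_write - pages_skipped;

        wrote = wrote < 0 ? 0 : wrote;  /* 24 written, 30 skipped -> 0 */
        printf("%ld\n", wrote);         /* prints 0, not -6 */
        return 0;
}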
index 39080b2d6cf86d26287b6e91b134997b5a1d5dff..b6697333bb2b9e77345f324fb515dc93419f0854 100644 (file)
@@ -1153,13 +1153,12 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 
        if (length != written && (iomap->flags & IOMAP_F_NEW)) {
                /* Deallocate blocks that were just allocated. */
-               loff_t blockmask = i_blocksize(inode) - 1;
-               loff_t end = (pos + length) & ~blockmask;
+               loff_t hstart = round_up(pos + written, i_blocksize(inode));
+               loff_t hend = iomap->offset + iomap->length;
 
-               pos = (pos + written + blockmask) & ~blockmask;
-               if (pos < end) {
-                       truncate_pagecache_range(inode, pos, end - 1);
-                       punch_hole(ip, pos, end - pos);
+               if (hstart < hend) {
+                       truncate_pagecache_range(inode, hstart, hend - 1);
+                       punch_hole(ip, hstart, hend - hstart);
                }
        }
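
The gfs2_iomap_end() hunk above recomputes the deallocation window from what was actually written and from the iomap extent itself. A stand-alone worked example with 4 KiB blocks: a 16 KiB mapping that only received 5000 bytes keeps the partially written second block and frees the untouched tail.

#include <stdio.h>

#define ROUND_UP(x, a) ((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
        long blksz = 4096, pos = 0, written = 5000;
        long map_off = 0, map_len = 16384;      /* the iomap extent */
        long hstart = ROUND_UP(pos + written, blksz);   /* 8192 */
        long hend = map_off + map_len;                  /* 16384 */

        if (hstart < hend)
                printf("punch %ld..%ld\n", hstart, hend - 1);
        return 0;
}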
 
index 48f01323c37c1b2d7e5bfb5b122569d64bc592d2..2556ae1f92ea2d9e27d0fefe9c60c09543c6c563 100644 (file)
@@ -770,30 +770,27 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
        return ret ? ret : ret1;
 }
 
-static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
+static inline bool should_fault_in_pages(struct iov_iter *i,
+                                        struct kiocb *iocb,
                                         size_t *prev_count,
                                         size_t *window_size)
 {
        size_t count = iov_iter_count(i);
        size_t size, offs;
 
-       if (likely(!count))
-               return false;
-       if (ret <= 0 && ret != -EFAULT)
+       if (!count)
                return false;
        if (!iter_is_iovec(i))
                return false;
 
        size = PAGE_SIZE;
-       offs = offset_in_page(i->iov[0].iov_base + i->iov_offset);
+       offs = offset_in_page(iocb->ki_pos);
        if (*prev_count != count || !*window_size) {
                size_t nr_dirtied;
 
-               size = ALIGN(offs + count, PAGE_SIZE);
-               size = min_t(size_t, size, SZ_1M);
                nr_dirtied = max(current->nr_dirtied_pause -
                                 current->nr_dirtied, 8);
-               size = min(size, nr_dirtied << PAGE_SHIFT);
+               size = min_t(size_t, SZ_1M, nr_dirtied << PAGE_SHIFT);
        }
 
        *prev_count = count;
@@ -807,7 +804,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
        struct file *file = iocb->ki_filp;
        struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
        size_t prev_count = 0, window_size = 0;
-       size_t written = 0;
+       size_t read = 0;
        ssize_t ret;
 
        /*
@@ -835,35 +832,31 @@ retry:
        ret = gfs2_glock_nq(gh);
        if (ret)
                goto out_uninit;
-retry_under_glock:
        pagefault_disable();
        to->nofault = true;
        ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
-                          IOMAP_DIO_PARTIAL, written);
+                          IOMAP_DIO_PARTIAL, read);
        to->nofault = false;
        pagefault_enable();
+       if (ret <= 0 && ret != -EFAULT)
+               goto out_unlock;
        if (ret > 0)
-               written = ret;
-
-       if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
-               size_t leftover;
+               read = ret;
 
-               gfs2_holder_allow_demote(gh);
-               leftover = fault_in_iov_iter_writeable(to, window_size);
-               gfs2_holder_disallow_demote(gh);
-               if (leftover != window_size) {
-                       if (gfs2_holder_queued(gh))
-                               goto retry_under_glock;
+       if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
+               gfs2_glock_dq(gh);
+               window_size -= fault_in_iov_iter_writeable(to, window_size);
+               if (window_size)
                        goto retry;
-               }
        }
+out_unlock:
        if (gfs2_holder_queued(gh))
                gfs2_glock_dq(gh);
 out_uninit:
        gfs2_holder_uninit(gh);
        if (ret < 0)
                return ret;
-       return written;
+       return read;
 }
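
The rewrite above (repeated for the direct write and buffered read paths below) replaces the retry-under-glock scheme with a simpler one: drop the lock outright, fault the window in, and retry only if fault-in made progress. A minimal compilable model, with lock_fs()/do_io()/fault_in() as hypothetical stand-ins for gfs2_glock_nq(), iomap_dio_rw() and fault_in_iov_iter_writeable():

#include <errno.h>
#include <stddef.h>

extern void lock_fs(void);
extern void unlock_fs(void);
extern long do_io(size_t *done);        /* resumes after *done bytes */
extern size_t fault_in(size_t window);  /* returns bytes NOT faulted in */

static long read_with_retry(size_t window)
{
        size_t done = 0;
        long ret;

retry:
        lock_fs();
        ret = do_io(&done);
        if (ret == -EFAULT) {
                unlock_fs();
                window -= fault_in(window);
                if (window)             /* some bytes became usable */
                        goto retry;
                return -EFAULT;
        }
        unlock_fs();
        return done ? (long)done : ret;
}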
 
 static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
@@ -873,7 +866,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
        struct inode *inode = file->f_mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        size_t prev_count = 0, window_size = 0;
-       size_t read = 0;
+       size_t written = 0;
        ssize_t ret;
 
        /*
@@ -901,39 +894,35 @@ retry:
                goto out_uninit;
        /* Silently fall back to buffered I/O when writing beyond EOF */
        if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode))
-               goto out;
-retry_under_glock:
+               goto out_unlock;
 
        from->nofault = true;
        ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
-                          IOMAP_DIO_PARTIAL, read);
+                          IOMAP_DIO_PARTIAL, written);
        from->nofault = false;
-
-       if (ret == -ENOTBLK)
-               ret = 0;
+       if (ret <= 0) {
+               if (ret == -ENOTBLK)
+                       ret = 0;
+               if (ret != -EFAULT)
+                       goto out_unlock;
+       }
        if (ret > 0)
-               read = ret;
-
-       if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
-               size_t leftover;
+               written = ret;
 
-               gfs2_holder_allow_demote(gh);
-               leftover = fault_in_iov_iter_readable(from, window_size);
-               gfs2_holder_disallow_demote(gh);
-               if (leftover != window_size) {
-                       if (gfs2_holder_queued(gh))
-                               goto retry_under_glock;
+       if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+               gfs2_glock_dq(gh);
+               window_size -= fault_in_iov_iter_readable(from, window_size);
+               if (window_size)
                        goto retry;
-               }
        }
-out:
+out_unlock:
        if (gfs2_holder_queued(gh))
                gfs2_glock_dq(gh);
 out_uninit:
        gfs2_holder_uninit(gh);
        if (ret < 0)
                return ret;
-       return read;
+       return written;
 }
 
 static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -941,7 +930,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        struct gfs2_inode *ip;
        struct gfs2_holder gh;
        size_t prev_count = 0, window_size = 0;
-       size_t written = 0;
+       size_t read = 0;
        ssize_t ret;
 
        /*
@@ -962,7 +951,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (ret >= 0) {
                if (!iov_iter_count(to))
                        return ret;
-               written = ret;
+               read = ret;
        } else if (ret != -EFAULT) {
                if (ret != -EAGAIN)
                        return ret;
@@ -975,30 +964,26 @@ retry:
        ret = gfs2_glock_nq(&gh);
        if (ret)
                goto out_uninit;
-retry_under_glock:
        pagefault_disable();
        ret = generic_file_read_iter(iocb, to);
        pagefault_enable();
+       if (ret <= 0 && ret != -EFAULT)
+               goto out_unlock;
        if (ret > 0)
-               written += ret;
-
-       if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
-               size_t leftover;
+               read += ret;
 
-               gfs2_holder_allow_demote(&gh);
-               leftover = fault_in_iov_iter_writeable(to, window_size);
-               gfs2_holder_disallow_demote(&gh);
-               if (leftover != window_size) {
-                       if (gfs2_holder_queued(&gh))
-                               goto retry_under_glock;
+       if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
+               gfs2_glock_dq(&gh);
+               window_size -= fault_in_iov_iter_writeable(to, window_size);
+               if (window_size)
                        goto retry;
-               }
        }
+out_unlock:
        if (gfs2_holder_queued(&gh))
                gfs2_glock_dq(&gh);
 out_uninit:
        gfs2_holder_uninit(&gh);
-       return written ? written : ret;
+       return read ? read : ret;
 }
 
 static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
@@ -1012,7 +997,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
        struct gfs2_holder *statfs_gh = NULL;
        size_t prev_count = 0, window_size = 0;
        size_t orig_count = iov_iter_count(from);
-       size_t read = 0;
+       size_t written = 0;
        ssize_t ret;
 
        /*
@@ -1030,10 +1015,18 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
 
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
 retry:
+       if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+               window_size -= fault_in_iov_iter_readable(from, window_size);
+               if (!window_size) {
+                       ret = -EFAULT;
+                       goto out_uninit;
+               }
+               from->count = min(from->count, window_size);
+       }
        ret = gfs2_glock_nq(gh);
        if (ret)
                goto out_uninit;
-retry_under_glock:
+
        if (inode == sdp->sd_rindex) {
                struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 
@@ -1050,25 +1043,19 @@ retry_under_glock:
        current->backing_dev_info = NULL;
        if (ret > 0) {
                iocb->ki_pos += ret;
-               read += ret;
+               written += ret;
        }
 
        if (inode == sdp->sd_rindex)
                gfs2_glock_dq_uninit(statfs_gh);
 
-       from->count = orig_count - read;
-       if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
-               size_t leftover;
-
-               gfs2_holder_allow_demote(gh);
-               leftover = fault_in_iov_iter_readable(from, window_size);
-               gfs2_holder_disallow_demote(gh);
-               if (leftover != window_size) {
-                       from->count = min(from->count, window_size - leftover);
-                       if (gfs2_holder_queued(gh))
-                               goto retry_under_glock;
-                       goto retry;
-               }
+       if (ret <= 0 && ret != -EFAULT)
+               goto out_unlock;
+
+       from->count = orig_count - written;
+       if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+               gfs2_glock_dq(gh);
+               goto retry;
        }
 out_unlock:
        if (gfs2_holder_queued(gh))
@@ -1077,8 +1064,8 @@ out_uninit:
        gfs2_holder_uninit(gh);
        if (statfs_gh)
                kfree(statfs_gh);
-       from->count = orig_count - read;
-       return read ? read : ret;
+       from->count = orig_count - written;
+       return written ? written : ret;
 }
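All four converted paths above share the same shape: attempt the I/O with page faults disabled while the glock is held, and on -EFAULT drop the lock entirely, fault the user pages in, and retry from the top (the buffered-write path even prefaults before taking the lock). A toy single-threaded model of that control flow; every name here, including the fake do_io(), is invented for illustration and none of it is gfs2 code:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int faults_left = 2;     /* pretend the first two attempts fault */
static bool locked;

static int do_io(void)
{
        return faults_left ? (faults_left--, -EFAULT) : 4096;
}

int main(void)
{
        int ret;

retry:
        locked = true;                  /* gfs2_glock_nq()           */
        ret = do_io();                  /* pagefault_disable()d I/O  */
        if (ret == -EFAULT) {
                locked = false;         /* drop the lock first,      */
                puts("fault in user pages without the lock held");
                goto retry;             /* then retry from the top   */
        }
        locked = false;                 /* gfs2_glock_dq()           */
        printf("transferred %d bytes\n", ret);
        return 0;
}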
 
 /**
index 801ad9f4f2bef9cc4b0ec335cc794a6c572d733f..6d26bb52548448a67170b3150b32773afd3cb45f 100644 (file)
@@ -1386,7 +1386,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 {
        struct inode *inode = file_inode(filp);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
-       struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
+       struct block_device *bdev = sdp->sd_vfs->s_bdev;
        struct buffer_head *bh;
        struct gfs2_rgrpd *rgd;
        struct gfs2_rgrpd *rgd_end;
@@ -1405,7 +1405,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
        if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
                return -EROFS;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(bdev))
                return -EOPNOTSUPP;
 
        if (copy_from_user(&r, argp, sizeof(r)))
@@ -1418,8 +1418,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
        start = r.start >> bs_shift;
        end = start + (r.len >> bs_shift);
        minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize);
-       minlen = max_t(u64, minlen,
-                      q->limits.discard_granularity) >> bs_shift;
+       minlen = max_t(u64, minlen, bdev_discard_granularity(bdev)) >> bs_shift;
 
        if (end <= start || minlen > sdp->sd_max_rg_data)
                return -EINVAL;
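For reference, this function sits behind the FITRIM ioctl. A small userspace sketch that exercises it on a mounted filesystem; an EOPNOTSUPP failure here corresponds to bdev_max_discard_sectors() returning 0 in the new check:

/* build: cc -o fstrim-demo fstrim-demo.c */
#include <fcntl.h>
#include <linux/fs.h>           /* FITRIM, struct fstrim_range */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        struct fstrim_range range;
        int fd;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
                return 1;
        }
        fd = open(argv[1], O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }
        memset(&range, 0, sizeof(range));
        range.len = UINT64_MAX;         /* whole filesystem */
        range.minlen = 0;               /* kernel raises this to the discard granularity */

        if (ioctl(fd, FITRIM, &range) < 0) {
                perror("FITRIM");
                close(fd);
                return 1;
        }
        printf("trimmed %llu bytes\n", (unsigned long long)range.len);
        close(fd);
        return 0;
}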
index 08503dc68d2b1ef7f2cc0f6b0986654036dcdc1a..9a6c233ee7f12cc312b4fffb5babeb722401452b 100644 (file)
@@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in,
                         struct pipe_inode_info *opipe,
                         loff_t *offset,
                         size_t len, unsigned int flags);
+
+/*
+ * fs/xattr.c:
+ */
+struct xattr_name {
+       char name[XATTR_NAME_MAX + 1];
+};
+
+struct xattr_ctx {
+       /* Value of attribute */
+       union {
+               const void __user *cvalue;
+               void __user *value;
+       };
+       void *kvalue;
+       size_t size;
+       /* Attribute name */
+       struct xattr_name *kname;
+       unsigned int flags;
+};
+
+
+ssize_t do_getxattr(struct user_namespace *mnt_userns,
+                   struct dentry *d,
+                   struct xattr_ctx *ctx);
+
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct xattr_ctx *ctx);
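These declarations export the xattr internals so io_uring's new *XATTR opcodes can share code with the regular syscalls. The classic path they mirror, as a runnable userspace sketch (the test file path and attribute name are arbitrary; user.* attributes need a filesystem that supports them):

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
        const char *path = "testfile";          /* assumed to exist */
        const char *val = "hello";
        char buf[64];
        ssize_t n;

        if (setxattr(path, "user.demo", val, strlen(val), 0) < 0) {
                perror("setxattr");
                return 1;
        }
        n = getxattr(path, "user.demo", buf, sizeof(buf));
        if (n < 0) {
                perror("getxattr");
                return 1;
        }
        printf("user.demo = %.*s\n", (int)n, buf);
        return 0;
}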
index 32aeb2c581c580fa26cf5264b332beb487b0d47f..824623bcf1a53f78e67370a947e063c4245c8c13 100644 (file)
@@ -871,7 +871,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
 
 static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 {
-       set_notify_signal(worker->task);
+       __set_notify_signal(worker->task);
        wake_up_process(worker->task);
        return false;
 }
@@ -991,7 +991,7 @@ static bool __io_wq_worker_cancel(struct io_worker *worker,
 {
        if (work && match->fn(work, match->data)) {
                work->flags |= IO_WQ_WORK_CANCEL;
-               set_notify_signal(worker->task);
+               __set_notify_signal(worker->task);
                return true;
        }
 
index dbecd27656c7ccd79f1d738f40a1c9feebd147ad..ba6eee76d028f64ff932c94b1de3976994d14227 100644 (file)
@@ -155,6 +155,7 @@ struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack)
 struct io_wq_work {
        struct io_wq_work_node list;
        unsigned flags;
+       int cancel_seq;
 };
 
 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
index 91de361ea9aba2d0d83718f150437382235945c1..9f1c682d7caf22b6181a6dfeda59271a7c3ca26c 100644 (file)
@@ -80,6 +80,7 @@
 #include <linux/io_uring.h>
 #include <linux/audit.h>
 #include <linux/security.h>
+#include <linux/xattr.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -94,7 +95,7 @@
 #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
 
 /* only define max */
-#define IORING_MAX_FIXED_FILES (1U << 15)
+#define IORING_MAX_FIXED_FILES (1U << 20)
 #define IORING_MAX_RESTRICTIONS        (IORING_RESTRICTION_LAST + \
                                 IORING_REGISTER_LAST + IORING_OP_LAST)
 
 #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
                                REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA)
 
+#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
+                                IO_REQ_CLEAN_FLAGS)
+
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
+
 #define IO_TCTX_REFS_CACHE_NR  (1U << 10)
 
 struct io_uring {
@@ -166,7 +172,7 @@ struct io_rings {
         * The application needs a full memory barrier before checking
         * for IORING_SQ_NEED_WAKEUP after updating the sq tail.
         */
-       u32                     sq_flags;
+       atomic_t                sq_flags;
        /*
         * Runtime CQ flags
         *
@@ -198,13 +204,6 @@ struct io_rings {
        struct io_uring_cqe     cqes[] ____cacheline_aligned_in_smp;
 };
 
-enum io_uring_cmd_flags {
-       IO_URING_F_COMPLETE_DEFER       = 1,
-       IO_URING_F_UNLOCKED             = 2,
-       /* int's last bit, sign checks are usually faster than a bit test */
-       IO_URING_F_NONBLOCK             = INT_MIN,
-};
-
 struct io_mapped_ubuf {
        u64             ubuf;
        u64             ubuf_end;
@@ -216,10 +215,27 @@ struct io_mapped_ubuf {
 struct io_ring_ctx;
 
 struct io_overflow_cqe {
-       struct io_uring_cqe cqe;
        struct list_head list;
+       struct io_uring_cqe cqe;
 };
 
+/*
+ * FFS_SCM is only available on 64-bit archs; for 32-bit we just define it as 0
+ * and define IO_URING_SCM_ALL. For this case, we use SCM for all files as we
+ * can't safely always dereference the file when the task has exited and ring
+ * cleanup is done. If a file is tracked and part of SCM, then unix gc on
+ * process exit may reap it before __io_sqe_files_unregister() is run.
+ */
+#define FFS_NOWAIT             0x1UL
+#define FFS_ISREG              0x2UL
+#if defined(CONFIG_64BIT)
+#define FFS_SCM                        0x4UL
+#else
+#define IO_URING_SCM_ALL
+#define FFS_SCM                        0x0UL
+#endif
+#define FFS_MASK               ~(FFS_NOWAIT|FFS_ISREG|FFS_SCM)
+
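These bits ride in the low bits of the struct file pointer kept in io_fixed_file, which relies on the pointer being at least word aligned. A standalone sketch of the tagging trick (malloc stands in for the kernel allocation; this is not io_uring API):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define FFS_NOWAIT 0x1UL
#define FFS_ISREG  0x2UL
#define FFS_MASK   ~(FFS_NOWAIT | FFS_ISREG)

int main(void)
{
        void *file = malloc(64);        /* malloc is at least 8-byte aligned */
        unsigned long file_ptr = (unsigned long)file | FFS_NOWAIT | FFS_ISREG;

        /* masking the flag bits off recovers the original pointer */
        assert((void *)(file_ptr & FFS_MASK) == file);
        printf("nowait=%lu isreg=%lu\n",
               file_ptr & FFS_NOWAIT, (file_ptr & FFS_ISREG) >> 1);
        free(file);
        return 0;
}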
 struct io_fixed_file {
        /* file * with additional FFS_* flags */
        unsigned long file_ptr;
@@ -237,6 +253,8 @@ struct io_rsrc_put {
 
 struct io_file_table {
        struct io_fixed_file *files;
+       unsigned long *bitmap;
+       unsigned int alloc_hint;
 };
 
 struct io_rsrc_node {
@@ -261,10 +279,26 @@ struct io_rsrc_data {
        bool                            quiesce;
 };
 
+#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))
 struct io_buffer_list {
-       struct list_head list;
-       struct list_head buf_list;
+       /*
+        * If ->buf_nr_pages is set, then buf_pages/buf_ring are used. If not,
+        * then these are classic provided buffers and ->buf_list is used.
+        */
+       union {
+               struct list_head buf_list;
+               struct {
+                       struct page **buf_pages;
+                       struct io_uring_buf_ring *buf_ring;
+               };
+       };
        __u16 bgid;
+
+       /* below is for ring provided buffers */
+       __u16 buf_nr_pages;
+       __u16 nr_entries;
+       __u32 head;
+       __u32 mask;
 };
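The union separates classic provided buffers (buf_list) from the new ring-mapped provided buffers that userspace replenishes without a syscall. A hedged sketch of registering such a buffer ring, assuming liburing >= 2.2 on a 5.19+ kernel; BGID, NR_BUFS and BUF_SZ are arbitrary demo values:

/* build: cc bufring-demo.c -luring */
#include <liburing.h>
#include <stdio.h>
#include <stdlib.h>

#define BGID    7
#define NR_BUFS 8                       /* must be a power of two */
#define BUF_SZ  4096

int main(void)
{
        static char bufs[NR_BUFS][BUF_SZ];
        struct io_uring ring;
        struct io_uring_buf_ring *br;
        struct io_uring_buf_reg reg = { 0 };
        int i;

        if (io_uring_queue_init(8, &ring, 0) < 0)
                return 1;
        /* the ring memory must be page aligned */
        if (posix_memalign((void **)&br, 4096,
                           NR_BUFS * sizeof(struct io_uring_buf)))
                return 1;

        reg.ring_addr = (unsigned long)br;
        reg.ring_entries = NR_BUFS;
        reg.bgid = BGID;
        if (io_uring_register_buf_ring(&ring, &reg, 0) < 0)
                return 1;

        io_uring_buf_ring_init(br);
        for (i = 0; i < NR_BUFS; i++)
                io_uring_buf_ring_add(br, bufs[i], BUF_SZ, i,
                                      io_uring_buf_ring_mask(NR_BUFS), i);
        io_uring_buf_ring_advance(br, NR_BUFS);

        /* reads submitted with IOSQE_BUFFER_SELECT and buf_group = BGID
         * now pick entries from this ring */
        printf("buffer ring registered, bgid=%d\n", BGID);
        io_uring_queue_exit(&ring);
        return 0;
}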
 
 struct io_buffer {
@@ -337,7 +371,7 @@ struct io_ev_fd {
        struct rcu_head         rcu;
 };
 
-#define IO_BUFFERS_HASH_BITS   5
+#define BGID_ARRAY     64
 
 struct io_ring_ctx {
        /* const or read-mostly hot data */
@@ -346,6 +380,7 @@ struct io_ring_ctx {
 
                struct io_rings         *rings;
                unsigned int            flags;
+               enum task_work_notify_mode      notify_method;
                unsigned int            compat: 1;
                unsigned int            drain_next: 1;
                unsigned int            restricted: 1;
@@ -353,6 +388,7 @@ struct io_ring_ctx {
                unsigned int            drain_active: 1;
                unsigned int            drain_disabled: 1;
                unsigned int            has_evfd: 1;
+               unsigned int            syscall_iopoll: 1;
        } ____cacheline_aligned_in_smp;
 
        /* submission data */
@@ -382,17 +418,21 @@ struct io_ring_ctx {
                 */
                struct io_rsrc_node     *rsrc_node;
                int                     rsrc_cached_refs;
+               atomic_t                cancel_seq;
                struct io_file_table    file_table;
                unsigned                nr_user_files;
                unsigned                nr_user_bufs;
                struct io_mapped_ubuf   **user_bufs;
 
                struct io_submit_state  submit_state;
+
+               struct io_buffer_list   *io_bl;
+               struct xarray           io_bl_xa;
+               struct list_head        io_buffers_cache;
+
                struct list_head        timeout_list;
                struct list_head        ltimeout_list;
                struct list_head        cq_overflow_list;
-               struct list_head        *io_buffers;
-               struct list_head        io_buffers_cache;
                struct list_head        apoll_cache;
                struct xarray           personalities;
                u32                     pers_next;
@@ -409,9 +449,16 @@ struct io_ring_ctx {
        struct wait_queue_head  sqo_sq_wait;
        struct list_head        sqd_list;
 
-       unsigned long           check_cq_overflow;
+       unsigned long           check_cq;
 
        struct {
+               /*
+                * We cache a range of free CQEs we can use, once exhausted it
+                * should go through a slower range setup, see __io_get_cqe()
+                */
+               struct io_uring_cqe     *cqe_cached;
+               struct io_uring_cqe     *cqe_sentinel;
+
                unsigned                cached_cq_tail;
                unsigned                cq_entries;
                struct io_ev_fd __rcu   *io_ev_fd;
@@ -497,7 +544,7 @@ struct io_uring_task {
 
        spinlock_t              task_lock;
        struct io_wq_work_list  task_list;
-       struct io_wq_work_list  prior_task_list;
+       struct io_wq_work_list  prio_task_list;
        struct callback_head    task_work;
        struct file             **registered_rings;
        bool                    task_running;
@@ -546,6 +593,16 @@ struct io_accept {
        unsigned long                   nofile;
 };
 
+struct io_socket {
+       struct file                     *file;
+       int                             domain;
+       int                             type;
+       int                             protocol;
+       int                             flags;
+       u32                             file_slot;
+       unsigned long                   nofile;
+};
+
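io_socket carries the new IORING_OP_SOCKET request. A minimal sketch of issuing one from userspace, assuming liburing >= 2.2 (which provides io_uring_prep_socket) on a 5.19+ kernel:

/* build: cc socket-demo.c -luring */
#include <liburing.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;

        if (io_uring_queue_init(4, &ring, 0) < 0)
                return 1;
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_socket(sqe, AF_INET, SOCK_STREAM, 0, 0);
        io_uring_submit(&ring);
        if (io_uring_wait_cqe(&ring, &cqe) < 0)
                return 1;
        if (cqe->res < 0)
                fprintf(stderr, "socket failed: %d\n", cqe->res);
        else
                printf("got socket fd %d\n", cqe->res);  /* res is the new fd */
        io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        return 0;
}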
 struct io_sync {
        struct file                     *file;
        loff_t                          len;
@@ -557,6 +614,8 @@ struct io_sync {
 struct io_cancel {
        struct file                     *file;
        u64                             addr;
+       u32                             flags;
+       s32                             fd;
 };
 
 struct io_timeout {
@@ -585,7 +644,7 @@ struct io_rw {
        struct kiocb                    kiocb;
        u64                             addr;
        u32                             len;
-       u32                             flags;
+       rwf_t                           flags;
 };
 
 struct io_connect {
@@ -602,9 +661,9 @@ struct io_sr_msg {
                void __user                     *buf;
        };
        int                             msg_flags;
-       int                             bgid;
        size_t                          len;
        size_t                          done_io;
+       unsigned int                    flags;
 };
 
 struct io_open {
@@ -722,6 +781,12 @@ struct io_msg {
        u32 len;
 };
 
+struct io_nop {
+       struct file                     *file;
+       u64                             extra1;
+       u64                             extra2;
+};
+
 struct io_async_connect {
        struct sockaddr_storage         address;
 };
@@ -748,6 +813,12 @@ struct io_async_rw {
        struct wait_page_queue          wpq;
 };
 
+struct io_xattr {
+       struct file                     *file;
+       struct xattr_ctx                ctx;
+       struct filename                 *filename;
+};
+
 enum {
        REQ_F_FIXED_FILE_BIT    = IOSQE_FIXED_FILE_BIT,
        REQ_F_IO_DRAIN_BIT      = IOSQE_IO_DRAIN_BIT,
@@ -766,6 +837,7 @@ enum {
        REQ_F_NEED_CLEANUP_BIT,
        REQ_F_POLLED_BIT,
        REQ_F_BUFFER_SELECTED_BIT,
+       REQ_F_BUFFER_RING_BIT,
        REQ_F_COMPLETE_INLINE_BIT,
        REQ_F_REISSUE_BIT,
        REQ_F_CREDS_BIT,
@@ -776,6 +848,7 @@ enum {
        REQ_F_SINGLE_POLL_BIT,
        REQ_F_DOUBLE_POLL_BIT,
        REQ_F_PARTIAL_IO_BIT,
+       REQ_F_APOLL_MULTISHOT_BIT,
        /* keep async read/write and isreg together and in order */
        REQ_F_SUPPORT_NOWAIT_BIT,
        REQ_F_ISREG_BIT,
@@ -816,6 +889,8 @@ enum {
        REQ_F_POLLED            = BIT(REQ_F_POLLED_BIT),
        /* buffer already selected */
        REQ_F_BUFFER_SELECTED   = BIT(REQ_F_BUFFER_SELECTED_BIT),
+       /* buffer selected from ring, needs commit */
+       REQ_F_BUFFER_RING       = BIT(REQ_F_BUFFER_RING_BIT),
        /* completion is deferred through io_comp_state */
        REQ_F_COMPLETE_INLINE   = BIT(REQ_F_COMPLETE_INLINE_BIT),
        /* caller should reissue async */
@@ -840,6 +915,8 @@ enum {
        REQ_F_DOUBLE_POLL       = BIT(REQ_F_DOUBLE_POLL_BIT),
        /* request has already done partial IO */
        REQ_F_PARTIAL_IO        = BIT(REQ_F_PARTIAL_IO_BIT),
+       /* fast poll multishot mode */
+       REQ_F_APOLL_MULTISHOT   = BIT(REQ_F_APOLL_MULTISHOT_BIT),
 };
 
 struct async_poll {
@@ -862,6 +939,21 @@ enum {
        IORING_RSRC_BUFFER              = 1,
 };
 
+struct io_cqe {
+       __u64   user_data;
+       __s32   res;
+       /* fd initially, then cflags for completion */
+       union {
+               __u32   flags;
+               int     fd;
+       };
+};
+
+enum {
+       IO_CHECK_CQ_OVERFLOW_BIT,
+       IO_CHECK_CQ_DROPPED_BIT,
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -897,46 +989,65 @@ struct io_kiocb {
                struct io_symlink       symlink;
                struct io_hardlink      hardlink;
                struct io_msg           msg;
+               struct io_xattr         xattr;
+               struct io_socket        sock;
+               struct io_nop           nop;
+               struct io_uring_cmd     uring_cmd;
        };
 
        u8                              opcode;
        /* polled IO has completed */
        u8                              iopoll_completed;
+       /*
+        * Can be either a fixed buffer index, or used with provided buffers.
+        * For the latter, before issue it points to the buffer group ID,
+        * and after selection it points to the buffer ID itself.
+        */
        u16                             buf_index;
        unsigned int                    flags;
 
-       u64                             user_data;
-       u32                             result;
-       /* fd initially, then cflags for completion */
-       union {
-               u32                     cflags;
-               int                     fd;
-       };
+       struct io_cqe                   cqe;
 
        struct io_ring_ctx              *ctx;
        struct task_struct              *task;
 
-       struct percpu_ref               *fixed_rsrc_refs;
-       /* store used ubuf, so we can prevent reloading */
-       struct io_mapped_ubuf           *imu;
+       struct io_rsrc_node             *rsrc_node;
+
+       union {
+               /* store used ubuf, so we can prevent reloading */
+               struct io_mapped_ubuf   *imu;
+
+               /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
+               struct io_buffer        *kbuf;
+
+               /*
+                * stores buffer ID for ring provided buffers, valid IFF
+                * REQ_F_BUFFER_RING is set.
+                */
+               struct io_buffer_list   *buf_list;
+       };
 
        union {
                /* used by request caches, completion batching and iopoll */
                struct io_wq_work_node  comp_list;
                /* cache ->apoll->events */
-               int apoll_events;
+               __poll_t apoll_events;
        };
        atomic_t                        refs;
        atomic_t                        poll_refs;
        struct io_task_work             io_task_work;
        /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
-       struct hlist_node               hash_node;
+       union {
+               struct hlist_node       hash_node;
+               struct {
+                       u64             extra1;
+                       u64             extra2;
+               };
+       };
        /* internal polling, see IORING_FEAT_FAST_POLL */
        struct async_poll               *apoll;
        /* opcode allocated if it needs to store data for async defer */
        void                            *async_data;
-       /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
-       struct io_buffer                *kbuf;
        /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */
        struct io_kiocb                 *link;
        /* custom credentials, valid IFF REQ_F_CREDS is set */
@@ -956,6 +1067,24 @@ struct io_defer_entry {
        u32                     seq;
 };
 
+struct io_cancel_data {
+       struct io_ring_ctx *ctx;
+       union {
+               u64 data;
+               struct file *file;
+       };
+       u32 flags;
+       int seq;
+};
+
+/*
+ * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
+ * the following sqe if SQE128 is used.
+ */
+#define uring_cmd_pdu_size(is_sqe128)                          \
+       ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) -    \
+               offsetof(struct io_uring_sqe, cmd))
+
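The macro yields whatever is left of one SQE (or two, with IORING_SETUP_SQE128) past the cmd field. A tiny sketch that just evaluates the same expression, assuming 5.19+ uapi headers that define the cmd member of struct io_uring_sqe:

#include <linux/io_uring.h>
#include <stddef.h>
#include <stdio.h>

#define pdu_size(is_sqe128)                                     \
        ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) -    \
                offsetof(struct io_uring_sqe, cmd))

int main(void)
{
        printf("pdu bytes, SQE64:  %zu\n", pdu_size(0));
        printf("pdu bytes, SQE128: %zu\n", pdu_size(1));
        return 0;
}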
 struct io_op_def {
        /* needs req->file assigned */
        unsigned                needs_file : 1;
@@ -977,12 +1106,20 @@ struct io_op_def {
        unsigned                not_supported : 1;
        /* skip auditing */
        unsigned                audit_skip : 1;
+       /* supports ioprio */
+       unsigned                ioprio : 1;
+       /* supports iopoll */
+       unsigned                iopoll : 1;
        /* size of async data needed, if any */
        unsigned short          async_size;
 };
 
 static const struct io_op_def io_op_defs[] = {
-       [IORING_OP_NOP] = {},
+       [IORING_OP_NOP] = {
+               .audit_skip             = 1,
+               .iopoll                 = 1,
+               .buffer_select          = 1,
+       },
        [IORING_OP_READV] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
@@ -991,6 +1128,8 @@ static const struct io_op_def io_op_defs[] = {
                .needs_async_setup      = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_WRITEV] = {
@@ -1001,6 +1140,8 @@ static const struct io_op_def io_op_defs[] = {
                .needs_async_setup      = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_FSYNC] = {
@@ -1013,6 +1154,8 @@ static const struct io_op_def io_op_defs[] = {
                .pollin                 = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_WRITE_FIXED] = {
@@ -1022,6 +1165,8 @@ static const struct io_op_def io_op_defs[] = {
                .pollout                = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_POLL_ADD] = {
@@ -1064,6 +1209,7 @@ static const struct io_op_def io_op_defs[] = {
                .unbound_nonreg_file    = 1,
                .pollin                 = 1,
                .poll_exclusive         = 1,
+               .ioprio                 = 1,    /* used for flags */
        },
        [IORING_OP_ASYNC_CANCEL] = {
                .audit_skip             = 1,
@@ -1086,6 +1232,7 @@ static const struct io_op_def io_op_defs[] = {
        [IORING_OP_CLOSE] = {},
        [IORING_OP_FILES_UPDATE] = {
                .audit_skip             = 1,
+               .iopoll                 = 1,
        },
        [IORING_OP_STATX] = {
                .audit_skip             = 1,
@@ -1097,6 +1244,8 @@ static const struct io_op_def io_op_defs[] = {
                .buffer_select          = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_WRITE] = {
@@ -1106,6 +1255,8 @@ static const struct io_op_def io_op_defs[] = {
                .pollout                = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_FADVISE] = {
@@ -1140,9 +1291,11 @@ static const struct io_op_def io_op_defs[] = {
        },
        [IORING_OP_PROVIDE_BUFFERS] = {
                .audit_skip             = 1,
+               .iopoll                 = 1,
        },
        [IORING_OP_REMOVE_BUFFERS] = {
                .audit_skip             = 1,
+               .iopoll                 = 1,
        },
        [IORING_OP_TEE] = {
                .needs_file             = 1,
@@ -1160,11 +1313,30 @@ static const struct io_op_def io_op_defs[] = {
        [IORING_OP_LINKAT] = {},
        [IORING_OP_MSG_RING] = {
                .needs_file             = 1,
+               .iopoll                 = 1,
+       },
+       [IORING_OP_FSETXATTR] = {
+               .needs_file = 1
+       },
+       [IORING_OP_SETXATTR] = {},
+       [IORING_OP_FGETXATTR] = {
+               .needs_file = 1
+       },
+       [IORING_OP_GETXATTR] = {},
+       [IORING_OP_SOCKET] = {
+               .audit_skip             = 1,
+       },
+       [IORING_OP_URING_CMD] = {
+               .needs_file             = 1,
+               .plug                   = 1,
+               .needs_async_setup      = 1,
+               .async_size             = uring_cmd_pdu_size(1),
        },
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
 #define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
+#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK)
 
 static bool io_disarm_next(struct io_kiocb *req);
 static void io_uring_del_tctx_node(unsigned long index);
@@ -1173,10 +1345,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                                         bool cancel_all);
 static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
 
-static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags);
-
-static void io_put_req(struct io_kiocb *req);
-static void io_put_req_deferred(struct io_kiocb *req);
+static void __io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags);
 static void io_dismantle_req(struct io_kiocb *req);
 static void io_queue_linked_timeout(struct io_kiocb *req);
 static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
@@ -1185,10 +1354,10 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 static void io_clean_op(struct io_kiocb *req);
 static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
                                             unsigned issue_flags);
-static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd);
+static struct file *io_file_get_normal(struct io_kiocb *req, int fd);
 static void io_drop_inflight_file(struct io_kiocb *req);
 static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags);
-static void __io_queue_sqe(struct io_kiocb *req);
+static void io_queue_sqe(struct io_kiocb *req);
 static void io_rsrc_put_work(struct work_struct *work);
 
 static void io_req_task_queue(struct io_kiocb *req);
@@ -1201,11 +1370,115 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
 
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
 static void io_eventfd_signal(struct io_ring_ctx *ctx);
+static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
 
 static struct kmem_cache *req_cachep;
 
 static const struct file_operations io_uring_fops;
 
+const char *io_uring_get_opcode(u8 opcode)
+{
+       switch ((enum io_uring_op)opcode) {
+       case IORING_OP_NOP:
+               return "NOP";
+       case IORING_OP_READV:
+               return "READV";
+       case IORING_OP_WRITEV:
+               return "WRITEV";
+       case IORING_OP_FSYNC:
+               return "FSYNC";
+       case IORING_OP_READ_FIXED:
+               return "READ_FIXED";
+       case IORING_OP_WRITE_FIXED:
+               return "WRITE_FIXED";
+       case IORING_OP_POLL_ADD:
+               return "POLL_ADD";
+       case IORING_OP_POLL_REMOVE:
+               return "POLL_REMOVE";
+       case IORING_OP_SYNC_FILE_RANGE:
+               return "SYNC_FILE_RANGE";
+       case IORING_OP_SENDMSG:
+               return "SENDMSG";
+       case IORING_OP_RECVMSG:
+               return "RECVMSG";
+       case IORING_OP_TIMEOUT:
+               return "TIMEOUT";
+       case IORING_OP_TIMEOUT_REMOVE:
+               return "TIMEOUT_REMOVE";
+       case IORING_OP_ACCEPT:
+               return "ACCEPT";
+       case IORING_OP_ASYNC_CANCEL:
+               return "ASYNC_CANCEL";
+       case IORING_OP_LINK_TIMEOUT:
+               return "LINK_TIMEOUT";
+       case IORING_OP_CONNECT:
+               return "CONNECT";
+       case IORING_OP_FALLOCATE:
+               return "FALLOCATE";
+       case IORING_OP_OPENAT:
+               return "OPENAT";
+       case IORING_OP_CLOSE:
+               return "CLOSE";
+       case IORING_OP_FILES_UPDATE:
+               return "FILES_UPDATE";
+       case IORING_OP_STATX:
+               return "STATX";
+       case IORING_OP_READ:
+               return "READ";
+       case IORING_OP_WRITE:
+               return "WRITE";
+       case IORING_OP_FADVISE:
+               return "FADVISE";
+       case IORING_OP_MADVISE:
+               return "MADVISE";
+       case IORING_OP_SEND:
+               return "SEND";
+       case IORING_OP_RECV:
+               return "RECV";
+       case IORING_OP_OPENAT2:
+               return "OPENAT2";
+       case IORING_OP_EPOLL_CTL:
+               return "EPOLL_CTL";
+       case IORING_OP_SPLICE:
+               return "SPLICE";
+       case IORING_OP_PROVIDE_BUFFERS:
+               return "PROVIDE_BUFFERS";
+       case IORING_OP_REMOVE_BUFFERS:
+               return "REMOVE_BUFFERS";
+       case IORING_OP_TEE:
+               return "TEE";
+       case IORING_OP_SHUTDOWN:
+               return "SHUTDOWN";
+       case IORING_OP_RENAMEAT:
+               return "RENAMEAT";
+       case IORING_OP_UNLINKAT:
+               return "UNLINKAT";
+       case IORING_OP_MKDIRAT:
+               return "MKDIRAT";
+       case IORING_OP_SYMLINKAT:
+               return "SYMLINKAT";
+       case IORING_OP_LINKAT:
+               return "LINKAT";
+       case IORING_OP_MSG_RING:
+               return "MSG_RING";
+       case IORING_OP_FSETXATTR:
+               return "FSETXATTR";
+       case IORING_OP_SETXATTR:
+               return "SETXATTR";
+       case IORING_OP_FGETXATTR:
+               return "FGETXATTR";
+       case IORING_OP_GETXATTR:
+               return "GETXATTR";
+       case IORING_OP_SOCKET:
+               return "SOCKET";
+       case IORING_OP_URING_CMD:
+               return "URING_CMD";
+       case IORING_OP_LAST:
+               return "INVALID";
+       }
+       return "INVALID";
+}
+
 struct sock *io_uring_get_socket(struct file *file)
 {
 #if defined(CONFIG_UNIX)
@@ -1219,6 +1492,42 @@ struct sock *io_uring_get_socket(struct file *file)
 }
 EXPORT_SYMBOL(io_uring_get_socket);
 
+#if defined(CONFIG_UNIX)
+static inline bool io_file_need_scm(struct file *filp)
+{
+#if defined(IO_URING_SCM_ALL)
+       return true;
+#else
+       return !!unix_get_socket(filp);
+#endif
+}
+#else
+static inline bool io_file_need_scm(struct file *filp)
+{
+       return false;
+}
+#endif
+
+static void io_ring_submit_unlock(struct io_ring_ctx *ctx, unsigned issue_flags)
+{
+       lockdep_assert_held(&ctx->uring_lock);
+       if (issue_flags & IO_URING_F_UNLOCKED)
+               mutex_unlock(&ctx->uring_lock);
+}
+
+static void io_ring_submit_lock(struct io_ring_ctx *ctx, unsigned issue_flags)
+{
+       /*
+        * "Normal" inline submissions always hold the uring_lock, since we
+        * grab it from the system call. Same is true for the SQPOLL offload.
+        * The only exception is when we've detached the request and issue it
+        * from an async worker thread; grab the lock for that case.
+        */
+       if (issue_flags & IO_URING_F_UNLOCKED)
+               mutex_lock(&ctx->uring_lock);
+       lockdep_assert_held(&ctx->uring_lock);
+}
+
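The helper pair takes uring_lock only when the issue path does not already hold it, and asserts that it is held either way. A userspace model of the same conditional-locking pattern with a pthread mutex (names invented; there is no lockdep equivalent here):

/* build: cc -pthread condlock-demo.c */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

#define F_UNLOCKED 0x1          /* caller does not already hold the lock */

static void submit_lock(unsigned flags)
{
        if (flags & F_UNLOCKED)
                pthread_mutex_lock(&lock);
        /* the lock is held from here on, either way */
}

static void submit_unlock(unsigned flags)
{
        if (flags & F_UNLOCKED)
                pthread_mutex_unlock(&lock);
}

static void do_work(unsigned flags)
{
        submit_lock(flags);
        puts("critical section");
        submit_unlock(flags);
}

int main(void)
{
        do_work(F_UNLOCKED);            /* async-worker style caller  */
        pthread_mutex_lock(&lock);
        do_work(0);                     /* inline-submit style caller */
        pthread_mutex_unlock(&lock);
        return 0;
}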
 static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
 {
        if (!*locked) {
@@ -1280,31 +1589,36 @@ static inline void io_req_set_refcount(struct io_kiocb *req)
 
 #define IO_RSRC_REF_BATCH      100
 
+static void io_rsrc_put_node(struct io_rsrc_node *node, int nr)
+{
+       percpu_ref_put_many(&node->refs, nr);
+}
+
 static inline void io_req_put_rsrc_locked(struct io_kiocb *req,
                                          struct io_ring_ctx *ctx)
        __must_hold(&ctx->uring_lock)
 {
-       struct percpu_ref *ref = req->fixed_rsrc_refs;
+       struct io_rsrc_node *node = req->rsrc_node;
 
-       if (ref) {
-               if (ref == &ctx->rsrc_node->refs)
+       if (node) {
+               if (node == ctx->rsrc_node)
                        ctx->rsrc_cached_refs++;
                else
-                       percpu_ref_put(ref);
+                       io_rsrc_put_node(node, 1);
        }
 }
 
-static inline void io_req_put_rsrc(struct io_kiocb *req, struct io_ring_ctx *ctx)
+static inline void io_req_put_rsrc(struct io_kiocb *req)
 {
-       if (req->fixed_rsrc_refs)
-               percpu_ref_put(req->fixed_rsrc_refs);
+       if (req->rsrc_node)
+               io_rsrc_put_node(req->rsrc_node, 1);
 }
 
 static __cold void io_rsrc_refs_drop(struct io_ring_ctx *ctx)
        __must_hold(&ctx->uring_lock)
 {
        if (ctx->rsrc_cached_refs) {
-               percpu_ref_put_many(&ctx->rsrc_node->refs, ctx->rsrc_cached_refs);
+               io_rsrc_put_node(ctx->rsrc_node, ctx->rsrc_cached_refs);
                ctx->rsrc_cached_refs = 0;
        }
 }
@@ -1320,8 +1634,8 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
                                        struct io_ring_ctx *ctx,
                                        unsigned int issue_flags)
 {
-       if (!req->fixed_rsrc_refs) {
-               req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
+       if (!req->rsrc_node) {
+               req->rsrc_node = ctx->rsrc_node;
 
                if (!(issue_flags & IO_URING_F_UNLOCKED)) {
                        lockdep_assert_held(&ctx->uring_lock);
@@ -1329,28 +1643,30 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
                        if (unlikely(ctx->rsrc_cached_refs < 0))
                                io_rsrc_refs_refill(ctx);
                } else {
-                       percpu_ref_get(req->fixed_rsrc_refs);
+                       percpu_ref_get(&req->rsrc_node->refs);
                }
        }
 }
 
 static unsigned int __io_put_kbuf(struct io_kiocb *req, struct list_head *list)
 {
-       struct io_buffer *kbuf = req->kbuf;
-       unsigned int cflags;
+       if (req->flags & REQ_F_BUFFER_RING) {
+               if (req->buf_list)
+                       req->buf_list->head++;
+               req->flags &= ~REQ_F_BUFFER_RING;
+       } else {
+               list_add(&req->kbuf->list, list);
+               req->flags &= ~REQ_F_BUFFER_SELECTED;
+       }
 
-       cflags = IORING_CQE_F_BUFFER | (kbuf->bid << IORING_CQE_BUFFER_SHIFT);
-       req->flags &= ~REQ_F_BUFFER_SELECTED;
-       list_add(&kbuf->list, list);
-       req->kbuf = NULL;
-       return cflags;
+       return IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
 }
 
 static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
 {
        lockdep_assert_held(&req->ctx->completion_lock);
 
-       if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+       if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
                return 0;
        return __io_put_kbuf(req, &req->ctx->io_buffers_comp);
 }
@@ -1360,7 +1676,7 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 {
        unsigned int cflags;
 
-       if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+       if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
                return 0;
 
        /*
@@ -1375,7 +1691,10 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
         * We migrate buffers from the comp_list to the issue cache list
         * when we need one.
         */
-       if (issue_flags & IO_URING_F_UNLOCKED) {
+       if (req->flags & REQ_F_BUFFER_RING) {
+               /* no buffers to recycle for this case */
+               cflags = __io_put_kbuf(req, NULL);
+       } else if (issue_flags & IO_URING_F_UNLOCKED) {
                struct io_ring_ctx *ctx = req->ctx;
 
                spin_lock(&ctx->completion_lock);
@@ -1393,15 +1712,10 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
                                                 unsigned int bgid)
 {
-       struct list_head *hash_list;
-       struct io_buffer_list *bl;
-
-       hash_list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
-       list_for_each_entry(bl, hash_list, list)
-               if (bl->bgid == bgid || bgid == -1U)
-                       return bl;
+       if (ctx->io_bl && bgid < BGID_ARRAY)
+               return &ctx->io_bl[bgid];
 
-       return NULL;
+       return xa_load(&ctx->io_bl_xa, bgid);
 }
 
 static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
@@ -1410,25 +1724,33 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
        struct io_buffer_list *bl;
        struct io_buffer *buf;
 
-       if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+       if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
                return;
        /* don't recycle if we already did IO to this buffer */
        if (req->flags & REQ_F_PARTIAL_IO)
                return;
+       /*
+        * We don't need to recycle for REQ_F_BUFFER_RING; we can just clear
+        * the flag and hence ensure that bl->head doesn't get incremented.
+        * If the tail has already been incremented, hang on to it.
+        */
+       if (req->flags & REQ_F_BUFFER_RING) {
+               if (req->buf_list) {
+                       req->buf_index = req->buf_list->bgid;
+                       req->flags &= ~REQ_F_BUFFER_RING;
+               }
+               return;
+       }
 
-       if (issue_flags & IO_URING_F_UNLOCKED)
-               mutex_lock(&ctx->uring_lock);
-
-       lockdep_assert_held(&ctx->uring_lock);
+       io_ring_submit_lock(ctx, issue_flags);
 
        buf = req->kbuf;
        bl = io_buffer_get_list(ctx, buf->bgid);
        list_add(&buf->list, &bl->buf_list);
        req->flags &= ~REQ_F_BUFFER_SELECTED;
-       req->kbuf = NULL;
+       req->buf_index = buf->bgid;
 
-       if (issue_flags & IO_URING_F_UNLOCKED)
-               mutex_unlock(&ctx->uring_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
 }
 
 static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
@@ -1469,7 +1791,12 @@ static inline void req_set_fail(struct io_kiocb *req)
 static inline void req_fail_link_node(struct io_kiocb *req, int res)
 {
        req_set_fail(req);
-       req->result = res;
+       req->cqe.res = res;
+}
+
+static inline void io_req_add_to_cache(struct io_kiocb *req, struct io_ring_ctx *ctx)
+{
+       wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
 }
 
 static __cold void io_ring_ctx_ref_free(struct percpu_ref *ref)
@@ -1506,12 +1833,14 @@ static __cold void io_fallback_req_func(struct work_struct *work)
 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
        struct io_ring_ctx *ctx;
-       int i, hash_bits;
+       int hash_bits;
 
        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return NULL;
 
+       xa_init(&ctx->io_bl_xa);
+
        /*
         * Use 5 bits less than the max cq entries, that should give us around
         * 32 entries per hash list if totally full and uniformly spread.
@@ -1533,13 +1862,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        /* set invalid range, so io_import_fixed() fails meeting it */
        ctx->dummy_ubuf->ubuf = -1UL;
 
-       ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
-                                       sizeof(struct list_head), GFP_KERNEL);
-       if (!ctx->io_buffers)
-               goto err;
-       for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
-               INIT_LIST_HEAD(&ctx->io_buffers[i]);
-
        if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
                            PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
                goto err;
@@ -1575,7 +1897,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 err:
        kfree(ctx->dummy_ubuf);
        kfree(ctx->cancel_hash);
-       kfree(ctx->io_buffers);
+       kfree(ctx->io_bl);
+       xa_destroy(&ctx->io_bl_xa);
        kfree(ctx);
        return NULL;
 }
@@ -1599,10 +1922,6 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
        return false;
 }
 
-#define FFS_NOWAIT             0x1UL
-#define FFS_ISREG              0x2UL
-#define FFS_MASK               ~(FFS_NOWAIT|FFS_ISREG)
-
 static inline bool io_req_ffs_set(struct io_kiocb *req)
 {
        return req->flags & REQ_F_FIXED_FILE;
@@ -1629,6 +1948,17 @@ static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
        return __io_prep_linked_timeout(req);
 }
 
+static noinline void __io_arm_ltimeout(struct io_kiocb *req)
+{
+       io_queue_linked_timeout(__io_prep_linked_timeout(req));
+}
+
+static inline void io_arm_ltimeout(struct io_kiocb *req)
+{
+       if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT))
+               __io_arm_ltimeout(req);
+}
+
 static void io_prep_async_work(struct io_kiocb *req)
 {
        const struct io_op_def *def = &io_op_defs[req->opcode];
@@ -1641,6 +1971,7 @@ static void io_prep_async_work(struct io_kiocb *req)
 
        req->work.list.next = NULL;
        req->work.flags = 0;
+       req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
        if (req->flags & REQ_F_FORCE_ASYNC)
                req->work.flags |= IO_WQ_WORK_CONCURRENT;
 
@@ -1672,17 +2003,15 @@ static void io_prep_async_link(struct io_kiocb *req)
 
 static inline void io_req_add_compl_list(struct io_kiocb *req)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-       struct io_submit_state *state = &ctx->submit_state;
+       struct io_submit_state *state = &req->ctx->submit_state;
 
        if (!(req->flags & REQ_F_CQE_SKIP))
-               ctx->submit_state.flush_cqes = true;
+               state->flush_cqes = true;
        wq_list_add_tail(&req->comp_list, &state->compl_reqs);
 }
 
-static void io_queue_async_work(struct io_kiocb *req, bool *dont_use)
+static void io_queue_iowq(struct io_kiocb *req, bool *dont_use)
 {
-       struct io_ring_ctx *ctx = req->ctx;
        struct io_kiocb *link = io_prep_linked_timeout(req);
        struct io_uring_task *tctx = req->task->io_uring;
 
@@ -1702,8 +2031,9 @@ static void io_queue_async_work(struct io_kiocb *req, bool *dont_use)
        if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
                req->work.flags |= IO_WQ_WORK_CANCEL;
 
-       trace_io_uring_queue_async_work(ctx, req, req->user_data, req->opcode, req->flags,
-                                       &req->work, io_wq_is_hashed(&req->work));
+       trace_io_uring_queue_async_work(req->ctx, req, req->cqe.user_data,
+                                       req->opcode, req->flags, &req->work,
+                                       io_wq_is_hashed(&req->work));
        io_wq_enqueue(tctx->io_wq, &req->work);
        if (link)
                io_queue_linked_timeout(link);
@@ -1721,8 +2051,7 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
                atomic_set(&req->ctx->cq_timeouts,
                        atomic_read(&req->ctx->cq_timeouts) + 1);
                list_del_init(&req->timeout.list);
-               io_fill_cqe_req(req, status, 0);
-               io_put_req_deferred(req);
+               io_req_tw_post_queue(req, status, 0);
        }
 }
 
@@ -1804,21 +2133,53 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
        return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
 }
 
-static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+/*
+ * writes to the cq entry need to come after reading head; the
+ * control dependency is enough as we're using WRITE_ONCE to
+ * fill the cq entry
+ */
+static noinline struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx)
 {
        struct io_rings *rings = ctx->rings;
-       unsigned tail, mask = ctx->cq_entries - 1;
-
-       /*
-        * writes to the cq entry need to come after reading head; the
-        * control dependency is enough as we're using WRITE_ONCE to
-        * fill the cq entry
-        */
-       if (__io_cqring_events(ctx) == ctx->cq_entries)
+       unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
+       unsigned int shift = 0;
+       unsigned int free, queued, len;
+
+       if (ctx->flags & IORING_SETUP_CQE32)
+               shift = 1;
+
+       /* userspace may cheat by modifying the tail; be safe and do min */
+       queued = min(__io_cqring_events(ctx), ctx->cq_entries);
+       free = ctx->cq_entries - queued;
+       /* we need a contiguous range, limit based on the current array offset */
+       len = min(free, ctx->cq_entries - off);
+       if (!len)
                return NULL;
 
-       tail = ctx->cached_cq_tail++;
-       return &rings->cqes[tail & mask];
+       ctx->cached_cq_tail++;
+       ctx->cqe_cached = &rings->cqes[off];
+       ctx->cqe_sentinel = ctx->cqe_cached + len;
+       ctx->cqe_cached++;
+       return &rings->cqes[off << shift];
+}
+
+static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+{
+       if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
+               struct io_uring_cqe *cqe = ctx->cqe_cached;
+
+               if (ctx->flags & IORING_SETUP_CQE32) {
+                       unsigned int off = ctx->cqe_cached - ctx->rings->cqes;
+
+                       cqe += off;
+               }
+
+               ctx->cached_cq_tail++;
+               ctx->cqe_cached++;
+               return cqe;
+       }
+
+       return __io_get_cqe(ctx);
 }
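io_get_cqe() now serves completions from a cached contiguous range and only drops into __io_get_cqe() to set up the next range. A simplified single-threaded model of that fast/slow split (plain ints stand in for CQEs, the consumer never advances, and the CQE32 shift is omitted):

#include <stdio.h>

#define ENTRIES 8
static int ring[ENTRIES];
static unsigned int tail, head;         /* head stays 0 in this demo */
static int *cached = ring, *sentinel = ring;

static int *get_slow(void)
{
        unsigned int off = tail & (ENTRIES - 1);
        unsigned int queued = tail - head;
        unsigned int free_slots = ENTRIES - queued;
        /* a contiguous run, limited by the wrap point */
        unsigned int len = free_slots < ENTRIES - off ? free_slots
                                                      : ENTRIES - off;

        if (!len)
                return NULL;            /* full: the kernel overflows the CQE */
        tail++;
        cached = &ring[off] + 1;        /* first slot is handed out right now */
        sentinel = &ring[off] + len;
        return &ring[off];
}

static int *get(void)
{
        if (cached < sentinel) {        /* fast path: cached range */
                tail++;
                return cached++;
        }
        return get_slow();
}

int main(void)
{
        for (int i = 0; i < 10; i++)
                printf("slot %d: %s\n", i, get() ? "ok" : "full");
        return 0;
}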
 
 static void io_eventfd_signal(struct io_ring_ctx *ctx)
@@ -1889,10 +2250,14 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 {
        bool all_flushed, posted;
+       size_t cqe_size = sizeof(struct io_uring_cqe);
 
        if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
                return false;
 
+       if (ctx->flags & IORING_SETUP_CQE32)
+               cqe_size <<= 1;
+
        posted = false;
        spin_lock(&ctx->completion_lock);
        while (!list_empty(&ctx->cq_overflow_list)) {
@@ -1904,7 +2269,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
                ocqe = list_first_entry(&ctx->cq_overflow_list,
                                        struct io_overflow_cqe, list);
                if (cqe)
-                       memcpy(cqe, &ocqe->cqe, sizeof(*cqe));
+                       memcpy(cqe, &ocqe->cqe, cqe_size);
                else
                        io_account_cq_overflow(ctx);
 
@@ -1915,13 +2280,11 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 
        all_flushed = list_empty(&ctx->cq_overflow_list);
        if (all_flushed) {
-               clear_bit(0, &ctx->check_cq_overflow);
-               WRITE_ONCE(ctx->rings->sq_flags,
-                          ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
+               clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
+               atomic_andnot(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
        }
 
-       if (posted)
-               io_commit_cqring(ctx);
+       io_commit_cqring(ctx);
        spin_unlock(&ctx->completion_lock);
        if (posted)
                io_cqring_ev_posted(ctx);
@@ -1932,7 +2295,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
 {
        bool ret = true;
 
-       if (test_bit(0, &ctx->check_cq_overflow)) {
+       if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) {
                /* iopoll syncs against uring_lock, not completion_lock */
                if (ctx->flags & IORING_SETUP_IOPOLL)
                        mutex_lock(&ctx->uring_lock);
@@ -1944,19 +2307,23 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
        return ret;
 }
 
-/* must to be called somewhat shortly after putting a request */
-static inline void io_put_task(struct task_struct *task, int nr)
+static void __io_put_task(struct task_struct *task, int nr)
 {
        struct io_uring_task *tctx = task->io_uring;
 
-       if (likely(task == current)) {
-               tctx->cached_refs += nr;
-       } else {
-               percpu_counter_sub(&tctx->inflight, nr);
-               if (unlikely(atomic_read(&tctx->in_idle)))
-                       wake_up(&tctx->wait);
-               put_task_struct_many(task, nr);
-       }
+       percpu_counter_sub(&tctx->inflight, nr);
+       if (unlikely(atomic_read(&tctx->in_idle)))
+               wake_up(&tctx->wait);
+       put_task_struct_many(task, nr);
+}
+
+/* must be called somewhat shortly after putting a request */
+static inline void io_put_task(struct task_struct *task, int nr)
+{
+       if (likely(task == current))
+               task->io_uring->cached_refs += nr;
+       else
+               __io_put_task(task, nr);
 }
 
 static void io_task_refs_refill(struct io_uring_task *tctx)
@@ -1990,11 +2357,18 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
 }
 
 static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
-                                    s32 res, u32 cflags)
+                                    s32 res, u32 cflags, u64 extra1,
+                                    u64 extra2)
 {
        struct io_overflow_cqe *ocqe;
+       size_t ocq_size = sizeof(struct io_overflow_cqe);
+       bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
 
-       ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+       if (is_cqe32)
+               ocq_size += sizeof(struct io_uring_cqe);
+
+       ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT);
+       trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
        if (!ocqe) {
                /*
                 * If we're in ring overflow flush mode, or in task cancel mode,
@@ -2002,17 +2376,21 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
                 * on the floor.
                 */
                io_account_cq_overflow(ctx);
+               set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq);
                return false;
        }
        if (list_empty(&ctx->cq_overflow_list)) {
-               set_bit(0, &ctx->check_cq_overflow);
-               WRITE_ONCE(ctx->rings->sq_flags,
-                          ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
+               set_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
+               atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
 
        }
        ocqe->cqe.user_data = user_data;
        ocqe->cqe.res = res;
        ocqe->cqe.flags = cflags;
+       if (is_cqe32) {
+               ocqe->cqe.big_cqe[0] = extra1;
+               ocqe->cqe.big_cqe[1] = extra2;
+       }
        list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
        return true;
 }
@@ -2034,42 +2412,114 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
                WRITE_ONCE(cqe->flags, cflags);
                return true;
        }
-       return io_cqring_event_overflow(ctx, user_data, res, cflags);
+       return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
 }
 
-static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
+static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx,
+                                           struct io_kiocb *req)
 {
-       trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
-       return __io_fill_cqe(req->ctx, req->user_data, res, cflags);
-}
+       struct io_uring_cqe *cqe;
 
-static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
-{
-       if (!(req->flags & REQ_F_CQE_SKIP))
-               __io_fill_cqe_req(req, res, cflags);
+       trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+                               req->cqe.res, req->cqe.flags, 0, 0);
+
+       /*
+        * If we can't get a cq entry, userspace overflowed the
+        * submission (by quite a lot). Increment the overflow count in
+        * the ring.
+        */
+       cqe = io_get_cqe(ctx);
+       if (likely(cqe)) {
+               memcpy(cqe, &req->cqe, sizeof(*cqe));
+               return true;
+       }
+       return io_cqring_event_overflow(ctx, req->cqe.user_data,
+                                       req->cqe.res, req->cqe.flags, 0, 0);
 }
 
-static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
-                                    s32 res, u32 cflags)
+static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx,
+                                             struct io_kiocb *req)
 {
-       ctx->cq_extra++;
-       trace_io_uring_complete(ctx, NULL, user_data, res, cflags);
-       return __io_fill_cqe(ctx, user_data, res, cflags);
+       struct io_uring_cqe *cqe;
+       u64 extra1 = req->extra1;
+       u64 extra2 = req->extra2;
+
+       trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+                               req->cqe.res, req->cqe.flags, extra1, extra2);
+
+       /*
+        * If we can't get a cq entry, userspace overflowed the
+        * submission (by quite a lot). Increment the overflow count in
+        * the ring.
+        */
+       cqe = io_get_cqe(ctx);
+       if (likely(cqe)) {
+               memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
+               cqe->big_cqe[0] = extra1;
+               cqe->big_cqe[1] = extra2;
+               return true;
+       }
+
+       return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res,
+                                       req->cqe.flags, extra1, extra2);
 }
 
-static void __io_req_complete_post(struct io_kiocb *req, s32 res,
-                                  u32 cflags)
+static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
+{
+       trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0);
+       return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
+}
+
+static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags,
+                               u64 extra1, u64 extra2)
 {
        struct io_ring_ctx *ctx = req->ctx;
+       struct io_uring_cqe *cqe;
+
+       if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32)))
+               return;
+       if (req->flags & REQ_F_CQE_SKIP)
+               return;
+
+       trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags,
+                               extra1, extra2);
 
-       if (!(req->flags & REQ_F_CQE_SKIP))
-               __io_fill_cqe_req(req, res, cflags);
+       /*
+        * If we can't get a cq entry, userspace overflowed the
+        * submission (by quite a lot). Increment the overflow count in
+        * the ring.
+        */
+       cqe = io_get_cqe(ctx);
+       if (likely(cqe)) {
+               WRITE_ONCE(cqe->user_data, req->cqe.user_data);
+               WRITE_ONCE(cqe->res, res);
+               WRITE_ONCE(cqe->flags, cflags);
+               WRITE_ONCE(cqe->big_cqe[0], extra1);
+               WRITE_ONCE(cqe->big_cqe[1], extra2);
+               return;
+       }
+
+       io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2);
+}
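
On the user side, IORING_SETUP_CQE32 doubles the CQ ring stride, so entries sit at twice the masked head index; the big_cqe[] words written above land directly after the regular CQE fields. A hedged sketch of reaping such a CQE (the ring-mapping boilerplate is assumed):

        #include <linux/io_uring.h>

        /* cq points at the mmap'ed CQE array; head/mask come from the CQ ring */
        static void read_cqe32(const struct io_uring_cqe *cq, unsigned int head,
                               unsigned int mask, __u64 *extra1, __u64 *extra2)
        {
                const struct io_uring_cqe *cqe = &cq[(head & mask) << 1];

                *extra1 = cqe->big_cqe[0];
                *extra2 = cqe->big_cqe[1];
        }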
+
+static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
+                                    s32 res, u32 cflags)
+{
+       ctx->cq_extra++;
+       trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
+       return __io_fill_cqe(ctx, user_data, res, cflags);
+}
+
+static void __io_req_complete_put(struct io_kiocb *req)
+{
        /*
         * If we're the last reference to this request, add to our locked
         * free_list cache.
         */
        if (req_ref_put_and_test(req)) {
-               if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
+               struct io_ring_ctx *ctx = req->ctx;
+
+               if (req->flags & IO_REQ_LINK_FLAGS) {
                        if (req->flags & IO_DISARM_MASK)
                                io_disarm_next(req);
                        if (req->link) {
@@ -2077,7 +2527,7 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
                                req->link = NULL;
                        }
                }
-               io_req_put_rsrc(req, ctx);
+               io_req_put_rsrc(req);
                /*
                 * Selected buffer deallocation in io_clean_op() assumes that
                 * we don't hold ->completion_lock. Clean them here to avoid
@@ -2091,8 +2541,23 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
        }
 }
 
-static void io_req_complete_post(struct io_kiocb *req, s32 res,
-                                u32 cflags)
+static void __io_req_complete_post(struct io_kiocb *req, s32 res,
+                                  u32 cflags)
+{
+       if (!(req->flags & REQ_F_CQE_SKIP))
+               __io_fill_cqe_req(req, res, cflags);
+       __io_req_complete_put(req);
+}
+
+static void __io_req_complete_post32(struct io_kiocb *req, s32 res,
+                                  u32 cflags, u64 extra1, u64 extra2)
+{
+       if (!(req->flags & REQ_F_CQE_SKIP))
+               __io_fill_cqe32_req(req, res, cflags, extra1, extra2);
+       __io_req_complete_put(req);
+}
+
+static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags)
 {
        struct io_ring_ctx *ctx = req->ctx;
 
@@ -2103,11 +2568,23 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res,
        io_cqring_ev_posted(ctx);
 }
 
+static void io_req_complete_post32(struct io_kiocb *req, s32 res,
+                                  u32 cflags, u64 extra1, u64 extra2)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+
+       spin_lock(&ctx->completion_lock);
+       __io_req_complete_post32(req, res, cflags, extra1, extra2);
+       io_commit_cqring(ctx);
+       spin_unlock(&ctx->completion_lock);
+       io_cqring_ev_posted(ctx);
+}
+
 static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
                                         u32 cflags)
 {
-       req->result = res;
-       req->cflags = cflags;
+       req->cqe.res = res;
+       req->cqe.flags = cflags;
        req->flags |= REQ_F_COMPLETE_INLINE;
 }
 
@@ -2120,8 +2597,23 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
                io_req_complete_post(req, res, cflags);
 }
 
+static inline void __io_req_complete32(struct io_kiocb *req,
+                                      unsigned int issue_flags, s32 res,
+                                      u32 cflags, u64 extra1, u64 extra2)
+{
+       if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
+               io_req_complete_state(req, res, cflags);
+               req->extra1 = extra1;
+               req->extra2 = extra2;
+       } else {
+               io_req_complete_post32(req, res, cflags, extra1, extra2);
+       }
+}
+
 static inline void io_req_complete(struct io_kiocb *req, s32 res)
 {
+       if (res < 0)
+               req_set_fail(req);
        __io_req_complete(req, 0, res, 0);
 }
 
@@ -2131,17 +2623,6 @@ static void io_req_complete_failed(struct io_kiocb *req, s32 res)
        io_req_complete_post(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED));
 }
 
-static void io_req_complete_fail_submit(struct io_kiocb *req)
-{
-       /*
-        * We don't submit, fail them all, for that replace hardlinks with
-        * normal links. Extra REQ_F_LINK is tolerated.
-        */
-       req->flags &= ~REQ_F_HARDLINK;
-       req->flags |= REQ_F_LINK;
-       io_req_complete_failed(req, req->result);
-}
-
 /*
  * Don't initialise the fields below on every allocation, but do that in
  * advance and keep them valid across allocations.
@@ -2152,7 +2633,7 @@ static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
        req->link = NULL;
        req->async_data = NULL;
        /* not necessary, but safer to zero */
-       req->result = 0;
+       req->cqe.res = 0;
 }
 
 static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
@@ -2164,19 +2645,9 @@ static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
        spin_unlock(&ctx->completion_lock);
 }
 
-/* Returns true IFF there are requests in the cache */
-static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
+static inline bool io_req_cache_empty(struct io_ring_ctx *ctx)
 {
-       struct io_submit_state *state = &ctx->submit_state;
-
-       /*
-        * If we have more than a batch's worth of requests in our IRQ side
-        * locked cache, grab the lock and move them over to our submission
-        * side cache.
-        */
-       if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH)
-               io_flush_cached_locked_reqs(ctx, state);
-       return !!state->free_list.next;
+       return !ctx->submit_state.free_list.next;
 }
 
 /*
@@ -2188,14 +2659,20 @@ static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
 static __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
        __must_hold(&ctx->uring_lock)
 {
-       struct io_submit_state *state = &ctx->submit_state;
        gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
        void *reqs[IO_REQ_ALLOC_BATCH];
-       struct io_kiocb *req;
        int ret, i;
 
-       if (likely(state->free_list.next || io_flush_cached_reqs(ctx)))
-               return true;
+       /*
+        * If we have more than a batch's worth of requests in our IRQ side
+        * locked cache, grab the lock and move them over to our submission
+        * side cache.
+        */
+       if (data_race(ctx->locked_free_nr) > IO_COMPL_BATCH) {
+               io_flush_cached_locked_reqs(ctx, &ctx->submit_state);
+               if (!io_req_cache_empty(ctx))
+                       return true;
+       }
 
        ret = kmem_cache_alloc_bulk(req_cachep, gfp, ARRAY_SIZE(reqs), reqs);
 
@@ -2212,17 +2689,17 @@ static __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
 
        percpu_ref_get_many(&ctx->refs, ret);
        for (i = 0; i < ret; i++) {
-               req = reqs[i];
+               struct io_kiocb *req = reqs[i];
 
                io_preinit_req(req, ctx);
-               wq_stack_add_head(&req->comp_list, &state->free_list);
+               io_req_add_to_cache(req, ctx);
        }
        return true;
 }
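
kmem_cache_alloc_bulk() is opportunistic: bulk allocation is all-or-nothing, so it can return zero objects even when a single allocation would succeed, which is why the refill path (partly elided between these hunks) falls back to one kmem_cache_alloc() before giving up. A condensed sketch of that pattern, assuming kernel context:

        #include <linux/slab.h>

        /* fill objs[] with up to want objects; returns how many were allocated */
        static int cache_refill(struct kmem_cache *cachep, void **objs, int want)
        {
                int got = kmem_cache_alloc_bulk(cachep, GFP_KERNEL | __GFP_NOWARN,
                                                want, objs);

                /* bulk alloc is all-or-nothing; retry a single object */
                if (unlikely(!got)) {
                        objs[0] = kmem_cache_alloc(cachep, GFP_KERNEL);
                        if (!objs[0])
                                return 0;
                        got = 1;
                }
                return got;
        }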
 
 static inline bool io_alloc_req_refill(struct io_ring_ctx *ctx)
 {
-       if (unlikely(!ctx->submit_state.free_list.next))
+       if (unlikely(io_req_cache_empty(ctx)))
                return __io_alloc_req_refill(ctx);
        return true;
 }
@@ -2251,11 +2728,11 @@ static inline void io_dismantle_req(struct io_kiocb *req)
                io_put_file(req->file);
 }
 
-static __cold void __io_free_req(struct io_kiocb *req)
+static __cold void io_free_req(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
 
-       io_req_put_rsrc(req, ctx);
+       io_req_put_rsrc(req);
        io_dismantle_req(req);
        io_put_task(req->task, 1);
 
@@ -2273,7 +2750,7 @@ static inline void io_remove_next_linked(struct io_kiocb *req)
        nxt->link = NULL;
 }
 
-static bool io_kill_linked_timeout(struct io_kiocb *req)
+static struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req)
        __must_hold(&req->ctx->completion_lock)
        __must_hold(&req->ctx->timeout_lock)
 {
@@ -2286,13 +2763,10 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
                link->timeout.head = NULL;
                if (hrtimer_try_to_cancel(&io->timer) != -1) {
                        list_del(&link->timeout.list);
-                       /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
-                       io_fill_cqe_req(link, -ECANCELED, 0);
-                       io_put_req_deferred(link);
-                       return true;
+                       return link;
                }
        }
-       return false;
+       return NULL;
 }
 
 static void io_fail_links(struct io_kiocb *req)
@@ -2306,19 +2780,19 @@ static void io_fail_links(struct io_kiocb *req)
                long res = -ECANCELED;
 
                if (link->flags & REQ_F_FAIL)
-                       res = link->result;
+                       res = link->cqe.res;
 
                nxt = link->link;
                link->link = NULL;
 
-               trace_io_uring_fail_link(req->ctx, req, req->user_data,
+               trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data,
                                        req->opcode, link);
 
-               if (!ignore_cqes) {
+               if (ignore_cqes)
+                       link->flags |= REQ_F_CQE_SKIP;
+               else
                        link->flags &= ~REQ_F_CQE_SKIP;
-                       io_fill_cqe_req(link, res, 0);
-               }
-               io_put_req_deferred(link);
+               __io_req_complete_post(link, res, 0);
                link = nxt;
        }
 }
@@ -2326,25 +2800,27 @@ static void io_fail_links(struct io_kiocb *req)
 static bool io_disarm_next(struct io_kiocb *req)
        __must_hold(&req->ctx->completion_lock)
 {
+       struct io_kiocb *link = NULL;
        bool posted = false;
 
        if (req->flags & REQ_F_ARM_LTIMEOUT) {
-               struct io_kiocb *link = req->link;
-
+               link = req->link;
                req->flags &= ~REQ_F_ARM_LTIMEOUT;
                if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
                        io_remove_next_linked(req);
-                       /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
-                       io_fill_cqe_req(link, -ECANCELED, 0);
-                       io_put_req_deferred(link);
+                       io_req_tw_post_queue(link, -ECANCELED, 0);
                        posted = true;
                }
        } else if (req->flags & REQ_F_LINK_TIMEOUT) {
                struct io_ring_ctx *ctx = req->ctx;
 
                spin_lock_irq(&ctx->timeout_lock);
-               posted = io_kill_linked_timeout(req);
+               link = io_disarm_linked_timeout(req);
                spin_unlock_irq(&ctx->timeout_lock);
+               if (link) {
+                       posted = true;
+                       io_req_tw_post_queue(link, -ECANCELED, 0);
+               }
        }
        if (unlikely((req->flags & REQ_F_FAIL) &&
                     !(req->flags & REQ_F_HARDLINK))) {
@@ -2361,8 +2837,7 @@ static void __io_req_find_next_prep(struct io_kiocb *req)
 
        spin_lock(&ctx->completion_lock);
        posted = io_disarm_next(req);
-       if (posted)
-               io_commit_cqring(ctx);
+       io_commit_cqring(ctx);
        spin_unlock(&ctx->completion_lock);
        if (posted)
                io_cqring_ev_posted(ctx);
@@ -2372,8 +2847,6 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
 {
        struct io_kiocb *nxt;
 
-       if (likely(!(req->flags & (REQ_F_LINK|REQ_F_HARDLINK))))
-               return NULL;
        /*
         * If LINK is set, we have dependent requests in this chain. If we
         * didn't fail this request, queue the first one up, moving any other
@@ -2391,6 +2864,8 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
 {
        if (!ctx)
                return;
+       if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+               atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
        if (*locked) {
                io_submit_flush_completions(ctx);
                mutex_unlock(&ctx->uring_lock);
@@ -2434,7 +2909,7 @@ static void handle_prev_tw_list(struct io_wq_work_node *node,
                if (likely(*uring_locked))
                        req->io_task_work.func(req, uring_locked);
                else
-                       __io_req_complete_post(req, req->result,
+                       __io_req_complete_post(req, req->cqe.res,
                                                io_put_kbuf_comp(req));
                node = next;
        } while (node);
@@ -2475,15 +2950,11 @@ static void tctx_task_work(struct callback_head *cb)
        while (1) {
                struct io_wq_work_node *node1, *node2;
 
-               if (!tctx->task_list.first &&
-                   !tctx->prior_task_list.first && uring_locked)
-                       io_submit_flush_completions(ctx);
-
                spin_lock_irq(&tctx->task_lock);
-               node1 = tctx->prior_task_list.first;
+               node1 = tctx->prio_task_list.first;
                node2 = tctx->task_list.first;
                INIT_WQ_LIST(&tctx->task_list);
-               INIT_WQ_LIST(&tctx->prior_task_list);
+               INIT_WQ_LIST(&tctx->prio_task_list);
                if (!node2 && !node1)
                        tctx->task_running = false;
                spin_unlock_irq(&tctx->task_lock);
@@ -2492,10 +2963,13 @@ static void tctx_task_work(struct callback_head *cb)
 
                if (node1)
                        handle_prev_tw_list(node1, &ctx, &uring_locked);
-
                if (node2)
                        handle_tw_list(node2, &ctx, &uring_locked);
                cond_resched();
+
+               if (data_race(!tctx->task_list.first) &&
+                   data_race(!tctx->prio_task_list.first) && uring_locked)
+                       io_submit_flush_completions(ctx);
        }
 
        ctx_flush_and_put(ctx, &uring_locked);
@@ -2505,24 +2979,19 @@ static void tctx_task_work(struct callback_head *cb)
                io_uring_drop_tctx_refs(current);
 }
 
-static void io_req_task_work_add(struct io_kiocb *req, bool priority)
+static void __io_req_task_work_add(struct io_kiocb *req,
+                                  struct io_uring_task *tctx,
+                                  struct io_wq_work_list *list)
 {
-       struct task_struct *tsk = req->task;
-       struct io_uring_task *tctx = tsk->io_uring;
-       enum task_work_notify_mode notify;
+       struct io_ring_ctx *ctx = req->ctx;
        struct io_wq_work_node *node;
        unsigned long flags;
        bool running;
 
-       WARN_ON_ONCE(!tctx);
-
        io_drop_inflight_file(req);
 
        spin_lock_irqsave(&tctx->task_lock, flags);
-       if (priority)
-               wq_list_add_tail(&req->io_task_work.node, &tctx->prior_task_list);
-       else
-               wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
+       wq_list_add_tail(&req->io_task_work.node, list);
        running = tctx->task_running;
        if (!running)
                tctx->task_running = true;
@@ -2532,22 +3001,15 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority)
        if (running)
                return;
 
-       /*
-        * SQPOLL kernel thread doesn't need notification, just a wakeup. For
-        * all other cases, use TWA_SIGNAL unconditionally to ensure we're
-        * processing task_work. There's no reliable way to tell if TWA_RESUME
-        * will do the job.
-        */
-       notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? TWA_NONE : TWA_SIGNAL;
-       if (likely(!task_work_add(tsk, &tctx->task_work, notify))) {
-               if (notify == TWA_NONE)
-                       wake_up_process(tsk);
+       if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+               atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+
+       if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
                return;
-       }
 
        spin_lock_irqsave(&tctx->task_lock, flags);
        tctx->task_running = false;
-       node = wq_list_merge(&tctx->prior_task_list, &tctx->task_list);
+       node = wq_list_merge(&tctx->prio_task_list, &tctx->task_list);
        spin_unlock_irqrestore(&tctx->task_lock, flags);
 
        while (node) {
@@ -2559,47 +3021,73 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority)
        }
 }
 
-static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
+static void io_req_task_work_add(struct io_kiocb *req)
 {
-       struct io_ring_ctx *ctx = req->ctx;
+       struct io_uring_task *tctx = req->task->io_uring;
+
+       __io_req_task_work_add(req, tctx, &tctx->task_list);
+}
 
+static void io_req_task_prio_work_add(struct io_kiocb *req)
+{
+       struct io_uring_task *tctx = req->task->io_uring;
+
+       if (req->ctx->flags & IORING_SETUP_SQPOLL)
+               __io_req_task_work_add(req, tctx, &tctx->prio_task_list);
+       else
+               __io_req_task_work_add(req, tctx, &tctx->task_list);
+}
+
+static void io_req_tw_post(struct io_kiocb *req, bool *locked)
+{
+       io_req_complete_post(req, req->cqe.res, req->cqe.flags);
+}
+
+static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags)
+{
+       req->cqe.res = res;
+       req->cqe.flags = cflags;
+       req->io_task_work.func = io_req_tw_post;
+       io_req_task_work_add(req);
+}
+
+static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
+{
        /* not needed for normal modes, but SQPOLL depends on it */
-       io_tw_lock(ctx, locked);
-       io_req_complete_failed(req, req->result);
+       io_tw_lock(req->ctx, locked);
+       io_req_complete_failed(req, req->cqe.res);
 }
 
 static void io_req_task_submit(struct io_kiocb *req, bool *locked)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-
-       io_tw_lock(ctx, locked);
+       io_tw_lock(req->ctx, locked);
        /* req->task == current here, checking PF_EXITING is safe */
        if (likely(!(req->task->flags & PF_EXITING)))
-               __io_queue_sqe(req);
+               io_queue_sqe(req);
        else
                io_req_complete_failed(req, -EFAULT);
 }
 
 static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
 {
-       req->result = ret;
+       req->cqe.res = ret;
        req->io_task_work.func = io_req_task_cancel;
-       io_req_task_work_add(req, false);
+       io_req_task_work_add(req);
 }
 
 static void io_req_task_queue(struct io_kiocb *req)
 {
        req->io_task_work.func = io_req_task_submit;
-       io_req_task_work_add(req, false);
+       io_req_task_work_add(req);
 }
 
 static void io_req_task_queue_reissue(struct io_kiocb *req)
 {
-       req->io_task_work.func = io_queue_async_work;
-       io_req_task_work_add(req, false);
+       req->io_task_work.func = io_queue_iowq;
+       io_req_task_work_add(req);
 }
 
-static inline void io_queue_next(struct io_kiocb *req)
+static void io_queue_next(struct io_kiocb *req)
 {
        struct io_kiocb *nxt = io_req_find_next(req);
 
@@ -2607,17 +3095,6 @@ static inline void io_queue_next(struct io_kiocb *req)
                io_req_task_queue(nxt);
 }
 
-static void io_free_req(struct io_kiocb *req)
-{
-       io_queue_next(req);
-       __io_free_req(req);
-}
-
-static void io_free_req_work(struct io_kiocb *req, bool *locked)
-{
-       io_free_req(req);
-}
-
 static void io_free_batch_list(struct io_ring_ctx *ctx,
                                struct io_wq_work_node *node)
        __must_hold(&ctx->uring_lock)
@@ -2629,15 +3106,30 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
                struct io_kiocb *req = container_of(node, struct io_kiocb,
                                                    comp_list);
 
-               if (unlikely(req->flags & REQ_F_REFCOUNT)) {
-                       node = req->comp_list.next;
-                       if (!req_ref_put_and_test(req))
-                               continue;
+               if (unlikely(req->flags & IO_REQ_CLEAN_SLOW_FLAGS)) {
+                       if (req->flags & REQ_F_REFCOUNT) {
+                               node = req->comp_list.next;
+                               if (!req_ref_put_and_test(req))
+                                       continue;
+                       }
+                       if ((req->flags & REQ_F_POLLED) && req->apoll) {
+                               struct async_poll *apoll = req->apoll;
+
+                               if (apoll->double_poll)
+                                       kfree(apoll->double_poll);
+                               list_add(&apoll->poll.wait.entry,
+                                               &ctx->apoll_cache);
+                               req->flags &= ~REQ_F_POLLED;
+                       }
+                       if (req->flags & IO_REQ_LINK_FLAGS)
+                               io_queue_next(req);
+                       if (unlikely(req->flags & IO_REQ_CLEAN_FLAGS))
+                               io_clean_op(req);
                }
+               if (!(req->flags & REQ_F_FIXED_FILE))
+                       io_put_file(req->file);
 
                io_req_put_rsrc_locked(req, ctx);
-               io_queue_next(req);
-               io_dismantle_req(req);
 
                if (req->task != task) {
                        if (task)
@@ -2647,7 +3139,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
                }
                task_refs++;
                node = req->comp_list.next;
-               wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
+               io_req_add_to_cache(req, ctx);
        } while (node);
 
        if (task)
@@ -2666,16 +3158,11 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
                        struct io_kiocb *req = container_of(node, struct io_kiocb,
                                                    comp_list);
 
-                       if (!(req->flags & REQ_F_CQE_SKIP))
-                               __io_fill_cqe_req(req, req->result, req->cflags);
-                       if ((req->flags & REQ_F_POLLED) && req->apoll) {
-                               struct async_poll *apoll = req->apoll;
-
-                               if (apoll->double_poll)
-                                       kfree(apoll->double_poll);
-                               list_add(&apoll->poll.wait.entry,
-                                               &ctx->apoll_cache);
-                               req->flags &= ~REQ_F_POLLED;
+                       if (!(req->flags & REQ_F_CQE_SKIP)) {
+                               if (!(ctx->flags & IORING_SETUP_CQE32))
+                                       __io_fill_cqe_req_filled(ctx, req);
+                               else
+                                       __io_fill_cqe32_req_filled(ctx, req);
                        }
                }
 
@@ -2698,23 +3185,18 @@ static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
        struct io_kiocb *nxt = NULL;
 
        if (req_ref_put_and_test(req)) {
-               nxt = io_req_find_next(req);
-               __io_free_req(req);
+               if (unlikely(req->flags & IO_REQ_LINK_FLAGS))
+                       nxt = io_req_find_next(req);
+               io_free_req(req);
        }
        return nxt;
 }
 
 static inline void io_put_req(struct io_kiocb *req)
-{
-       if (req_ref_put_and_test(req))
-               io_free_req(req);
-}
-
-static inline void io_put_req_deferred(struct io_kiocb *req)
 {
        if (req_ref_put_and_test(req)) {
-               req->io_task_work.func = io_free_req_work;
-               io_req_task_work_add(req, false);
+               io_queue_next(req);
+               io_free_req(req);
        }
 }
 
@@ -2800,7 +3282,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
                nr_events++;
                if (unlikely(req->flags & REQ_F_CQE_SKIP))
                        continue;
-               __io_fill_cqe_req(req, req->result, io_put_kbuf(req, 0));
+               __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
        }
 
        if (unlikely(!nr_events))
@@ -2846,22 +3328,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 {
        unsigned int nr_events = 0;
        int ret = 0;
+       unsigned long check_cq;
 
-       /*
-        * We disallow the app entering submit/complete with polling, but we
-        * still need to lock the ring to prevent racing with polled issue
-        * that got punted to a workqueue.
-        */
-       mutex_lock(&ctx->uring_lock);
        /*
         * Don't enter poll loop if we already have events pending.
         * If we do, we can potentially be spinning for commands that
         * already triggered a CQE (eg in error).
         */
-       if (test_bit(0, &ctx->check_cq_overflow))
+       check_cq = READ_ONCE(ctx->check_cq);
+       if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
                __io_cqring_overflow_flush(ctx, false);
        if (io_cqring_events(ctx))
-               goto out;
+               return 0;
+
+       /*
+        * Similarly do not spin if we have not informed the user of any
+        * dropped CQE.
+        */
+       if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT)))
+               return -EBADR;
+
        do {
                /*
                 * If a submit got punted to a workqueue, we can have the
@@ -2891,8 +3377,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
                nr_events += ret;
                ret = 0;
        } while (nr_events < min && !need_resched());
-out:
-       mutex_unlock(&ctx->uring_lock);
+
        return ret;
 }
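
-EBADR is new in this interface: once a CQE has been dropped (IO_CHECK_CQ_DROPPED_BIT, set when the overflow allocation above fails), reaping paths report the loss instead of silently spinning. A hedged sketch of how an application might surface that from a raw io_uring_enter() call:

        #include <errno.h>
        #include <linux/io_uring.h>
        #include <stdio.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        static int wait_cqes(int ring_fd, unsigned int want)
        {
                int ret = syscall(__NR_io_uring_enter, ring_fd, 0, want,
                                  IORING_ENTER_GETEVENTS, NULL, 0);

                if (ret < 0 && errno == EBADR) {
                        /* CQEs were lost; in-flight accounting must be rebuilt */
                        fprintf(stderr, "io_uring dropped completions\n");
                        return -EBADR;
                }
                return ret;
        }
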
 
@@ -2965,21 +3450,21 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
        } else {
                fsnotify_access(req->file);
        }
-       if (unlikely(res != req->result)) {
+       if (unlikely(res != req->cqe.res)) {
                if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
                    io_rw_should_reissue(req)) {
                        req->flags |= REQ_F_REISSUE;
                        return true;
                }
                req_set_fail(req);
-               req->result = res;
+               req->cqe.res = res;
        }
        return false;
 }
 
 static inline void io_req_task_complete(struct io_kiocb *req, bool *locked)
 {
-       int res = req->result;
+       int res = req->cqe.res;
 
        if (*locked) {
                io_req_complete_state(req, res, io_put_kbuf(req, 0));
@@ -2995,7 +3480,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res,
 {
        if (__io_complete_rw_common(req, res))
                return;
-       __io_req_complete(req, issue_flags, req->result,
+       __io_req_complete(req, issue_flags, req->cqe.res,
                                io_put_kbuf(req, issue_flags));
 }
 
@@ -3005,9 +3490,9 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
 
        if (__io_complete_rw_common(req, res))
                return;
-       req->result = res;
+       req->cqe.res = res;
        req->io_task_work.func = io_req_task_complete;
-       io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
+       io_req_task_prio_work_add(req);
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
@@ -3016,12 +3501,12 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
 
        if (kiocb->ki_flags & IOCB_WRITE)
                kiocb_end_write(req);
-       if (unlikely(res != req->result)) {
+       if (unlikely(res != req->cqe.res)) {
                if (res == -EAGAIN && io_rw_should_reissue(req)) {
                        req->flags |= REQ_F_REISSUE;
                        return;
                }
-               req->result = res;
+               req->cqe.res = res;
        }
 
        /* order with io_iopoll_complete() checking ->iopoll_completed */
@@ -3131,6 +3616,8 @@ static unsigned int io_file_get_flags(struct file *file)
                res |= FFS_ISREG;
        if (__io_file_supports_nowait(file, mode))
                res |= FFS_NOWAIT;
+       if (io_file_need_scm(file))
+               res |= FFS_SCM;
        return res;
 }
 
@@ -3162,6 +3649,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        req->rw.addr = READ_ONCE(sqe->addr);
        req->rw.len = READ_ONCE(sqe->len);
        req->rw.flags = READ_ONCE(sqe->rw_flags);
+       /* used for fixed read/write too - just read unconditionally */
        req->buf_index = READ_ONCE(sqe->buf_index);
        return 0;
 }
@@ -3310,77 +3798,96 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
        return __io_import_fixed(req, rw, iter, imu);
 }
 
-static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
-{
-       if (needs_lock)
-               mutex_unlock(&ctx->uring_lock);
-}
-
-static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
+static int io_buffer_add_list(struct io_ring_ctx *ctx,
+                             struct io_buffer_list *bl, unsigned int bgid)
 {
-       /*
-        * "Normal" inline submissions always hold the uring_lock, since we
-        * grab it from the system call. Same is true for the SQPOLL offload.
-        * The only exception is when we've detached the request and issue it
-        * from an async worker thread, grab the lock for that case.
-        */
-       if (needs_lock)
-               mutex_lock(&ctx->uring_lock);
-}
-
-static void io_buffer_add_list(struct io_ring_ctx *ctx,
-                              struct io_buffer_list *bl, unsigned int bgid)
-{
-       struct list_head *list;
-
-       list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
-       INIT_LIST_HEAD(&bl->buf_list);
        bl->bgid = bgid;
-       list_add(&bl->list, list);
+       if (bgid < BGID_ARRAY)
+               return 0;
+
+       return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
 }
 
-static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
-                                         int bgid, unsigned int issue_flags)
+static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
+                                             struct io_buffer_list *bl)
 {
-       struct io_buffer *kbuf = req->kbuf;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
-       struct io_ring_ctx *ctx = req->ctx;
-       struct io_buffer_list *bl;
-
-       if (req->flags & REQ_F_BUFFER_SELECTED)
-               return kbuf;
-
-       io_ring_submit_lock(ctx, needs_lock);
-
-       lockdep_assert_held(&ctx->uring_lock);
+       if (!list_empty(&bl->buf_list)) {
+               struct io_buffer *kbuf;
 
-       bl = io_buffer_get_list(ctx, bgid);
-       if (bl && !list_empty(&bl->buf_list)) {
                kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
                list_del(&kbuf->list);
                if (*len > kbuf->len)
                        *len = kbuf->len;
                req->flags |= REQ_F_BUFFER_SELECTED;
                req->kbuf = kbuf;
+               req->buf_index = kbuf->bid;
+               return u64_to_user_ptr(kbuf->addr);
+       }
+       return NULL;
+}
+
+static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
+                                         struct io_buffer_list *bl,
+                                         unsigned int issue_flags)
+{
+       struct io_uring_buf_ring *br = bl->buf_ring;
+       struct io_uring_buf *buf;
+       __u32 head = bl->head;
+
+       if (unlikely(smp_load_acquire(&br->tail) == head))
+               return NULL;
+
+       head &= bl->mask;
+       if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
+               buf = &br->bufs[head];
        } else {
-               kbuf = ERR_PTR(-ENOBUFS);
+               int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
+               int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
+               buf = page_address(bl->buf_pages[index]);
+               buf += off;
        }
+       if (*len > buf->len)
+               *len = buf->len;
+       req->flags |= REQ_F_BUFFER_RING;
+       req->buf_list = bl;
+       req->buf_index = buf->bid;
 
-       io_ring_submit_unlock(req->ctx, needs_lock);
-       return kbuf;
+       if (issue_flags & IO_URING_F_UNLOCKED) {
+               /*
+                * If we came in unlocked, we have no choice but to consume the
+                * buffer here. This does mean it'll be pinned until the IO
+                * completes. But coming in unlocked means we're in io-wq
+                * context, hence there should be no further retry. For the
+                * locked case, the caller must call the commit when
+                * the transfer completes (or if we get -EAGAIN and must poll
+                * or retry).
+                */
+               req->buf_list = NULL;
+               bl->head++;
+       }
+       return u64_to_user_ptr(buf->addr);
 }
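
The mask/head bookkeeping above has a userspace counterpart: the application maps an array of struct io_uring_buf, registers it with IORING_REGISTER_PBUF_RING, and advances the tail as it publishes buffers, while the kernel owns the head (bl->head above). A minimal sketch under those assumptions, with error handling and unmapping omitted and an illustrative ring size:

        #include <linux/io_uring.h>
        #include <string.h>
        #include <sys/mman.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        #define RING_ENTRIES    8       /* must be a power of two */

        static struct io_uring_buf_ring *setup_buf_ring(int ring_fd, __u16 bgid)
        {
                struct io_uring_buf_reg reg;
                struct io_uring_buf_ring *br;

                br = mmap(NULL, RING_ENTRIES * sizeof(struct io_uring_buf),
                          PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE,
                          -1, 0);
                if (br == MAP_FAILED)
                        return NULL;

                memset(&reg, 0, sizeof(reg));
                reg.ring_addr = (unsigned long) br;
                reg.ring_entries = RING_ENTRIES;
                reg.bgid = bgid;
                if (syscall(__NR_io_uring_register, ring_fd,
                            IORING_REGISTER_PBUF_RING, &reg, 1) < 0)
                        return NULL;
                return br;
        }

        /* publish one buffer; the release store pairs with the kernel's
         * smp_load_acquire(&br->tail) in io_ring_buffer_select() */
        static void add_buf(struct io_uring_buf_ring *br, __u16 tail, void *addr,
                            __u32 len, __u16 bid)
        {
                struct io_uring_buf *buf = &br->bufs[tail & (RING_ENTRIES - 1)];

                buf->addr = (unsigned long) addr;
                buf->len = len;
                buf->bid = bid;
                __atomic_store_n(&br->tail, tail + 1, __ATOMIC_RELEASE);
        }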
 
-static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
-                                       unsigned int issue_flags)
+static void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+                                    unsigned int issue_flags)
 {
-       struct io_buffer *kbuf;
-       u16 bgid;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_buffer_list *bl;
+       void __user *ret = NULL;
+
+       io_ring_submit_lock(req->ctx, issue_flags);
 
-       bgid = req->buf_index;
-       kbuf = io_buffer_select(req, len, bgid, issue_flags);
-       if (IS_ERR(kbuf))
-               return kbuf;
-       return u64_to_user_ptr(kbuf->addr);
+       bl = io_buffer_get_list(ctx, req->buf_index);
+       if (likely(bl)) {
+               if (bl->buf_nr_pages)
+                       ret = io_ring_buffer_select(req, len, bl, issue_flags);
+               else
+                       ret = io_provided_buffer_select(req, len, bl);
+       }
+       io_ring_submit_unlock(req->ctx, issue_flags);
+       return ret;
 }
 
 #ifdef CONFIG_COMPAT
@@ -3390,7 +3897,7 @@ static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
        struct compat_iovec __user *uiov;
        compat_ssize_t clen;
        void __user *buf;
-       ssize_t len;
+       size_t len;
 
        uiov = u64_to_user_ptr(req->rw.addr);
        if (!access_ok(uiov, sizeof(*uiov)))
@@ -3401,11 +3908,12 @@ static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
                return -EINVAL;
 
        len = clen;
-       buf = io_rw_buffer_select(req, &len, issue_flags);
-       if (IS_ERR(buf))
-               return PTR_ERR(buf);
+       buf = io_buffer_select(req, &len, issue_flags);
+       if (!buf)
+               return -ENOBUFS;
+       req->rw.addr = (unsigned long) buf;
        iov[0].iov_base = buf;
-       iov[0].iov_len = (compat_size_t) len;
+       req->rw.len = iov[0].iov_len = (compat_size_t) len;
        return 0;
 }
 #endif
@@ -3423,22 +3931,21 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
        len = iov[0].iov_len;
        if (len < 0)
                return -EINVAL;
-       buf = io_rw_buffer_select(req, &len, issue_flags);
-       if (IS_ERR(buf))
-               return PTR_ERR(buf);
+       buf = io_buffer_select(req, &len, issue_flags);
+       if (!buf)
+               return -ENOBUFS;
+       req->rw.addr = (unsigned long) buf;
        iov[0].iov_base = buf;
-       iov[0].iov_len = len;
+       req->rw.len = iov[0].iov_len = len;
        return 0;
 }
 
 static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
                                    unsigned int issue_flags)
 {
-       if (req->flags & REQ_F_BUFFER_SELECTED) {
-               struct io_buffer *kbuf = req->kbuf;
-
-               iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
-               iov[0].iov_len = kbuf->len;
+       if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
+               iov[0].iov_base = u64_to_user_ptr(req->rw.addr);
+               iov[0].iov_len = req->rw.len;
                return 0;
        }
        if (req->rw.len != 1)
@@ -3452,6 +3959,13 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
        return __io_iov_buffer_select(req, iov, issue_flags);
 }
 
+static inline bool io_do_buffer_select(struct io_kiocb *req)
+{
+       if (!(req->flags & REQ_F_BUFFER_SELECT))
+               return false;
+       return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
+}
+
 static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req,
                                       struct io_rw_state *s,
                                       unsigned int issue_flags)
@@ -3470,18 +3984,15 @@ static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req,
                return NULL;
        }
 
-       /* buffer index only valid with fixed read/write, or buffer select  */
-       if (unlikely(req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)))
-               return ERR_PTR(-EINVAL);
-
        buf = u64_to_user_ptr(req->rw.addr);
        sqe_len = req->rw.len;
 
        if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
-               if (req->flags & REQ_F_BUFFER_SELECT) {
-                       buf = io_rw_buffer_select(req, &sqe_len, issue_flags);
-                       if (IS_ERR(buf))
-                               return ERR_CAST(buf);
+               if (io_do_buffer_select(req)) {
+                       buf = io_buffer_select(req, &sqe_len, issue_flags);
+                       if (!buf)
+                               return ERR_PTR(-ENOBUFS);
+                       req->rw.addr = (unsigned long) buf;
                        req->rw.len = sqe_len;
                }
 
@@ -3836,7 +4347,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                kfree(iovec);
                return ret;
        }
-       req->result = iov_iter_count(&s->iter);
+       req->cqe.res = iov_iter_count(&s->iter);
 
        if (force_nonblock) {
                /* If the file doesn't support async, just async punt */
@@ -3852,7 +4363,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
        ppos = io_kiocb_update_pos(req);
 
-       ret = rw_verify_area(READ, req->file, ppos, req->result);
+       ret = rw_verify_area(READ, req->file, ppos, req->cqe.res);
        if (unlikely(ret)) {
                kfree(iovec);
                return ret;
@@ -3874,7 +4385,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                ret = 0;
        } else if (ret == -EIOCBQUEUED) {
                goto out_free;
-       } else if (ret == req->result || ret <= 0 || !force_nonblock ||
+       } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock ||
                   (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
                /* read all, failed, already did sync or don't want to retry */
                goto done;
@@ -3964,7 +4475,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
                kfree(iovec);
                return ret;
        }
-       req->result = iov_iter_count(&s->iter);
+       req->cqe.res = iov_iter_count(&s->iter);
 
        if (force_nonblock) {
                /* If the file doesn't support async, just async punt */
@@ -3984,7 +4495,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 
        ppos = io_kiocb_update_pos(req);
 
-       ret = rw_verify_area(WRITE, req->file, ppos, req->result);
+       ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res);
        if (unlikely(ret))
                goto out_free;
 
@@ -4048,9 +4559,7 @@ static int io_renameat_prep(struct io_kiocb *req,
        struct io_rename *ren = &req->rename;
        const char __user *oldf, *newf;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4087,22 +4596,257 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
                                ren->newpath, ren->flags);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
 
+static inline void __io_xattr_finish(struct io_kiocb *req)
+{
+       struct io_xattr *ix = &req->xattr;
+
+       if (ix->filename)
+               putname(ix->filename);
+
+       kfree(ix->ctx.kname);
+       kvfree(ix->ctx.kvalue);
+}
+
+static void io_xattr_finish(struct io_kiocb *req, int ret)
+{
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+
+       __io_xattr_finish(req);
+       io_req_complete(req, ret);
+}
+
+static int __io_getxattr_prep(struct io_kiocb *req,
+                             const struct io_uring_sqe *sqe)
+{
+       struct io_xattr *ix = &req->xattr;
+       const char __user *name;
+       int ret;
+
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+
+       ix->filename = NULL;
+       ix->ctx.kvalue = NULL;
+       name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+       ix->ctx.size = READ_ONCE(sqe->len);
+       ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+       if (ix->ctx.flags)
+               return -EINVAL;
+
+       ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
+       if (!ix->ctx.kname)
+               return -ENOMEM;
+
+       ret = strncpy_from_user(ix->ctx.kname->name, name,
+                               sizeof(ix->ctx.kname->name));
+       if (!ret || ret == sizeof(ix->ctx.kname->name))
+               ret = -ERANGE;
+       if (ret < 0) {
+               kfree(ix->ctx.kname);
+               return ret;
+       }
+
+       req->flags |= REQ_F_NEED_CLEANUP;
+       return 0;
+}
+
+static int io_fgetxattr_prep(struct io_kiocb *req,
+                            const struct io_uring_sqe *sqe)
+{
+       return __io_getxattr_prep(req, sqe);
+}
+
+static int io_getxattr_prep(struct io_kiocb *req,
+                           const struct io_uring_sqe *sqe)
+{
+       struct io_xattr *ix = &req->xattr;
+       const char __user *path;
+       int ret;
+
+       ret = __io_getxattr_prep(req, sqe);
+       if (ret)
+               return ret;
+
+       path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+       ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+       if (IS_ERR(ix->filename)) {
+               ret = PTR_ERR(ix->filename);
+               ix->filename = NULL;
+       }
+
+       return ret;
+}
+
+static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_xattr *ix = &req->xattr;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+       ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt),
+                       req->file->f_path.dentry,
+                       &ix->ctx);
+
+       io_xattr_finish(req, ret);
+       return 0;
+}
+
+static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_xattr *ix = &req->xattr;
+       unsigned int lookup_flags = LOOKUP_FOLLOW;
+       struct path path;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+retry:
+       ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
+       if (!ret) {
+               ret = do_getxattr(mnt_user_ns(path.mnt),
+                               path.dentry,
+                               &ix->ctx);
+
+               path_put(&path);
+               if (retry_estale(ret, lookup_flags)) {
+                       lookup_flags |= LOOKUP_REVAL;
+                       goto retry;
+               }
+       }
+
+       io_xattr_finish(req, ret);
+       return 0;
+}
+
+static int __io_setxattr_prep(struct io_kiocb *req,
+                       const struct io_uring_sqe *sqe)
+{
+       struct io_xattr *ix = &req->xattr;
+       const char __user *name;
+       int ret;
+
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+
+       ix->filename = NULL;
+       name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+       ix->ctx.kvalue = NULL;
+       ix->ctx.size = READ_ONCE(sqe->len);
+       ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+       ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
+       if (!ix->ctx.kname)
+               return -ENOMEM;
+
+       ret = setxattr_copy(name, &ix->ctx);
+       if (ret) {
+               kfree(ix->ctx.kname);
+               return ret;
+       }
+
+       req->flags |= REQ_F_NEED_CLEANUP;
+       return 0;
+}
+
+static int io_setxattr_prep(struct io_kiocb *req,
+                       const struct io_uring_sqe *sqe)
+{
+       struct io_xattr *ix = &req->xattr;
+       const char __user *path;
+       int ret;
+
+       ret = __io_setxattr_prep(req, sqe);
+       if (ret)
+               return ret;
+
+       path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+       ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+       if (IS_ERR(ix->filename)) {
+               ret = PTR_ERR(ix->filename);
+               ix->filename = NULL;
+       }
+
+       return ret;
+}
+
+static int io_fsetxattr_prep(struct io_kiocb *req,
+                       const struct io_uring_sqe *sqe)
+{
+       return __io_setxattr_prep(req, sqe);
+}
+
+static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
+                       struct path *path)
+{
+       struct io_xattr *ix = &req->xattr;
+       int ret;
+
+       ret = mnt_want_write(path->mnt);
+       if (!ret) {
+               ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry, &ix->ctx);
+               mnt_drop_write(path->mnt);
+       }
+
+       return ret;
+}
+
+static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+       ret = __io_setxattr(req, issue_flags, &req->file->f_path);
+       io_xattr_finish(req, ret);
+
+       return 0;
+}
+
+static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_xattr *ix = &req->xattr;
+       unsigned int lookup_flags = LOOKUP_FOLLOW;
+       struct path path;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+retry:
+       ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
+       if (!ret) {
+               ret = __io_setxattr(req, issue_flags, &path);
+               path_put(&path);
+               if (retry_estale(ret, lookup_flags)) {
+                       lookup_flags |= LOOKUP_REVAL;
+                       goto retry;
+               }
+       }
+
+       io_xattr_finish(req, ret);
+       return 0;
+}
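
Taken together, these handlers define the SQE layout for the new xattr opcodes: addr carries the attribute name, addr2 the value buffer, addr3 the path (for the non-fd variants), len the value size, and xattr_flags the setxattr flags. A hedged sketch of preparing a path-based IORING_OP_GETXATTR, mirroring what a liburing-style prep helper might do:

        #include <linux/io_uring.h>
        #include <string.h>

        static void prep_getxattr(struct io_uring_sqe *sqe, const char *name,
                                  char *value, unsigned int len, const char *path)
        {
                memset(sqe, 0, sizeof(*sqe));
                sqe->opcode = IORING_OP_GETXATTR;
                sqe->addr = (unsigned long) name;       /* copied into ix->ctx.kname */
                sqe->addr2 = (unsigned long) value;     /* becomes ix->ctx.cvalue */
                sqe->addr3 = (unsigned long) path;      /* resolved with LOOKUP_FOLLOW */
                sqe->len = len;                         /* becomes ix->ctx.size */
                sqe->xattr_flags = 0;                   /* must be zero for getxattr */
        }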
+
 static int io_unlinkat_prep(struct io_kiocb *req,
                            const struct io_uring_sqe *sqe)
 {
        struct io_unlink *un = &req->unlink;
        const char __user *fname;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
-           sqe->splice_fd_in)
+       if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4136,8 +4880,6 @@ static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
                ret = do_unlinkat(un->dfd, un->filename);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
@@ -4148,10 +4890,7 @@ static int io_mkdirat_prep(struct io_kiocb *req,
        struct io_mkdir *mkd = &req->mkdir;
        const char __user *fname;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
-           sqe->splice_fd_in)
+       if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4179,8 +4918,6 @@ static int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
        ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
@@ -4191,10 +4928,7 @@ static int io_symlinkat_prep(struct io_kiocb *req,
        struct io_symlink *sl = &req->symlink;
        const char __user *oldpath, *newpath;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
-           sqe->splice_fd_in)
+       if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4228,8 +4962,6 @@ static int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
        ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
@@ -4240,9 +4972,7 @@ static int io_linkat_prep(struct io_kiocb *req,
        struct io_hardlink *lnk = &req->hardlink;
        const char __user *oldf, *newf;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+       if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4279,9 +5009,97 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
                                lnk->newpath, lnk->flags);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
+       io_req_complete(req, ret);
+       return 0;
+}
+
+static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
+{
+       req->uring_cmd.task_work_cb(&req->uring_cmd);
+}
+
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *))
+{
+       struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
+       req->uring_cmd.task_work_cb = task_work_cb;
+       req->io_task_work.func = io_uring_cmd_work;
+       io_req_task_prio_work_add(req);
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+
+/*
+ * Called by consumers of io_uring_cmd that originally returned
+ * -EIOCBQUEUED upon receiving the command.
+ */
+void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
+{
+       struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
        if (ret < 0)
                req_set_fail(req);
-       io_req_complete(req, ret);
+       if (req->ctx->flags & IORING_SETUP_CQE32)
+               __io_req_complete32(req, 0, ret, 0, res2, 0);
+       else
+               io_req_complete(req, ret);
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_done);
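
On the provider side, a file's ->uring_cmd() handler either completes inline by returning the result, or returns -EIOCBQUEUED and finishes later via io_uring_cmd_done() (or io_uring_cmd_complete_in_task() when the completion work must run in task context). A hedged sketch with a hypothetical driver; only the io_uring calls are real:

        /* hypothetical driver plumbing, for illustration only */
        static int demo_uring_cmd(struct io_uring_cmd *ioucmd,
                                  unsigned int issue_flags)
        {
                switch (ioucmd->cmd_op) {
                case 1: /* cheap query: complete inline */
                        return 0;
                case 2: /* hardware op: finish asynchronously */
                        demo_queue_to_hw(ioucmd);       /* hypothetical helper */
                        return -EIOCBQUEUED;
                default:
                        return -ENOTTY;
                }
        }

        /* called later from the driver's completion path */
        static void demo_hw_complete(struct io_uring_cmd *ioucmd, int result)
        {
                io_uring_cmd_done(ioucmd, result, 0);
        }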
+
+static int io_uring_cmd_prep_async(struct io_kiocb *req)
+{
+       size_t cmd_size;
+
+       cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
+
+       memcpy(req->async_data, req->uring_cmd.cmd, cmd_size);
+       return 0;
+}
+
+static int io_uring_cmd_prep(struct io_kiocb *req,
+                            const struct io_uring_sqe *sqe)
+{
+       struct io_uring_cmd *ioucmd = &req->uring_cmd;
+
+       if (sqe->rw_flags)
+               return -EINVAL;
+       ioucmd->cmd = sqe->cmd;
+       ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
+       return 0;
+}
+
+static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_uring_cmd *ioucmd = &req->uring_cmd;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct file *file = req->file;
+       int ret;
+
+       if (!req->file->f_op->uring_cmd)
+               return -EOPNOTSUPP;
+
+       if (ctx->flags & IORING_SETUP_SQE128)
+               issue_flags |= IO_URING_F_SQE128;
+       if (ctx->flags & IORING_SETUP_CQE32)
+               issue_flags |= IO_URING_F_CQE32;
+       if (ctx->flags & IORING_SETUP_IOPOLL)
+               issue_flags |= IO_URING_F_IOPOLL;
+
+       if (req_has_async_data(req))
+               ioucmd->cmd = req->async_data;
+
+       ret = file->f_op->uring_cmd(ioucmd, issue_flags);
+       if (ret == -EAGAIN) {
+               if (!req_has_async_data(req)) {
+                       if (io_alloc_async_data(req))
+                               return -ENOMEM;
+                       io_uring_cmd_prep_async(req);
+               }
+               return -EAGAIN;
+       }
+
+       if (ret != -EIOCBQUEUED)
+               io_uring_cmd_done(ioucmd, ret, 0);
        return 0;
 }
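
As a sketch of the provider side of this new interface: ->uring_cmd() may
complete inline, or return -EIOCBQUEUED and finish later through
io_uring_cmd_done(), optionally bouncing the completion into task context
with io_uring_cmd_complete_in_task(). The my_drv_* names below are invented
for illustration; only the io_uring_cmd_* symbols come from this patch:

    /* Runs in the submitter's task context; safe to post the CQE here. */
    static void my_drv_cmd_done(struct io_uring_cmd *ioucmd)
    {
            io_uring_cmd_done(ioucmd, 0, 0);
    }

    static int my_drv_uring_cmd(struct io_uring_cmd *ioucmd,
                                unsigned int issue_flags)
    {
            switch (ioucmd->cmd_op) {
            case 0x42:      /* invented command opcode */
                    /*
                     * Kick off driver work here; its completion path would
                     * call io_uring_cmd_complete_in_task(ioucmd,
                     * my_drv_cmd_done) to post the CQE safely.
                     */
                    return -EIOCBQUEUED;
            default:
                    return -EOPNOTSUPP;
            }
    }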
 
@@ -4289,9 +5107,7 @@ static int io_shutdown_prep(struct io_kiocb *req,
                            const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_NET)
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+       if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
                     sqe->buf_index || sqe->splice_fd_in))
                return -EINVAL;
 
@@ -4316,8 +5132,6 @@ static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
                return -ENOTSOCK;
 
        ret = __sys_shutdown_sock(sock, req->shutdown.how);
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 #else
@@ -4331,9 +5145,6 @@ static int __io_splice_prep(struct io_kiocb *req,
        struct io_splice *sp = &req->splice;
        unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
        sp->len = READ_ONCE(sqe->len);
        sp->flags = READ_ONCE(sqe->splice_flags);
        if (unlikely(sp->flags & ~valid_flags))
@@ -4378,7 +5189,7 @@ static int io_tee(struct io_kiocb *req, unsigned int issue_flags)
 done:
        if (ret != sp->len)
                req_set_fail(req);
-       io_req_complete(req, ret);
+       __io_req_complete(req, 0, ret, 0);
        return 0;
 }
 
@@ -4423,7 +5234,20 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
 done:
        if (ret != sp->len)
                req_set_fail(req);
-       io_req_complete(req, ret);
+       __io_req_complete(req, 0, ret, 0);
+       return 0;
+}
+
+static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       /*
+        * If the ring is set up with CQE32, relay back addr/addr2
+        */
+       if (req->ctx->flags & IORING_SETUP_CQE32) {
+               req->nop.extra1 = READ_ONCE(sqe->addr);
+               req->nop.extra2 = READ_ONCE(sqe->addr2);
+       }
+
        return 0;
 }
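
Seen from userspace, the new NOP fields round-trip through the big CQE. A
hedged liburing-style sketch, assuming a liburing build that understands
IORING_SETUP_CQE32 (ring setup errors and cleanup elided):

    struct io_uring_params p = { .flags = IORING_SETUP_CQE32 };
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;

    io_uring_queue_init_params(8, &ring, &p);
    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_nop(sqe);
    sqe->addr = 0x1111;     /* extra1: expected back in cqe->big_cqe[0] */
    sqe->off  = 0x2222;     /* addr2 aliases off: extra2 in big_cqe[1] */
    io_uring_submit(&ring);
    io_uring_wait_cqe(&ring, &cqe);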
 
@@ -4432,20 +5256,31 @@ done:
  */
 static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
 {
-       struct io_ring_ctx *ctx = req->ctx;
+       unsigned int cflags;
+       void __user *buf;
 
-       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
+       if (req->flags & REQ_F_BUFFER_SELECT) {
+               size_t len = 1;
+
+               buf = io_buffer_select(req, &len, issue_flags);
+               if (!buf)
+                       return -ENOBUFS;
+       }
 
-       __io_req_complete(req, issue_flags, 0, 0);
+       cflags = io_put_kbuf(req, issue_flags);
+       if (!(req->ctx->flags & IORING_SETUP_CQE32))
+               __io_req_complete(req, issue_flags, 0, cflags);
+       else
+               __io_req_complete32(req, issue_flags, 0, cflags,
+                                   req->nop.extra1, req->nop.extra2);
        return 0;
 }
 
 static int io_msg_ring_prep(struct io_kiocb *req,
                            const struct io_uring_sqe *sqe)
 {
-       if (unlikely(sqe->addr || sqe->ioprio || sqe->rw_flags ||
-                    sqe->splice_fd_in || sqe->buf_index || sqe->personality))
+       if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in ||
+                    sqe->buf_index || sqe->personality))
                return -EINVAL;
 
        req->msg.user_data = READ_ONCE(sqe->off);
@@ -4481,17 +5316,15 @@ done:
        if (ret < 0)
                req_set_fail(req);
        __io_req_complete(req, issue_flags, ret, 0);
+       /* put file to avoid an attempt to IOPOLL the req */
+       io_put_file(req->file);
+       req->file = NULL;
        return 0;
 }
 
 static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-
-       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
-                    sqe->splice_fd_in))
+       if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
                return -EINVAL;
 
        req->sync.flags = READ_ONCE(sqe->fsync_flags);
@@ -4515,8 +5348,6 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
        ret = vfs_fsync_range(req->file, req->sync.off,
                                end > 0 ? end : LLONG_MAX,
                                req->sync.flags & IORING_FSYNC_DATASYNC);
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
@@ -4524,10 +5355,7 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
 static int io_fallocate_prep(struct io_kiocb *req,
                             const struct io_uring_sqe *sqe)
 {
-       if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
-           sqe->splice_fd_in)
-               return -EINVAL;
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
                return -EINVAL;
 
        req->sync.off = READ_ONCE(sqe->off);
@@ -4545,9 +5373,7 @@ static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags)
                return -EAGAIN;
        ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
                                req->sync.len);
-       if (ret < 0)
-               req_set_fail(req);
-       else
+       if (ret >= 0)
                fsnotify_modify(req->file);
        io_req_complete(req, ret);
        return 0;
@@ -4558,9 +5384,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
        const char __user *fname;
        int ret;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->ioprio || sqe->buf_index))
+       if (unlikely(sqe->buf_index))
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4615,6 +5439,61 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return __io_openat_prep(req, sqe);
 }
 
+static int io_file_bitmap_get(struct io_ring_ctx *ctx)
+{
+       struct io_file_table *table = &ctx->file_table;
+       unsigned long nr = ctx->nr_user_files;
+       int ret;
+
+       if (table->alloc_hint >= nr)
+               table->alloc_hint = 0;
+
+       do {
+               ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
+               if (ret != nr) {
+                       table->alloc_hint = ret + 1;
+                       return ret;
+               }
+               if (!table->alloc_hint)
+                       break;
+
+               nr = table->alloc_hint;
+               table->alloc_hint = 0;
+       } while (1);
+
+       return -ENFILE;
+}
+
+static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
+                              struct file *file, unsigned int file_slot)
+{
+       bool alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
+       struct io_ring_ctx *ctx = req->ctx;
+       int ret;
+
+       if (alloc_slot) {
+               io_ring_submit_lock(ctx, issue_flags);
+               ret = io_file_bitmap_get(ctx);
+               if (unlikely(ret < 0)) {
+                       io_ring_submit_unlock(ctx, issue_flags);
+                       return ret;
+               }
+
+               file_slot = ret;
+       } else {
+               file_slot--;
+       }
+
+       ret = io_install_fixed_file(req, file, issue_flags, file_slot);
+       if (alloc_slot) {
+               io_ring_submit_unlock(ctx, issue_flags);
+               if (!ret)
+                       return file_slot;
+       }
+
+       return ret;
+}
+
 static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct open_flags op;
@@ -4670,8 +5549,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
        if (!fixed)
                fd_install(ret, file);
        else
-               ret = io_install_fixed_file(req, file, issue_flags,
-                                           req->open.file_slot - 1);
+               ret = io_fixed_fd_install(req, issue_flags, file,
+                                               req->open.file_slot);
 err:
        putname(req->open.filename);
        req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -4692,7 +5571,7 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
        struct io_provide_buf *p = &req->pbuf;
        u64 tmp;
 
-       if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
+       if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
            sqe->splice_fd_in)
                return -EINVAL;
 
@@ -4715,6 +5594,20 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
        if (!nbufs)
                return 0;
 
+       if (bl->buf_nr_pages) {
+               int j;
+
+               i = bl->buf_ring->tail - bl->head;
+               for (j = 0; j < bl->buf_nr_pages; j++)
+                       unpin_user_page(bl->buf_pages[j]);
+               kvfree(bl->buf_pages);
+               bl->buf_pages = NULL;
+               bl->buf_nr_pages = 0;
+               /* make sure it's seen as empty */
+               INIT_LIST_HEAD(&bl->buf_list);
+               return i;
+       }
+
        /* the head kbuf is the list itself */
        while (!list_empty(&bl->buf_list)) {
                struct io_buffer *nxt;
@@ -4736,22 +5629,23 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
        struct io_ring_ctx *ctx = req->ctx;
        struct io_buffer_list *bl;
        int ret = 0;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
-
-       io_ring_submit_lock(ctx, needs_lock);
 
-       lockdep_assert_held(&ctx->uring_lock);
+       io_ring_submit_lock(ctx, issue_flags);
 
        ret = -ENOENT;
        bl = io_buffer_get_list(ctx, p->bgid);
-       if (bl)
-               ret = __io_remove_buffers(ctx, bl, p->nbufs);
+       if (bl) {
+               ret = -EINVAL;
+               /* can't use provide/remove buffers command on mapped buffers */
+               if (!bl->buf_nr_pages)
+                       ret = __io_remove_buffers(ctx, bl, p->nbufs);
+       }
        if (ret < 0)
                req_set_fail(req);
 
        /* complete before unlock, IOPOLL may need the lock */
        __io_req_complete(req, issue_flags, ret, 0);
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
        return 0;
 }
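
The bl->buf_nr_pages teardown above is the flip side of the new ring-mapped
provided buffers, which are registered up front instead of being fed in
through SQEs. A hedged sketch of the userspace registration half, using the
liburing-2.2-style helper name (ring assumed initialized; sizing and error
handling elided):

    struct io_uring_buf_reg reg = { };
    struct io_uring_buf_ring *br;

    /* one anonymous page backing a small 8-entry buffer ring */
    br = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
              MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    reg.ring_addr = (unsigned long) br;
    reg.ring_entries = 8;
    reg.bgid = 0;   /* matched against sqe->buf_group at submit time */
    io_uring_register_buf_ring(&ring, &reg, 0);

Buffers are then published by filling br->bufs[] and advancing the tail,
which is what the bl->buf_ring->tail read above consumes.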
 
@@ -4762,7 +5656,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
        struct io_provide_buf *p = &req->pbuf;
        u64 tmp;
 
-       if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+       if (sqe->rw_flags || sqe->splice_fd_in)
                return -EINVAL;
 
        tmp = READ_ONCE(sqe->fd);
@@ -4859,26 +5753,56 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
        return i ? 0 : -ENOMEM;
 }
 
+static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
+{
+       int i;
+
+       ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list),
+                               GFP_KERNEL);
+       if (!ctx->io_bl)
+               return -ENOMEM;
+
+       for (i = 0; i < BGID_ARRAY; i++) {
+               INIT_LIST_HEAD(&ctx->io_bl[i].buf_list);
+               ctx->io_bl[i].bgid = i;
+       }
+
+       return 0;
+}
+
 static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_provide_buf *p = &req->pbuf;
        struct io_ring_ctx *ctx = req->ctx;
        struct io_buffer_list *bl;
        int ret = 0;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
 
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
 
-       lockdep_assert_held(&ctx->uring_lock);
+       if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
+               ret = io_init_bl_list(ctx);
+               if (ret)
+                       goto err;
+       }
 
        bl = io_buffer_get_list(ctx, p->bgid);
        if (unlikely(!bl)) {
-               bl = kmalloc(sizeof(*bl), GFP_KERNEL);
+               bl = kzalloc(sizeof(*bl), GFP_KERNEL);
                if (!bl) {
                        ret = -ENOMEM;
                        goto err;
                }
-               io_buffer_add_list(ctx, bl, p->bgid);
+               INIT_LIST_HEAD(&bl->buf_list);
+               ret = io_buffer_add_list(ctx, bl, p->bgid);
+               if (ret) {
+                       kfree(bl);
+                       goto err;
+               }
+       }
+       /* can't add buffers via this command for a mapped buffer ring */
+       if (bl->buf_nr_pages) {
+               ret = -EINVAL;
+               goto err;
        }
 
        ret = io_add_buffers(ctx, p, bl);
@@ -4887,7 +5811,7 @@ err:
                req_set_fail(req);
        /* complete before unlock, IOPOLL may need the lock */
        __io_req_complete(req, issue_flags, ret, 0);
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
        return 0;
 }
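
For contrast, the legacy path above still takes buffers one group at a time
via IORING_OP_PROVIDE_BUFFERS. A hedged liburing sketch handing the kernel
four 256-byte buffers in group 0 (ring assumed initialized):

    static char bufs[4][256];
    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    /* args: addr, per-buffer len, nr buffers, bgid, starting bid */
    io_uring_prep_provide_buffers(sqe, bufs, 256, 4, 0, 0);
    io_uring_submit(&ring);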
 
@@ -4895,9 +5819,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
                             const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_EPOLL)
-       if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
-               return -EINVAL;
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
 
        req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -4941,9 +5863,7 @@ static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
 static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
-       if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
-               return -EINVAL;
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (sqe->buf_index || sqe->off || sqe->splice_fd_in)
                return -EINVAL;
 
        req->madvise.addr = READ_ONCE(sqe->addr);
@@ -4965,8 +5885,6 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
                return -EAGAIN;
 
        ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 #else
@@ -4976,9 +5894,7 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
-               return -EINVAL;
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (sqe->buf_index || sqe->addr || sqe->splice_fd_in)
                return -EINVAL;
 
        req->fadvise.offset = READ_ONCE(sqe->off);
@@ -5014,9 +5930,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        const char __user *path;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (req->flags & REQ_F_FIXED_FILE)
                return -EBADF;
@@ -5052,19 +5966,13 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
 
        ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask,
                       ctx->buffer);
-
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
 
 static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
-           sqe->rw_flags || sqe->buf_index)
+       if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
                return -EINVAL;
        if (req->flags & REQ_F_FIXED_FILE)
                return -EBADF;
@@ -5096,7 +6004,8 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
                spin_unlock(&files->file_lock);
                goto err;
        }
-       file = fdt->fd[close->fd];
+       file = rcu_dereference_protected(fdt->fd[close->fd],
+                       lockdep_is_held(&files->file_lock));
        if (!file || file->f_op == &io_uring_fops) {
                spin_unlock(&files->file_lock);
                file = NULL;
@@ -5130,12 +6039,7 @@ err:
 
 static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-
-       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
-                    sqe->splice_fd_in))
+       if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
                return -EINVAL;
 
        req->sync.off = READ_ONCE(sqe->off);
@@ -5154,13 +6058,18 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
 
        ret = sync_file_range(req->file, req->sync.off, req->sync.len,
                                req->sync.flags);
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
 
 #if defined(CONFIG_NET)
+static bool io_net_retry(struct socket *sock, int flags)
+{
+       if (!(flags & MSG_WAITALL))
+               return false;
+       return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
 static int io_setup_async_msg(struct io_kiocb *req,
                              struct io_async_msghdr *kmsg)
 {
@@ -5206,13 +6115,16 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_sr_msg *sr = &req->sr_msg;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->addr2 || sqe->file_index))
+       if (unlikely(sqe->file_index))
                return -EINVAL;
 
        sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
+       sr->flags = READ_ONCE(sqe->addr2);
+       if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+               return -EINVAL;
        sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
        if (sr->msg_flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
@@ -5221,12 +6133,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (req->ctx->compat)
                sr->msg_flags |= MSG_CMSG_COMPAT;
 #endif
+       sr->done_io = 0;
        return 0;
 }
 
 static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_async_msghdr iomsg, *kmsg;
+       struct io_sr_msg *sr = &req->sr_msg;
        struct socket *sock;
        unsigned flags;
        int min_ret = 0;
@@ -5245,7 +6159,11 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
                kmsg = &iomsg;
        }
 
-       flags = req->sr_msg.msg_flags;
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return io_setup_async_msg(req, kmsg);
+
+       flags = sr->msg_flags;
        if (issue_flags & IO_URING_F_NONBLOCK)
                flags |= MSG_DONTWAIT;
        if (flags & MSG_WAITALL)
@@ -5258,12 +6176,21 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
                        return io_setup_async_msg(req, kmsg);
                if (ret == -ERESTARTSYS)
                        ret = -EINTR;
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->done_io += ret;
+                       req->flags |= REQ_F_PARTIAL_IO;
+                       return io_setup_async_msg(req, kmsg);
+               }
                req_set_fail(req);
        }
        /* fast path, check for non-NULL to avoid function call */
        if (kmsg->free_iov)
                kfree(kmsg->free_iov);
        req->flags &= ~REQ_F_NEED_CLEANUP;
+       if (ret >= 0)
+               ret += sr->done_io;
+       else if (sr->done_io)
+               ret = sr->done_io;
        __io_req_complete(req, issue_flags, ret, 0);
        return 0;
 }
@@ -5278,6 +6205,10 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
        int min_ret = 0;
        int ret;
 
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return -EAGAIN;
+
        sock = sock_from_file(req->file);
        if (unlikely(!sock))
                return -ENOTSOCK;
@@ -5291,7 +6222,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
        msg.msg_controllen = 0;
        msg.msg_namelen = 0;
 
-       flags = req->sr_msg.msg_flags;
+       flags = sr->msg_flags;
        if (issue_flags & IO_URING_F_NONBLOCK)
                flags |= MSG_DONTWAIT;
        if (flags & MSG_WAITALL)
@@ -5304,8 +6235,19 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
                        return -EAGAIN;
                if (ret == -ERESTARTSYS)
                        ret = -EINTR;
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
+                       req->flags |= REQ_F_PARTIAL_IO;
+                       return -EAGAIN;
+               }
                req_set_fail(req);
        }
+       if (ret >= 0)
+               ret += sr->done_io;
+       else if (sr->done_io)
+               ret = sr->done_io;
        __io_req_complete(req, issue_flags, ret, 0);
        return 0;
 }
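
As merged here, IORING_RECVSEND_POLL_FIRST travels in sqe->addr2 (read into
sr->flags above). A hedged sketch of a recv that should arm poll before
attempting the copy; ring, sockfd and buf are assumed set up already:

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    io_uring_prep_recv(sqe, sockfd, buf, sizeof(buf), 0);
    /* addr2 aliases off in struct io_uring_sqe */
    sqe->off = IORING_RECVSEND_POLL_FIRST;
    io_uring_submit(&ring);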
@@ -5397,14 +6339,6 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
        return __io_recvmsg_copy_hdr(req, iomsg);
 }
 
-static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
-                                              unsigned int issue_flags)
-{
-       struct io_sr_msg *sr = &req->sr_msg;
-
-       return io_buffer_select(req, &sr->len, sr->bgid, issue_flags);
-}
-
 static int io_recvmsg_prep_async(struct io_kiocb *req)
 {
        int ret;
@@ -5419,14 +6353,16 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_sr_msg *sr = &req->sr_msg;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->addr2 || sqe->file_index))
+       if (unlikely(sqe->file_index))
                return -EINVAL;
 
        sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
-       sr->bgid = READ_ONCE(sqe->buf_group);
+       sr->flags = READ_ONCE(sqe->addr2);
+       if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+               return -EINVAL;
        sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
        if (sr->msg_flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
@@ -5439,19 +6375,12 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static bool io_net_retry(struct socket *sock, int flags)
-{
-       if (!(flags & MSG_WAITALL))
-               return false;
-       return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
-}
-
 static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_async_msghdr iomsg, *kmsg;
        struct io_sr_msg *sr = &req->sr_msg;
        struct socket *sock;
-       struct io_buffer *kbuf;
+       unsigned int cflags;
        unsigned flags;
        int ret, min_ret = 0;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -5469,24 +6398,30 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
                kmsg = &iomsg;
        }
 
-       if (req->flags & REQ_F_BUFFER_SELECT) {
-               kbuf = io_recv_buffer_select(req, issue_flags);
-               if (IS_ERR(kbuf))
-                       return PTR_ERR(kbuf);
-               kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
-               kmsg->fast_iov[0].iov_len = req->sr_msg.len;
-               iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov,
-                               1, req->sr_msg.len);
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return io_setup_async_msg(req, kmsg);
+
+       if (io_do_buffer_select(req)) {
+               void __user *buf;
+
+               buf = io_buffer_select(req, &sr->len, issue_flags);
+               if (!buf)
+                       return -ENOBUFS;
+               kmsg->fast_iov[0].iov_base = buf;
+               kmsg->fast_iov[0].iov_len = sr->len;
+               iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
+                               sr->len);
        }
 
-       flags = req->sr_msg.msg_flags;
+       flags = sr->msg_flags;
        if (force_nonblock)
                flags |= MSG_DONTWAIT;
        if (flags & MSG_WAITALL)
                min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 
-       ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
-                                       kmsg->uaddr, flags);
+       kmsg->msg.msg_get_inq = 1;
+       ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
        if (ret < min_ret) {
                if (ret == -EAGAIN && force_nonblock)
                        return io_setup_async_msg(req, kmsg);
@@ -5510,45 +6445,54 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
                ret += sr->done_io;
        else if (sr->done_io)
                ret = sr->done_io;
-       __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+       cflags = io_put_kbuf(req, issue_flags);
+       if (kmsg->msg.msg_inq)
+               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+       __io_req_complete(req, issue_flags, ret, cflags);
        return 0;
 }
 
 static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 {
-       struct io_buffer *kbuf;
        struct io_sr_msg *sr = &req->sr_msg;
        struct msghdr msg;
-       void __user *buf = sr->buf;
        struct socket *sock;
        struct iovec iov;
+       unsigned int cflags;
        unsigned flags;
        int ret, min_ret = 0;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return -EAGAIN;
+
        sock = sock_from_file(req->file);
        if (unlikely(!sock))
                return -ENOTSOCK;
 
-       if (req->flags & REQ_F_BUFFER_SELECT) {
-               kbuf = io_recv_buffer_select(req, issue_flags);
-               if (IS_ERR(kbuf))
-                       return PTR_ERR(kbuf);
-               buf = u64_to_user_ptr(kbuf->addr);
+       if (io_do_buffer_select(req)) {
+               void __user *buf;
+
+               buf = io_buffer_select(req, &sr->len, issue_flags);
+               if (!buf)
+                       return -ENOBUFS;
+               sr->buf = buf;
        }
 
-       ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
+       ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter);
        if (unlikely(ret))
                goto out_free;
 
        msg.msg_name = NULL;
+       msg.msg_namelen = 0;
        msg.msg_control = NULL;
+       msg.msg_get_inq = 1;
+       msg.msg_flags = 0;
        msg.msg_controllen = 0;
-       msg.msg_namelen = 0;
        msg.msg_iocb = NULL;
-       msg.msg_flags = 0;
 
-       flags = req->sr_msg.msg_flags;
+       flags = sr->msg_flags;
        if (force_nonblock)
                flags |= MSG_DONTWAIT;
        if (flags & MSG_WAITALL)
@@ -5577,36 +6521,49 @@ out_free:
                ret += sr->done_io;
        else if (sr->done_io)
                ret = sr->done_io;
-       __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+       cflags = io_put_kbuf(req, issue_flags);
+       if (msg.msg_inq)
+               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+       __io_req_complete(req, issue_flags, ret, cflags);
        return 0;
 }
 
 static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_accept *accept = &req->accept;
+       unsigned flags;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->len || sqe->buf_index)
+       if (sqe->len || sqe->buf_index)
                return -EINVAL;
 
        accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
        accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
        accept->flags = READ_ONCE(sqe->accept_flags);
        accept->nofile = rlimit(RLIMIT_NOFILE);
+       flags = READ_ONCE(sqe->ioprio);
+       if (flags & ~IORING_ACCEPT_MULTISHOT)
+               return -EINVAL;
 
        accept->file_slot = READ_ONCE(sqe->file_index);
-       if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
-               return -EINVAL;
+       if (accept->file_slot) {
+               if (accept->flags & SOCK_CLOEXEC)
+                       return -EINVAL;
+               if (flags & IORING_ACCEPT_MULTISHOT &&
+                   accept->file_slot != IORING_FILE_INDEX_ALLOC)
+                       return -EINVAL;
+       }
        if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
                return -EINVAL;
        if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
                accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+       if (flags & IORING_ACCEPT_MULTISHOT)
+               req->flags |= REQ_F_APOLL_MULTISHOT;
        return 0;
 }
 
 static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
 {
+       struct io_ring_ctx *ctx = req->ctx;
        struct io_accept *accept = &req->accept;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
        unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -5614,6 +6571,7 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
        struct file *file;
        int ret, fd;
 
+retry:
        if (!fixed) {
                fd = __get_unused_fd_flags(accept->flags, accept->nofile);
                if (unlikely(fd < 0))
@@ -5625,7 +6583,89 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
                if (!fixed)
                        put_unused_fd(fd);
                ret = PTR_ERR(file);
-               if (ret == -EAGAIN && force_nonblock)
+               if (ret == -EAGAIN && force_nonblock) {
+                       /*
+                        * If it's multishot and polled, we don't need to
+                        * return -EAGAIN to arm the poll infra since it
+                        * has already been armed.
+                        */
+                       if ((req->flags & IO_APOLL_MULTI_POLLED) ==
+                           IO_APOLL_MULTI_POLLED)
+                               ret = 0;
+                       return ret;
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+               req_set_fail(req);
+       } else if (!fixed) {
+               fd_install(fd, file);
+               ret = fd;
+       } else {
+               ret = io_fixed_fd_install(req, issue_flags, file,
+                                               accept->file_slot);
+       }
+
+       if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+               __io_req_complete(req, issue_flags, ret, 0);
+               return 0;
+       }
+       if (ret >= 0) {
+               bool filled;
+
+               spin_lock(&ctx->completion_lock);
+               filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
+                                        IORING_CQE_F_MORE);
+               io_commit_cqring(ctx);
+               spin_unlock(&ctx->completion_lock);
+               if (filled) {
+                       io_cqring_ev_posted(ctx);
+                       goto retry;
+               }
+               ret = -ECANCELED;
+       }
+
+       return ret;
+}
+
+static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_socket *sock = &req->sock;
+
+       if (sqe->addr || sqe->rw_flags || sqe->buf_index)
+               return -EINVAL;
+
+       sock->domain = READ_ONCE(sqe->fd);
+       sock->type = READ_ONCE(sqe->off);
+       sock->protocol = READ_ONCE(sqe->len);
+       sock->file_slot = READ_ONCE(sqe->file_index);
+       sock->nofile = rlimit(RLIMIT_NOFILE);
+
+       sock->flags = sock->type & ~SOCK_TYPE_MASK;
+       if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
+               return -EINVAL;
+       if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+               return -EINVAL;
+       return 0;
+}
+
+static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_socket *sock = &req->sock;
+       bool fixed = !!sock->file_slot;
+       struct file *file;
+       int ret, fd;
+
+       if (!fixed) {
+               fd = __get_unused_fd_flags(sock->flags, sock->nofile);
+               if (unlikely(fd < 0))
+                       return fd;
+       }
+       file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
+       if (IS_ERR(file)) {
+               if (!fixed)
+                       put_unused_fd(fd);
+               ret = PTR_ERR(file);
+               if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
                        return -EAGAIN;
                if (ret == -ERESTARTSYS)
                        ret = -EINTR;
@@ -5635,7 +6675,7 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
                ret = fd;
        } else {
                ret = io_install_fixed_file(req, file, issue_flags,
-                                           accept->file_slot - 1);
+                                           sock->file_slot - 1);
        }
        __io_req_complete(req, issue_flags, ret, 0);
        return 0;
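
The field mapping in io_socket_prep() above (fd carries the domain, off the
type, len the protocol) makes IORING_OP_SOCKET a direct socket(2) analogue.
A hedged raw-SQE sketch; newer liburing wraps the same fields in
io_uring_prep_socket():

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    memset(sqe, 0, sizeof(*sqe));
    sqe->opcode = IORING_OP_SOCKET;
    sqe->fd = AF_INET;          /* domain */
    sqe->off = SOCK_STREAM;     /* type */
    sqe->len = IPPROTO_TCP;     /* protocol */
    io_uring_submit(&ring);     /* on success, cqe->res is the new fd */
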
@@ -5653,10 +6693,7 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_connect *conn = &req->connect;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
-           sqe->splice_fd_in)
+       if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
                return -EINVAL;
 
        conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -5729,6 +6766,7 @@ IO_NETOP_PREP_ASYNC(sendmsg);
 IO_NETOP_PREP_ASYNC(recvmsg);
 IO_NETOP_PREP_ASYNC(connect);
 IO_NETOP_PREP(accept);
+IO_NETOP_PREP(socket);
 IO_NETOP_FN(send);
 IO_NETOP_FN(recv);
 #endif /* CONFIG_NET */
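
Tying the CONFIG_NET additions together: multishot accept is requested
through sqe->ioprio and keeps posting CQEs flagged IORING_CQE_F_MORE until
it fails or is cancelled; with fixed files, the prep code above insists the
slot be IORING_FILE_INDEX_ALLOC so each connection can take its own slot. A
hedged raw-field sketch (listenfd assumed listening; liburing's
io_uring_prep_multishot_accept() wraps the same fields):

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    io_uring_prep_accept(sqe, listenfd, NULL, NULL, 0);
    sqe->ioprio = IORING_ACCEPT_MULTISHOT;
    io_uring_submit(&ring);
    /*
     * Each accepted connection arrives as its own CQE; while
     * cqe->flags has IORING_CQE_F_MORE set, the request stays armed.
     */
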
@@ -5779,7 +6817,7 @@ static void io_poll_req_insert(struct io_kiocb *req)
        struct io_ring_ctx *ctx = req->ctx;
        struct hlist_head *list;
 
-       list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+       list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)];
        hlist_add_head(&req->hash_node, list);
 }
 
@@ -5838,22 +6876,23 @@ static void io_poll_remove_entries(struct io_kiocb *req)
        rcu_read_unlock();
 }
 
+static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags);
+
 /*
  * All poll tw should go through this. Checks for poll events, manages
  * references, does rewait, etc.
  *
  * Returns a negative error on failure. >0 when no action is required, which
  * means either a spurious wakeup or a served multishot CQE. 0 when it's done with
- * the request, then the mask is stored in req->result.
+ * the request, then the mask is stored in req->cqe.res.
  */
-static int io_poll_check_events(struct io_kiocb *req, bool locked)
+static int io_poll_check_events(struct io_kiocb *req, bool *locked)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       int v;
+       int v, ret;
 
        /* req->task == current here, checking PF_EXITING is safe */
        if (unlikely(req->task->flags & PF_EXITING))
-               io_poll_mark_cancelled(req);
+               return -ECANCELED;
 
        do {
                v = atomic_read(&req->poll_refs);
@@ -5864,32 +6903,46 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked)
                if (v & IO_POLL_CANCEL_FLAG)
                        return -ECANCELED;
 
-               if (!req->result) {
+               if (!req->cqe.res) {
                        struct poll_table_struct pt = { ._key = req->apoll_events };
                        unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED;
 
                        if (unlikely(!io_assign_file(req, flags)))
                                return -EBADF;
-                       req->result = vfs_poll(req->file, &pt) & req->apoll_events;
+                       req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
                }
 
-               /* multishot, just fill an CQE and proceed */
-               if (req->result && !(req->apoll_events & EPOLLONESHOT)) {
-                       __poll_t mask = mangle_poll(req->result & req->apoll_events);
+               if (unlikely(!req->cqe.res))
+                       continue;
+               if (req->apoll_events & EPOLLONESHOT)
+                       return 0;
+
+               /* multishot, just fill a CQE and proceed */
+               if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+                       __poll_t mask = mangle_poll(req->cqe.res &
+                                                   req->apoll_events);
                        bool filled;
 
                        spin_lock(&ctx->completion_lock);
-                       filled = io_fill_cqe_aux(ctx, req->user_data, mask,
-                                                IORING_CQE_F_MORE);
+                       filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
+                                                mask, IORING_CQE_F_MORE);
                        io_commit_cqring(ctx);
                        spin_unlock(&ctx->completion_lock);
-                       if (unlikely(!filled))
-                               return -ECANCELED;
-                       io_cqring_ev_posted(ctx);
-               } else if (req->result) {
-                       return 0;
+                       if (filled) {
+                               io_cqring_ev_posted(ctx);
+                               continue;
+                       }
+                       return -ECANCELED;
                }
 
+               io_tw_lock(req->ctx, locked);
+               if (unlikely(req->task->flags & PF_EXITING))
+                       return -EFAULT;
+               ret = io_issue_sqe(req,
+                                  IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
+               if (ret)
+                       return ret;
+
                /*
                 * Release all references, retry if someone tried to restart
                 * task_work while we were executing it.
@@ -5904,21 +6957,21 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
        struct io_ring_ctx *ctx = req->ctx;
        int ret;
 
-       ret = io_poll_check_events(req, *locked);
+       ret = io_poll_check_events(req, locked);
        if (ret > 0)
                return;
 
        if (!ret) {
-               req->result = mangle_poll(req->result & req->poll.events);
+               req->cqe.res = mangle_poll(req->cqe.res & req->poll.events);
        } else {
-               req->result = ret;
+               req->cqe.res = ret;
                req_set_fail(req);
        }
 
        io_poll_remove_entries(req);
        spin_lock(&ctx->completion_lock);
        hash_del(&req->hash_node);
-       __io_req_complete_post(req, req->result, 0);
+       __io_req_complete_post(req, req->cqe.res, 0);
        io_commit_cqring(ctx);
        spin_unlock(&ctx->completion_lock);
        io_cqring_ev_posted(ctx);
@@ -5929,7 +6982,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
        struct io_ring_ctx *ctx = req->ctx;
        int ret;
 
-       ret = io_poll_check_events(req, *locked);
+       ret = io_poll_check_events(req, locked);
        if (ret > 0)
                return;
 
@@ -5944,9 +6997,9 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
                io_req_complete_failed(req, ret);
 }
 
-static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
+static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events)
 {
-       req->result = mask;
+       req->cqe.res = mask;
        /*
         * This is useful for poll that is armed on behalf of another
         * request, and where the wakeup path could be on a different
@@ -5959,11 +7012,12 @@ static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
        else
                req->io_task_work.func = io_apoll_task_func;
 
-       trace_io_uring_task_add(req->ctx, req, req->user_data, req->opcode, mask);
-       io_req_task_work_add(req, false);
+       trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
+       io_req_task_work_add(req);
 }
 
-static inline void io_poll_execute(struct io_kiocb *req, int res, int events)
+static inline void io_poll_execute(struct io_kiocb *req, int res,
+               __poll_t events)
 {
        if (io_poll_get_ownership(req))
                __io_poll_execute(req, res, events);
@@ -5978,6 +7032,7 @@ static void io_poll_cancel_req(struct io_kiocb *req)
 
 #define wqe_to_req(wait)       ((void *)((unsigned long) (wait)->private & ~1))
 #define wqe_is_double(wait)    ((unsigned long) (wait)->private & 1)
+#define IO_ASYNC_POLL_COMMON   (EPOLLONESHOT | EPOLLPRI)
 
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
                        void *key)
@@ -6012,7 +7067,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
        }
 
        /* for instances that support it check for an event match first */
-       if (mask && !(mask & poll->events))
+       if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
                return 0;
 
        if (io_poll_get_ownership(req)) {
@@ -6098,6 +7153,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
        int v;
 
        INIT_HLIST_NODE(&req->hash_node);
+       req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
        io_init_poll_iocb(poll, mask, io_poll_wake);
        poll->file = req->file;
 
@@ -6168,28 +7224,34 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
        struct io_ring_ctx *ctx = req->ctx;
        struct async_poll *apoll;
        struct io_poll_table ipt;
-       __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI;
+       __poll_t mask = POLLPRI | POLLERR;
        int ret;
 
        if (!def->pollin && !def->pollout)
                return IO_APOLL_ABORTED;
-       if (!file_can_poll(req->file) || (req->flags & REQ_F_POLLED))
+       if (!file_can_poll(req->file))
+               return IO_APOLL_ABORTED;
+       if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
                return IO_APOLL_ABORTED;
+       if (!(req->flags & REQ_F_APOLL_MULTISHOT))
+               mask |= EPOLLONESHOT;
 
        if (def->pollin) {
-               mask |= POLLIN | POLLRDNORM;
+               mask |= EPOLLIN | EPOLLRDNORM;
 
                /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
                if ((req->opcode == IORING_OP_RECVMSG) &&
                    (req->sr_msg.msg_flags & MSG_ERRQUEUE))
-                       mask &= ~POLLIN;
+                       mask &= ~EPOLLIN;
        } else {
-               mask |= POLLOUT | POLLWRNORM;
+               mask |= EPOLLOUT | EPOLLWRNORM;
        }
        if (def->poll_exclusive)
                mask |= EPOLLEXCLUSIVE;
-       if (!(issue_flags & IO_URING_F_UNLOCKED) &&
-           !list_empty(&ctx->apoll_cache)) {
+       if (req->flags & REQ_F_POLLED) {
+               apoll = req->apoll;
+       } else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
+                  !list_empty(&ctx->apoll_cache)) {
                apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
                                                poll.wait.entry);
                list_del_init(&apoll->poll.wait.entry);
@@ -6209,7 +7271,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
        if (ret || ipt.error)
                return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
 
-       trace_io_uring_poll_arm(ctx, req, req->user_data, req->opcode,
+       trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
                                mask, apoll->poll.events);
        return IO_APOLL_OK;
 }
@@ -6242,24 +7304,53 @@ static __cold bool io_poll_remove_all(struct io_ring_ctx *ctx,
        return found;
 }
 
-static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr,
-                                    bool poll_only)
+static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
+                                    struct io_cancel_data *cd)
        __must_hold(&ctx->completion_lock)
 {
        struct hlist_head *list;
        struct io_kiocb *req;
 
-       list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
+       list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)];
        hlist_for_each_entry(req, list, hash_node) {
-               if (sqe_addr != req->user_data)
+               if (cd->data != req->cqe.user_data)
                        continue;
                if (poll_only && req->opcode != IORING_OP_POLL_ADD)
                        continue;
+               if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
+                       if (cd->seq == req->work.cancel_seq)
+                               continue;
+                       req->work.cancel_seq = cd->seq;
+               }
                return req;
        }
        return NULL;
 }
 
+static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
+                                         struct io_cancel_data *cd)
+       __must_hold(&ctx->completion_lock)
+{
+       struct io_kiocb *req;
+       int i;
+
+       for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
+               struct hlist_head *list;
+
+               list = &ctx->cancel_hash[i];
+               hlist_for_each_entry(req, list, hash_node) {
+                       if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
+                           req->file != cd->file)
+                               continue;
+                       if (cd->seq == req->work.cancel_seq)
+                               continue;
+                       req->work.cancel_seq = cd->seq;
+                       return req;
+               }
+       }
+       return NULL;
+}
+
 static bool io_poll_disarm(struct io_kiocb *req)
        __must_hold(&ctx->completion_lock)
 {
@@ -6270,12 +7361,15 @@ static bool io_poll_disarm(struct io_kiocb *req)
        return true;
 }
 
-static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr,
-                         bool poll_only)
+static int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
        __must_hold(&ctx->completion_lock)
 {
-       struct io_kiocb *req = io_poll_find(ctx, sqe_addr, poll_only);
+       struct io_kiocb *req;
 
+       if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
+               req = io_poll_file_find(ctx, cd);
+       else
+               req = io_poll_find(ctx, false, cd);
        if (!req)
                return -ENOENT;
        io_poll_cancel_req(req);
@@ -6302,9 +7396,7 @@ static int io_poll_update_prep(struct io_kiocb *req,
        struct io_poll_update *upd = &req->poll_update;
        u32 flags;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        flags = READ_ONCE(sqe->len);
        if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
@@ -6334,9 +7426,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
        struct io_poll_iocb *poll = &req->poll;
        u32 flags;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr)
+       if (sqe->buf_index || sqe->off || sqe->addr)
                return -EINVAL;
        flags = READ_ONCE(sqe->len);
        if (flags & ~IORING_POLL_ADD_MULTI)
@@ -6366,13 +7456,14 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
 {
+       struct io_cancel_data cd = { .data = req->poll_update.old_user_data, };
        struct io_ring_ctx *ctx = req->ctx;
        struct io_kiocb *preq;
        int ret2, ret = 0;
        bool locked;
 
        spin_lock(&ctx->completion_lock);
-       preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
+       preq = io_poll_find(ctx, true, &cd);
        if (!preq || !io_poll_disarm(preq)) {
                spin_unlock(&ctx->completion_lock);
                ret = preq ? -EALREADY : -ENOENT;
@@ -6388,7 +7479,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
                        preq->poll.events |= IO_POLL_UNMASK;
                }
                if (req->poll_update.update_user_data)
-                       preq->user_data = req->poll_update.new_user_data;
+                       preq->cqe.user_data = req->poll_update.new_user_data;
 
                ret2 = io_poll_add(preq, issue_flags);
                /* successfully updated, don't complete poll request */
@@ -6397,7 +7488,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
        }
 
        req_set_fail(preq);
-       preq->result = -ECANCELED;
+       preq->cqe.res = -ECANCELED;
        locked = !(issue_flags & IO_URING_F_UNLOCKED);
        io_req_task_complete(preq, &locked);
 out:
@@ -6425,14 +7516,14 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
        if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
                req_set_fail(req);
 
-       req->result = -ETIME;
+       req->cqe.res = -ETIME;
        req->io_task_work.func = io_req_task_complete;
-       io_req_task_work_add(req, false);
+       io_req_task_work_add(req);
        return HRTIMER_NORESTART;
 }
 
 static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
-                                          __u64 user_data)
+                                          struct io_cancel_data *cd)
        __must_hold(&ctx->timeout_lock)
 {
        struct io_timeout_data *io;
@@ -6440,9 +7531,16 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
        bool found = false;
 
        list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
-               found = user_data == req->user_data;
-               if (found)
-                       break;
+               if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
+                   cd->data != req->cqe.user_data)
+                       continue;
+               if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
+                       if (cd->seq == req->work.cancel_seq)
+                               continue;
+                       req->work.cancel_seq = cd->seq;
+               }
+               found = true;
+               break;
        }
        if (!found)
                return ERR_PTR(-ENOENT);
@@ -6454,11 +7552,14 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
        return req;
 }
 
-static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
+static int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
        __must_hold(&ctx->completion_lock)
-       __must_hold(&ctx->timeout_lock)
 {
-       struct io_kiocb *req = io_timeout_extract(ctx, user_data);
+       struct io_kiocb *req;
+
+       spin_lock_irq(&ctx->timeout_lock);
+       req = io_timeout_extract(ctx, cd);
+       spin_unlock_irq(&ctx->timeout_lock);
 
        if (IS_ERR(req))
                return PTR_ERR(req);
@@ -6491,7 +7592,7 @@ static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
        bool found = false;
 
        list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
-               found = user_data == req->user_data;
+               found = user_data == req->cqe.user_data;
                if (found)
                        break;
        }
@@ -6511,7 +7612,8 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
                             struct timespec64 *ts, enum hrtimer_mode mode)
        __must_hold(&ctx->timeout_lock)
 {
-       struct io_kiocb *req = io_timeout_extract(ctx, user_data);
+       struct io_cancel_data cd = { .data = user_data, };
+       struct io_kiocb *req = io_timeout_extract(ctx, &cd);
        struct io_timeout_data *data;
 
        if (IS_ERR(req))
@@ -6531,11 +7633,9 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
 {
        struct io_timeout_rem *tr = &req->timeout_rem;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
        if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
                return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
                return -EINVAL;
 
        tr->ltimeout = false;
@@ -6576,10 +7676,10 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
        int ret;
 
        if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) {
+               struct io_cancel_data cd = { .data = tr->addr, };
+
                spin_lock(&ctx->completion_lock);
-               spin_lock_irq(&ctx->timeout_lock);
-               ret = io_timeout_cancel(ctx, tr->addr);
-               spin_unlock_irq(&ctx->timeout_lock);
+               ret = io_timeout_cancel(ctx, &cd);
                spin_unlock(&ctx->completion_lock);
        } else {
                enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
@@ -6605,10 +7705,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        unsigned flags;
        u32 off = READ_ONCE(sqe->off);
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
-           sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
                return -EINVAL;
        if (off && is_timeout_link)
                return -EINVAL;
@@ -6707,30 +7804,42 @@ add:
        return 0;
 }
 
-struct io_cancel_data {
-       struct io_ring_ctx *ctx;
-       u64 user_data;
-};
-
 static bool io_cancel_cb(struct io_wq_work *work, void *data)
 {
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
        struct io_cancel_data *cd = data;
 
-       return req->ctx == cd->ctx && req->user_data == cd->user_data;
+       if (req->ctx != cd->ctx)
+               return false;
+       if (cd->flags & IORING_ASYNC_CANCEL_ANY) {
+               ;
+       } else if (cd->flags & IORING_ASYNC_CANCEL_FD) {
+               if (req->file != cd->file)
+                       return false;
+       } else {
+               if (req->cqe.user_data != cd->data)
+                       return false;
+       }
+       if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
+               if (cd->seq == req->work.cancel_seq)
+                       return false;
+               req->work.cancel_seq = cd->seq;
+       }
+       return true;
 }
 
-static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
-                              struct io_ring_ctx *ctx)
+static int io_async_cancel_one(struct io_uring_task *tctx,
+                              struct io_cancel_data *cd)
 {
-       struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, };
        enum io_wq_cancel cancel_ret;
        int ret = 0;
+       bool all;
 
        if (!tctx || !tctx->io_wq)
                return -ENOENT;
 
-       cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false);
+       all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+       cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
        switch (cancel_ret) {
        case IO_WQ_CANCEL_OK:
                ret = 0;
@@ -6746,14 +7855,14 @@ static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
        return ret;
 }
 
-static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
+static int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd)
 {
        struct io_ring_ctx *ctx = req->ctx;
        int ret;
 
        WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
 
-       ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
+       ret = io_async_cancel_one(req->task->io_uring, cd);
        /*
         * Fall-through even for -EALREADY, as we may have a poll armed
         * that needs unarming.
@@ -6762,56 +7871,98 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
                return 0;
 
        spin_lock(&ctx->completion_lock);
-       ret = io_poll_cancel(ctx, sqe_addr, false);
+       ret = io_poll_cancel(ctx, cd);
        if (ret != -ENOENT)
                goto out;
-
-       spin_lock_irq(&ctx->timeout_lock);
-       ret = io_timeout_cancel(ctx, sqe_addr);
-       spin_unlock_irq(&ctx->timeout_lock);
+       if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
+               ret = io_timeout_cancel(ctx, cd);
 out:
        spin_unlock(&ctx->completion_lock);
        return ret;
 }
 
+#define CANCEL_FLAGS   (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
+                        IORING_ASYNC_CANCEL_ANY)
+
 static int io_async_cancel_prep(struct io_kiocb *req,
                                const struct io_uring_sqe *sqe)
 {
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+       if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
                return -EINVAL;
-       if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
-           sqe->splice_fd_in)
+       if (sqe->off || sqe->len || sqe->splice_fd_in)
                return -EINVAL;
 
        req->cancel.addr = READ_ONCE(sqe->addr);
+       req->cancel.flags = READ_ONCE(sqe->cancel_flags);
+       if (req->cancel.flags & ~CANCEL_FLAGS)
+               return -EINVAL;
+       if (req->cancel.flags & IORING_ASYNC_CANCEL_FD) {
+               if (req->cancel.flags & IORING_ASYNC_CANCEL_ANY)
+                       return -EINVAL;
+               req->cancel.fd = READ_ONCE(sqe->fd);
+       }
+
        return 0;
 }
 
-static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req,
+                            unsigned int issue_flags)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-       u64 sqe_addr = req->cancel.addr;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
+       bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+       struct io_ring_ctx *ctx = cd->ctx;
        struct io_tctx_node *node;
-       int ret;
+       int ret, nr = 0;
 
-       ret = io_try_cancel_userdata(req, sqe_addr);
-       if (ret != -ENOENT)
-               goto done;
+       do {
+               ret = io_try_cancel(req, cd);
+               if (ret == -ENOENT)
+                       break;
+               if (!all)
+                       return ret;
+               nr++;
+       } while (1);
 
        /* slow path, try all io-wq's */
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
        ret = -ENOENT;
        list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
                struct io_uring_task *tctx = node->task->io_uring;
 
-               ret = io_async_cancel_one(tctx, req->cancel.addr, ctx);
-               if (ret != -ENOENT)
-                       break;
+               ret = io_async_cancel_one(tctx, cd);
+               if (ret != -ENOENT) {
+                       if (!all)
+                               break;
+                       nr++;
+               }
+       }
+       io_ring_submit_unlock(ctx, issue_flags);
+       return all ? nr : ret;
+}
+
+static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_cancel_data cd = {
+               .ctx    = req->ctx,
+               .data   = req->cancel.addr,
+               .flags  = req->cancel.flags,
+               .seq    = atomic_inc_return(&req->ctx->cancel_seq),
+       };
+       int ret;
+
+       if (cd.flags & IORING_ASYNC_CANCEL_FD) {
+               if (req->flags & REQ_F_FIXED_FILE)
+                       req->file = io_file_get_fixed(req, req->cancel.fd,
+                                                       issue_flags);
+               else
+                       req->file = io_file_get_normal(req, req->cancel.fd);
+               if (!req->file) {
+                       ret = -EBADF;
+                       goto done;
+               }
+               cd.file = req->file;
        }
-       io_ring_submit_unlock(ctx, needs_lock);
+
+       ret = __io_async_cancel(&cd, req, issue_flags);
 done:
        if (ret < 0)
                req_set_fail(req);
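
/*
 * Illustrative sketch, not part of the patch: issuing the extended cancel
 * from userspace with a raw SQE. Field usage mirrors io_async_cancel_prep()
 * above; the helper name is an assumption, and <string.h> plus the io_uring
 * uapi header are assumed to be included.
 */
static void prep_cancel_all_for_fd(struct io_uring_sqe *sqe, int fd)
{
        memset(sqe, 0, sizeof(*sqe));
        sqe->opcode = IORING_OP_ASYNC_CANCEL;
        /* match by file instead of user_data, and cancel every match */
        sqe->cancel_flags = IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_ALL;
        sqe->fd = fd;
        /* on completion, cqe->res is the number of cancelled requests or -errno */
}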
@@ -6824,7 +7975,7 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
 {
        if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
                return -EINVAL;
-       if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+       if (sqe->rw_flags || sqe->splice_fd_in)
                return -EINVAL;
 
        req->rsrc_update.offset = READ_ONCE(sqe->off);
@@ -6838,7 +7989,6 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
 static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
        struct io_uring_rsrc_update2 up;
        int ret;
 
@@ -6849,10 +7999,10 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
        up.resv = 0;
        up.resv2 = 0;
 
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
        ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
                                        &up, req->rsrc_update.nr_args);
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
 
        if (ret < 0)
                req_set_fail(req);
@@ -6864,7 +8014,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        switch (req->opcode) {
        case IORING_OP_NOP:
-               return 0;
+               return io_nop_prep(req, sqe);
        case IORING_OP_READV:
        case IORING_OP_READ_FIXED:
        case IORING_OP_READ:
@@ -6938,6 +8088,18 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return io_linkat_prep(req, sqe);
        case IORING_OP_MSG_RING:
                return io_msg_ring_prep(req, sqe);
+       case IORING_OP_FSETXATTR:
+               return io_fsetxattr_prep(req, sqe);
+       case IORING_OP_SETXATTR:
+               return io_setxattr_prep(req, sqe);
+       case IORING_OP_FGETXATTR:
+               return io_fgetxattr_prep(req, sqe);
+       case IORING_OP_GETXATTR:
+               return io_getxattr_prep(req, sqe);
+       case IORING_OP_SOCKET:
+               return io_socket_prep(req, sqe);
+       case IORING_OP_URING_CMD:
+               return io_uring_cmd_prep(req, sqe);
        }
 
        printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6951,7 +8113,7 @@ static int io_req_prep_async(struct io_kiocb *req)
 
        /* assign early for deferred execution for non-fixed file */
        if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE))
-               req->file = io_file_get_normal(req, req->fd);
+               req->file = io_file_get_normal(req, req->cqe.fd);
        if (!def->needs_async_setup)
                return 0;
        if (WARN_ON_ONCE(req_has_async_data(req)))
@@ -6970,6 +8132,8 @@ static int io_req_prep_async(struct io_kiocb *req)
                return io_recvmsg_prep_async(req);
        case IORING_OP_CONNECT:
                return io_connect_prep_async(req);
+       case IORING_OP_URING_CMD:
+               return io_uring_cmd_prep_async(req);
        }
        printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
                    req->opcode);
@@ -6979,9 +8143,10 @@ static int io_req_prep_async(struct io_kiocb *req)
 static u32 io_get_sequence(struct io_kiocb *req)
 {
        u32 seq = req->ctx->cached_sq_head;
+       struct io_kiocb *cur;
 
        /* need original cached_sq_head, but it was increased for each req */
-       io_for_each_link(req, req)
+       io_for_each_link(cur, req)
                seq--;
        return seq;
 }
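
/*
 * Note on the hunk above: io_for_each_link() advances its first argument,
 * so iterating with "req" itself clobbered the caller's pointer; walking
 * with the separate "cur" cursor keeps "req" valid after the count.
 */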
@@ -7024,7 +8189,7 @@ fail:
                goto queue;
        }
 
-       trace_io_uring_defer(ctx, req, req->user_data, req->opcode);
+       trace_io_uring_defer(ctx, req, req->cqe.user_data, req->opcode);
        de->req = req;
        de->seq = seq;
        list_add_tail(&de->list, &ctx->defer_list);
@@ -7086,6 +8251,12 @@ static void io_clean_op(struct io_kiocb *req)
                        if (req->statx.filename)
                                putname(req->statx.filename);
                        break;
+               case IORING_OP_SETXATTR:
+               case IORING_OP_FSETXATTR:
+               case IORING_OP_GETXATTR:
+               case IORING_OP_FGETXATTR:
+                       __io_xattr_finish(req);
+                       break;
                }
        }
        if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -7108,15 +8279,11 @@ static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags)
                return true;
 
        if (req->flags & REQ_F_FIXED_FILE)
-               req->file = io_file_get_fixed(req, req->fd, issue_flags);
+               req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags);
        else
-               req->file = io_file_get_normal(req, req->fd);
-       if (req->file)
-               return true;
+               req->file = io_file_get_normal(req, req->cqe.fd);
 
-       req_set_fail(req);
-       req->result = -EBADF;
-       return false;
+       return !!req->file;
 }
 
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
@@ -7246,6 +8413,24 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
        case IORING_OP_MSG_RING:
                ret = io_msg_ring(req, issue_flags);
                break;
+       case IORING_OP_FSETXATTR:
+               ret = io_fsetxattr(req, issue_flags);
+               break;
+       case IORING_OP_SETXATTR:
+               ret = io_setxattr(req, issue_flags);
+               break;
+       case IORING_OP_FGETXATTR:
+               ret = io_fgetxattr(req, issue_flags);
+               break;
+       case IORING_OP_GETXATTR:
+               ret = io_getxattr(req, issue_flags);
+               break;
+       case IORING_OP_SOCKET:
+               ret = io_socket(req, issue_flags);
+               break;
+       case IORING_OP_URING_CMD:
+               ret = io_uring_cmd(req, issue_flags);
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -7279,7 +8464,6 @@ static void io_wq_submit_work(struct io_wq_work *work)
        const struct io_op_def *def = &io_op_defs[req->opcode];
        unsigned int issue_flags = IO_URING_F_UNLOCKED;
        bool needs_poll = false;
-       struct io_kiocb *timeout;
        int ret = 0, err = -ECANCELED;
 
        /* one will be dropped by ->io_free_work() after returning to io-wq */
@@ -7288,10 +8472,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
        else
                req_ref_get(req);
 
-       timeout = io_prep_linked_timeout(req);
-       if (timeout)
-               io_queue_linked_timeout(timeout);
-
+       io_arm_ltimeout(req);
 
        /* either cancelled or io-wq is dying, so don't touch tctx->iowq */
        if (work->flags & IO_WQ_WORK_CANCEL) {
@@ -7324,6 +8505,8 @@ fail:
                 * wait for request slots on the block side.
                 */
                if (!needs_poll) {
+                       if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
+                               break;
                        cond_resched();
                        continue;
                }
@@ -7369,8 +8552,7 @@ static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
        struct file *file = NULL;
        unsigned long file_ptr;
 
-       if (issue_flags & IO_URING_F_UNLOCKED)
-               mutex_lock(&ctx->uring_lock);
+       io_ring_submit_lock(ctx, issue_flags);
 
        if (unlikely((unsigned int)fd >= ctx->nr_user_files))
                goto out;
@@ -7381,9 +8563,9 @@ static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
        /* mask in overlapping REQ_F and FFS bits */
        req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
        io_req_set_rsrc_node(req, ctx, 0);
+       WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap));
 out:
-       if (issue_flags & IO_URING_F_UNLOCKED)
-               mutex_unlock(&ctx->uring_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
        return file;
 }
 
@@ -7404,7 +8586,7 @@ static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
 {
        struct file *file = fget(fd);
 
-       trace_io_uring_file_get(req->ctx, req, req->user_data, fd);
+       trace_io_uring_file_get(req->ctx, req, req->cqe.user_data, fd);
 
        /* we don't allow fixed io_uring files */
        if (file && file->f_op == &io_uring_fops)
@@ -7418,8 +8600,14 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
        int ret = -ENOENT;
 
        if (prev) {
-               if (!(req->task->flags & PF_EXITING))
-                       ret = io_try_cancel_userdata(req, prev->user_data);
+               if (!(req->task->flags & PF_EXITING)) {
+                       struct io_cancel_data cd = {
+                               .ctx            = req->ctx,
+                               .data           = prev->cqe.user_data,
+                       };
+
+                       ret = io_try_cancel(req, &cd);
+               }
                io_req_complete_post(req, ret ?: -ETIME, 0);
                io_put_req(prev);
        } else {
@@ -7453,7 +8641,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
        spin_unlock_irqrestore(&ctx->timeout_lock, flags);
 
        req->io_task_work.func = io_req_task_link_timeout;
-       io_req_task_work_add(req, false);
+       io_req_task_work_add(req);
        return HRTIMER_NORESTART;
 }
 
@@ -7479,10 +8667,17 @@ static void io_queue_linked_timeout(struct io_kiocb *req)
        io_put_req(req);
 }
 
-static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
+static void io_queue_async(struct io_kiocb *req, int ret)
        __must_hold(&req->ctx->uring_lock)
 {
-       struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
+       struct io_kiocb *linked_timeout;
+
+       if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) {
+               io_req_complete_failed(req, ret);
+               return;
+       }
+
+       linked_timeout = io_prep_linked_timeout(req);
 
        switch (io_arm_poll_handler(req, 0)) {
        case IO_APOLL_READY:
@@ -7493,7 +8688,7 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
                 * Queued up for async execution, worker will release
                 * submit reference when the iocb is actually submitted.
                 */
-               io_queue_async_work(req, NULL);
+               io_queue_iowq(req, NULL);
                break;
        case IO_APOLL_OK:
                break;
@@ -7503,10 +8698,9 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
                io_queue_linked_timeout(linked_timeout);
 }
 
-static inline void __io_queue_sqe(struct io_kiocb *req)
+static inline void io_queue_sqe(struct io_kiocb *req)
        __must_hold(&req->ctx->uring_lock)
 {
-       struct io_kiocb *linked_timeout;
        int ret;
 
        ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
@@ -7519,22 +8713,23 @@ static inline void __io_queue_sqe(struct io_kiocb *req)
         * We async punt it if the file wasn't marked NOWAIT, or if the file
         * doesn't support non-blocking read/write attempts
         */
-       if (likely(!ret)) {
-               linked_timeout = io_prep_linked_timeout(req);
-               if (linked_timeout)
-                       io_queue_linked_timeout(linked_timeout);
-       } else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
-               io_queue_sqe_arm_apoll(req);
-       } else {
-               io_req_complete_failed(req, ret);
-       }
+       if (likely(!ret))
+               io_arm_ltimeout(req);
+       else
+               io_queue_async(req, ret);
 }
 
 static void io_queue_sqe_fallback(struct io_kiocb *req)
        __must_hold(&req->ctx->uring_lock)
 {
-       if (req->flags & REQ_F_FAIL) {
-               io_req_complete_fail_submit(req);
+       if (unlikely(req->flags & REQ_F_FAIL)) {
+               /*
+                * We don't submit; fail them all. For that, replace
+                * hardlinks with normal links. An extra REQ_F_LINK is
+                * tolerated.
+                */
+               req->flags &= ~REQ_F_HARDLINK;
+               req->flags |= REQ_F_LINK;
+               io_req_complete_failed(req, req->cqe.res);
        } else if (unlikely(req->ctx->drain_active)) {
                io_drain_req(req);
        } else {
@@ -7543,19 +8738,10 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
                if (unlikely(ret))
                        io_req_complete_failed(req, ret);
                else
-                       io_queue_async_work(req, NULL);
+                       io_queue_iowq(req, NULL);
        }
 }
 
-static inline void io_queue_sqe(struct io_kiocb *req)
-       __must_hold(&req->ctx->uring_lock)
-{
-       if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL))))
-               __io_queue_sqe(req);
-       else
-               io_queue_sqe_fallback(req);
-}
-
 /*
  * Check SQE restrictions (opcode and flags).
  *
@@ -7610,9 +8796,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
        req->opcode = opcode = READ_ONCE(sqe->opcode);
        /* same numerical values with corresponding REQ_F_*, safe to copy */
        req->flags = sqe_flags = READ_ONCE(sqe->flags);
-       req->user_data = READ_ONCE(sqe->user_data);
+       req->cqe.user_data = READ_ONCE(sqe->user_data);
        req->file = NULL;
-       req->fixed_rsrc_refs = NULL;
+       req->rsrc_node = NULL;
        req->task = current;
 
        if (unlikely(opcode >= IORING_OP_LAST)) {
@@ -7623,9 +8809,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
                /* enforce forwards compatibility on users */
                if (sqe_flags & ~SQE_VALID_FLAGS)
                        return -EINVAL;
-               if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
-                   !io_op_defs[opcode].buffer_select)
-                       return -EOPNOTSUPP;
+               if (sqe_flags & IOSQE_BUFFER_SELECT) {
+                       if (!io_op_defs[opcode].buffer_select)
+                               return -EOPNOTSUPP;
+                       req->buf_index = READ_ONCE(sqe->buf_group);
+               }
                if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS)
                        ctx->drain_disabled = true;
                if (sqe_flags & IOSQE_IO_DRAIN) {
@@ -7648,10 +8836,15 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
                }
        }
 
+       /*
+        * Centralised validation: sqe->ioprio and IOPOLL rings are rejected
+        * here for opcodes that don't support them, replacing the checks
+        * dropped from the individual ->prep() handlers.
+        */
+       if (!io_op_defs[opcode].ioprio && sqe->ioprio)
+               return -EINVAL;
+       if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+
        if (io_op_defs[opcode].needs_file) {
                struct io_submit_state *state = &ctx->submit_state;
 
-               req->fd = READ_ONCE(sqe->fd);
+               req->cqe.fd = READ_ONCE(sqe->fd);
 
                /*
                 * Plug now if we have more than 2 IO left after this, and the
@@ -7683,7 +8876,44 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
        return io_req_prep(req, sqe);
 }
 
-static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
+                                     struct io_kiocb *req, int ret)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_submit_link *link = &ctx->submit_state.link;
+       struct io_kiocb *head = link->head;
+
+       trace_io_uring_req_failed(sqe, ctx, req, ret);
+
+       /*
+        * Avoid breaking links in the middle as it renders links with SQPOLL
+        * unusable. Instead of failing eagerly, continue assembling the link if
+        * applicable and mark the head with REQ_F_FAIL. The link flushing code
+        * should find the flag and handle the rest.
+        */
+       req_fail_link_node(req, ret);
+       if (head && !(head->flags & REQ_F_FAIL))
+               req_fail_link_node(head, -ECANCELED);
+
+       if (!(req->flags & IO_REQ_LINK_FLAGS)) {
+               if (head) {
+                       link->last->link = req;
+                       link->head = NULL;
+                       req = head;
+               }
+               io_queue_sqe_fallback(req);
+               return ret;
+       }
+
+       if (head)
+               link->last->link = req;
+       else
+               link->head = req;
+       link->last = req;
+       return 0;
+}
+
+static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
                         const struct io_uring_sqe *sqe)
        __must_hold(&ctx->uring_lock)
 {
@@ -7691,35 +8921,11 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
        int ret;
 
        ret = io_init_req(ctx, req, sqe);
-       if (unlikely(ret)) {
-               trace_io_uring_req_failed(sqe, ctx, req, ret);
-
-               /* fail even hard links since we don't submit */
-               if (link->head) {
-                       /*
-                        * we can judge a link req is failed or cancelled by if
-                        * REQ_F_FAIL is set, but the head is an exception since
-                        * it may be set REQ_F_FAIL because of other req's failure
-                        * so let's leverage req->result to distinguish if a head
-                        * is set REQ_F_FAIL because of its failure or other req's
-                        * failure so that we can set the correct ret code for it.
-                        * init result here to avoid affecting the normal path.
-                        */
-                       if (!(link->head->flags & REQ_F_FAIL))
-                               req_fail_link_node(link->head, -ECANCELED);
-               } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
-                       /*
-                        * the current req is a normal req, we should return
-                        * error and thus break the submission loop.
-                        */
-                       io_req_complete_failed(req, ret);
-                       return ret;
-               }
-               req_fail_link_node(req, ret);
-       }
+       if (unlikely(ret))
+               return io_submit_fail_init(sqe, req, ret);
 
        /* don't need @sqe from now on */
-       trace_io_uring_submit_sqe(ctx, req, req->user_data, req->opcode,
+       trace_io_uring_submit_sqe(ctx, req, req->cqe.user_data, req->opcode,
                                  req->flags, true,
                                  ctx->flags & IORING_SETUP_SQPOLL);
 
@@ -7730,29 +8936,32 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
         * submitted sync once the chain is complete. If none of those
         * conditions are true (normal request), then just queue it.
         */
-       if (link->head) {
-               struct io_kiocb *head = link->head;
-
-               if (!(req->flags & REQ_F_FAIL)) {
-                       ret = io_req_prep_async(req);
-                       if (unlikely(ret)) {
-                               req_fail_link_node(req, ret);
-                               if (!(head->flags & REQ_F_FAIL))
-                                       req_fail_link_node(head, -ECANCELED);
-                       }
-               }
-               trace_io_uring_link(ctx, req, head);
+       if (unlikely(link->head)) {
+               ret = io_req_prep_async(req);
+               if (unlikely(ret))
+                       return io_submit_fail_init(sqe, req, ret);
+
+               trace_io_uring_link(ctx, req, link->head);
                link->last->link = req;
                link->last = req;
 
-               if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
+               if (req->flags & IO_REQ_LINK_FLAGS)
                        return 0;
-               /* last request of a link, enqueue the link */
+               /* last request of the link, flush it */
+               req = link->head;
                link->head = NULL;
-               req = head;
-       } else if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
-               link->head = req;
-               link->last = req;
+               if (req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL))
+                       goto fallback;
+
+       } else if (unlikely(req->flags & (IO_REQ_LINK_FLAGS |
+                                         REQ_F_FORCE_ASYNC | REQ_F_FAIL))) {
+               if (req->flags & IO_REQ_LINK_FLAGS) {
+                       link->head = req;
+                       link->last = req;
+               } else {
+fallback:
+                       io_queue_sqe_fallback(req);
+               }
                return 0;
        }
 
@@ -7767,8 +8976,8 @@ static void io_submit_state_end(struct io_ring_ctx *ctx)
 {
        struct io_submit_state *state = &ctx->submit_state;
 
-       if (state->link.head)
-               io_queue_sqe(state->link.head);
+       if (unlikely(state->link.head))
+               io_queue_sqe_fallback(state->link.head);
        /* flush only after queuing links as they can generate completions */
        io_submit_flush_completions(ctx);
        if (state->plug_started)
@@ -7822,8 +9031,12 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
         *    though the application is the one updating it.
         */
        head = READ_ONCE(ctx->sq_array[sq_idx]);
-       if (likely(head < ctx->sq_entries))
+       if (likely(head < ctx->sq_entries)) {
+               /* 128-byte SQEs take two slots each, so double the index */
+               if (ctx->flags & IORING_SETUP_SQE128)
+                       head <<= 1;
                return &ctx->sq_sqes[head];
+       }
 
        /* drop invalid entries */
        ctx->cq_extra--;
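
/*
 * Sketch (assumption, for illustration): with IORING_SETUP_SQE128 every SQ
 * slot holds 128 bytes, i.e. two struct io_uring_sqe, which is why the head
 * index above is doubled before indexing sq_sqes.
 */
static inline struct io_uring_sqe *sqe_at(struct io_uring_sqe *sq_sqes,
                                          unsigned int head, bool sqe128)
{
        return &sq_sqes[sqe128 ? head << 1 : head];
}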
@@ -7836,54 +9049,52 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
        __must_hold(&ctx->uring_lock)
 {
        unsigned int entries = io_sqring_entries(ctx);
-       int submitted = 0;
+       unsigned int left;
+       int ret;
 
        if (unlikely(!entries))
                return 0;
        /* make sure SQ entry isn't read before tail */
-       nr = min3(nr, ctx->sq_entries, entries);
-       io_get_task_refs(nr);
+       ret = left = min3(nr, ctx->sq_entries, entries);
+       io_get_task_refs(left);
+       io_submit_state_start(&ctx->submit_state, left);
 
-       io_submit_state_start(&ctx->submit_state, nr);
        do {
                const struct io_uring_sqe *sqe;
                struct io_kiocb *req;
 
-               if (unlikely(!io_alloc_req_refill(ctx))) {
-                       if (!submitted)
-                               submitted = -EAGAIN;
+               if (unlikely(!io_alloc_req_refill(ctx)))
                        break;
-               }
                req = io_alloc_req(ctx);
                sqe = io_get_sqe(ctx);
                if (unlikely(!sqe)) {
-                       wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
+                       io_req_add_to_cache(req, ctx);
                        break;
                }
-               /* will complete beyond this point, count as submitted */
-               submitted++;
-               if (io_submit_sqe(ctx, req, sqe)) {
-                       /*
-                        * Continue submitting even for sqe failure if the
-                        * ring was setup with IORING_SETUP_SUBMIT_ALL
-                        */
-                       if (!(ctx->flags & IORING_SETUP_SUBMIT_ALL))
-                               break;
-               }
-       } while (submitted < nr);
 
-       if (unlikely(submitted != nr)) {
-               int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
-               int unused = nr - ref_used;
+               /*
+                * Continue submitting even for sqe failure if the
+                * ring was set up with IORING_SETUP_SUBMIT_ALL
+                */
+               if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
+                   !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
+                       left--;
+                       break;
+               }
+       } while (--left);
 
-               current->io_uring->cached_refs += unused;
+       if (unlikely(left)) {
+               ret -= left;
+               /* try again if it submitted nothing and can't allocate a req */
+               if (!ret && io_req_cache_empty(ctx))
+                       ret = -EAGAIN;
+               current->io_uring->cached_refs += left;
        }
 
        io_submit_state_end(ctx);
         /* Commit SQ ring head once we've consumed and submitted all SQEs */
        io_commit_sqring(ctx);
-
-       return submitted;
+       return ret;
 }
 
 static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
@@ -7891,23 +9102,6 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
        return READ_ONCE(sqd->state);
 }
 
-static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
-{
-       /* Tell userspace we may need a wakeup call */
-       spin_lock(&ctx->completion_lock);
-       WRITE_ONCE(ctx->rings->sq_flags,
-                  ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
-       spin_unlock(&ctx->completion_lock);
-}
-
-static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
-{
-       spin_lock(&ctx->completion_lock);
-       WRITE_ONCE(ctx->rings->sq_flags,
-                  ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
-       spin_unlock(&ctx->completion_lock);
-}
-
 static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 {
        unsigned int to_submit;
@@ -8023,8 +9217,8 @@ static int io_sq_thread(void *data)
                        bool needs_sched = true;
 
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
-                               io_ring_set_wakeup_flag(ctx);
-
+                               atomic_or(IORING_SQ_NEED_WAKEUP,
+                                               &ctx->rings->sq_flags);
                                if ((ctx->flags & IORING_SETUP_IOPOLL) &&
                                    !wq_list_empty(&ctx->iopoll_list)) {
                                        needs_sched = false;
@@ -8035,7 +9229,7 @@ static int io_sq_thread(void *data)
                                 * Ensure the store of the wakeup flag is not
                                 * reordered with the load of the SQ tail
                                 */
-                               smp_mb();
+                               smp_mb__after_atomic();
 
                                if (io_sqring_entries(ctx)) {
                                        needs_sched = false;
@@ -8049,7 +9243,8 @@ static int io_sq_thread(void *data)
                                mutex_lock(&sqd->lock);
                        }
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-                               io_ring_clear_wakeup_flag(ctx);
+                               atomic_andnot(IORING_SQ_NEED_WAKEUP,
+                                               &ctx->rings->sq_flags);
                }
 
                finish_wait(&sqd->wait, &wait);
@@ -8059,7 +9254,7 @@ static int io_sq_thread(void *data)
        io_uring_cancel_generic(true, sqd);
        sqd->thread = NULL;
        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-               io_ring_set_wakeup_flag(ctx);
+               atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
        io_run_task_work();
        mutex_unlock(&sqd->lock);
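
/*
 * Userspace-side sketch (an assumption, not part of the patch) of the
 * pairing for the atomic IORING_SQ_NEED_WAKEUP updates above: the submitter
 * publishes the new SQ tail, issues a full barrier, then checks the flag and
 * wakes the SQPOLL thread only if it went to sleep. io_uring_enter() is
 * assumed to be liburing's raw syscall wrapper; <stdatomic.h> is assumed.
 */
static void sqpoll_submit(int ring_fd, _Atomic unsigned int *sq_flags)
{
        /* order the SQ tail store before the flag load; the kernel side
         * orders its flag store before the tail load (smp_mb__after_atomic) */
        atomic_thread_fence(memory_order_seq_cst);
        if (atomic_load(sq_flags) & IORING_SQ_NEED_WAKEUP)
                io_uring_enter(ring_fd, 0, 0, IORING_ENTER_SQ_WAKEUP, NULL);
}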
 
@@ -8099,7 +9294,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
         * Cannot safely flush overflowed CQEs from here, ensure we wake up
         * the task, and the next invocation will do it.
         */
-       if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow))
+       if (io_should_wake(iowq) ||
+           test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &iowq->ctx->check_cq))
                return autoremove_wake_function(curr, mode, wake_flags, key);
        return -1;
 }
@@ -8121,15 +9317,18 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
                                          ktime_t timeout)
 {
        int ret;
+       unsigned long check_cq;
 
        /* make sure we run task_work before checking for signals */
        ret = io_run_task_work_sig();
        if (ret || io_should_wake(iowq))
                return ret;
+       check_cq = READ_ONCE(ctx->check_cq);
        /* let the caller flush overflows, retry */
-       if (test_bit(0, &ctx->check_cq_overflow))
+       if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
                return 1;
-
+       if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT)))
+               return -EBADR;
        if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
                return -ETIME;
        return 1;
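
/*
 * Userspace sketch (assumption, using liburing's io_uring_wait_cqe()): with
 * the hunk above, waiting can now fail with -EBADR once the kernel has
 * dropped a CQE (IO_CHECK_CQ_DROPPED_BIT), i.e. completions were lost and
 * application state must be resynchronised.
 */
static int wait_cqe_checked(struct io_uring *ring, struct io_uring_cqe **cqe)
{
        int ret = io_uring_wait_cqe(ring, cqe);

        if (ret == -EBADR) {
                /* at least one completion was dropped; recover/resync here */
        }
        return ret;
}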
@@ -8194,10 +9393,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
                                                TASK_INTERRUPTIBLE);
                ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
-               finish_wait(&ctx->cq_wait, &iowq.wq);
                cond_resched();
        } while (ret > 0);
 
+       finish_wait(&ctx->cq_wait, &iowq.wq);
        restore_saved_sigmask_unless(ret == -EINTR);
 
        return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
@@ -8435,17 +9634,57 @@ static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
 {
        table->files = kvcalloc(nr_files, sizeof(table->files[0]),
                                GFP_KERNEL_ACCOUNT);
-       return !!table->files;
+       if (unlikely(!table->files))
+               return false;
+
+       table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT);
+       if (unlikely(!table->bitmap)) {
+               kvfree(table->files);
+               return false;
+       }
+
+       return true;
 }
 
 static void io_free_file_tables(struct io_file_table *table)
 {
        kvfree(table->files);
+       bitmap_free(table->bitmap);
        table->files = NULL;
+       table->bitmap = NULL;
+}
+
+static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
+{
+       WARN_ON_ONCE(test_bit(bit, table->bitmap));
+       __set_bit(bit, table->bitmap);
+       if (bit == table->alloc_hint)
+               table->alloc_hint++;
+}
+
+static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
+{
+       __clear_bit(bit, table->bitmap);
+       table->alloc_hint = bit;
 }
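
/*
 * Sketch (assumption): the bitmap and alloc_hint above enable cheap
 * free-slot allocation for fixed files; an allocator would scan upwards
 * from the hint and wrap around, e.g.:
 */
static int io_file_bitmap_get_sketch(struct io_file_table *table,
                                     unsigned int nr_files)
{
        unsigned long bit;

        bit = find_next_zero_bit(table->bitmap, nr_files, table->alloc_hint);
        if (bit != nr_files)
                return bit;
        /* wrap around and search below the hint */
        bit = find_next_zero_bit(table->bitmap, table->alloc_hint, 0);
        if (bit != table->alloc_hint)
                return bit;
        return -ENFILE;
}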
 
 static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
+#if !defined(IO_URING_SCM_ALL)
+       int i;
+
+       for (i = 0; i < ctx->nr_user_files; i++) {
+               struct file *file = io_file_from_index(ctx, i);
+
+               if (!file)
+                       continue;
+               if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM)
+                       continue;
+               io_file_bitmap_clear(&ctx->file_table, i);
+               fput(file);
+       }
+#endif
+
 #if defined(CONFIG_UNIX)
        if (ctx->ring_sock) {
                struct sock *sock = ctx->ring_sock->sk;
@@ -8454,16 +9693,6 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
                while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
                        kfree_skb(skb);
        }
-#else
-       int i;
-
-       for (i = 0; i < ctx->nr_user_files; i++) {
-               struct file *file;
-
-               file = io_file_from_index(ctx, i);
-               if (file)
-                       fput(file);
-       }
 #endif
        io_free_file_tables(&ctx->file_table);
        io_rsrc_data_free(ctx->file_data);
@@ -8608,107 +9837,66 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
        return sqd;
 }
 
-#if defined(CONFIG_UNIX)
 /*
  * Ensure the UNIX gc is aware of our file set, so we are certain that
  * the io_uring can be safely unregistered on process exit, even if we have
- * loops in the file referencing.
+ * loops in the file referencing. We account only files that can hold other
+ * files because otherwise they can't form a loop and so are not interesting
+ * for GC.
  */
-static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
+static int io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
 {
+#if defined(CONFIG_UNIX)
        struct sock *sk = ctx->ring_sock->sk;
+       struct sk_buff_head *head = &sk->sk_receive_queue;
        struct scm_fp_list *fpl;
        struct sk_buff *skb;
-       int i, nr_files;
-
-       fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
-       if (!fpl)
-               return -ENOMEM;
-
-       skb = alloc_skb(0, GFP_KERNEL);
-       if (!skb) {
-               kfree(fpl);
-               return -ENOMEM;
-       }
-
-       skb->sk = sk;
-
-       nr_files = 0;
-       fpl->user = get_uid(current_user());
-       for (i = 0; i < nr; i++) {
-               struct file *file = io_file_from_index(ctx, i + offset);
-
-               if (!file)
-                       continue;
-               fpl->fp[nr_files] = get_file(file);
-               unix_inflight(fpl->user, fpl->fp[nr_files]);
-               nr_files++;
-       }
-
-       if (nr_files) {
-               fpl->max = SCM_MAX_FD;
-               fpl->count = nr_files;
-               UNIXCB(skb).fp = fpl;
-               skb->destructor = unix_destruct_scm;
-               refcount_add(skb->truesize, &sk->sk_wmem_alloc);
-               skb_queue_head(&sk->sk_receive_queue, skb);
-
-               for (i = 0; i < nr; i++) {
-                       struct file *file = io_file_from_index(ctx, i + offset);
-
-                       if (file)
-                               fput(file);
-               }
-       } else {
-               kfree_skb(skb);
-               free_uid(fpl->user);
-               kfree(fpl);
-       }
-
-       return 0;
-}
 
-/*
- * If UNIX sockets are enabled, fd passing can cause a reference cycle which
- * causes regular reference counting to break down. We rely on the UNIX
- * garbage collection to take care of this problem for us.
- */
-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
-{
-       unsigned left, total;
-       int ret = 0;
+       if (likely(!io_file_need_scm(file)))
+               return 0;
 
-       total = 0;
-       left = ctx->nr_user_files;
-       while (left) {
-               unsigned this_files = min_t(unsigned, left, SCM_MAX_FD);
+       /*
+        * See if we can merge this file into an existing skb SCM_RIGHTS
+        * file set. If there's no room, fall back to allocating a new skb
+        * and filling it in.
+        */
+       spin_lock_irq(&head->lock);
+       skb = skb_peek(head);
+       if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD)
+               __skb_unlink(skb, head);
+       else
+               skb = NULL;
+       spin_unlock_irq(&head->lock);
 
-               ret = __io_sqe_files_scm(ctx, this_files, total);
-               if (ret)
-                       break;
-               left -= this_files;
-               total += this_files;
-       }
+       if (!skb) {
+               fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
+               if (!fpl)
+                       return -ENOMEM;
 
-       if (!ret)
-               return 0;
+               skb = alloc_skb(0, GFP_KERNEL);
+               if (!skb) {
+                       kfree(fpl);
+                       return -ENOMEM;
+               }
 
-       while (total < ctx->nr_user_files) {
-               struct file *file = io_file_from_index(ctx, total);
+               fpl->user = get_uid(current_user());
+               fpl->max = SCM_MAX_FD;
+               fpl->count = 0;
 
-               if (file)
-                       fput(file);
-               total++;
+               UNIXCB(skb).fp = fpl;
+               skb->sk = sk;
+               skb->destructor = unix_destruct_scm;
+               refcount_add(skb->truesize, &sk->sk_wmem_alloc);
        }
 
-       return ret;
-}
-#else
-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
-{
+       fpl = UNIXCB(skb).fp;
+       fpl->fp[fpl->count++] = get_file(file);
+       unix_inflight(fpl->user, file);
+       skb_queue_head(head, skb);
+       fput(file);
+#endif
        return 0;
 }
-#endif
 
 static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
 {
@@ -8719,6 +9907,11 @@ static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
        struct sk_buff *skb;
        int i;
 
+       if (!io_file_need_scm(file)) {
+               fput(file);
+               return;
+       }
+
        __skb_queue_head_init(&list);
 
        /*
@@ -8783,15 +9976,17 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
                list_del(&prsrc->list);
 
                if (prsrc->tag) {
-                       bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
+                       if (ctx->flags & IORING_SETUP_IOPOLL)
+                               mutex_lock(&ctx->uring_lock);
 
-                       io_ring_submit_lock(ctx, lock_ring);
                        spin_lock(&ctx->completion_lock);
                        io_fill_cqe_aux(ctx, prsrc->tag, 0, 0);
                        io_commit_cqring(ctx);
                        spin_unlock(&ctx->completion_lock);
                        io_cqring_ev_posted(ctx);
-                       io_ring_submit_unlock(ctx, lock_ring);
+
+                       if (ctx->flags & IORING_SETUP_IOPOLL)
+                               mutex_unlock(&ctx->uring_lock);
                }
 
                rsrc_data->do_put(ctx, prsrc);
@@ -8845,27 +10040,31 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
        if (ret)
                return ret;
 
-       ret = -ENOMEM;
-       if (!io_alloc_file_tables(&ctx->file_table, nr_args))
-               goto out_free;
+       if (!io_alloc_file_tables(&ctx->file_table, nr_args)) {
+               io_rsrc_data_free(ctx->file_data);
+               ctx->file_data = NULL;
+               return -ENOMEM;
+       }
 
        for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
-               if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
+               struct io_fixed_file *file_slot;
+
+               if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) {
                        ret = -EFAULT;
-                       goto out_fput;
+                       goto fail;
                }
                /* allow sparse sets */
-               if (fd == -1) {
+               if (!fds || fd == -1) {
                        ret = -EINVAL;
                        if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
-                               goto out_fput;
+                               goto fail;
                        continue;
                }
 
                file = fget(fd);
                ret = -EBADF;
                if (unlikely(!file))
-                       goto out_fput;
+                       goto fail;
 
                /*
                 * Don't allow io_uring instances to be registered. If UNIX
@@ -8876,74 +10075,23 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
                 */
                if (file->f_op == &io_uring_fops) {
                        fput(file);
-                       goto out_fput;
+                       goto fail;
                }
-               io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file);
-       }
-
-       ret = io_sqe_files_scm(ctx);
-       if (ret) {
-               __io_sqe_files_unregister(ctx);
-               return ret;
-       }
-
-       io_rsrc_node_switch(ctx, NULL);
-       return ret;
-out_fput:
-       for (i = 0; i < ctx->nr_user_files; i++) {
-               file = io_file_from_index(ctx, i);
-               if (file)
+               ret = io_scm_file_account(ctx, file);
+               if (ret) {
                        fput(file);
-       }
-       io_free_file_tables(&ctx->file_table);
-       ctx->nr_user_files = 0;
-out_free:
-       io_rsrc_data_free(ctx->file_data);
-       ctx->file_data = NULL;
-       return ret;
-}
-
-static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
-                               int index)
-{
-#if defined(CONFIG_UNIX)
-       struct sock *sock = ctx->ring_sock->sk;
-       struct sk_buff_head *head = &sock->sk_receive_queue;
-       struct sk_buff *skb;
-
-       /*
-        * See if we can merge this file into an existing skb SCM_RIGHTS
-        * file set. If there's no room, fall back to allocating a new skb
-        * and filling it in.
-        */
-       spin_lock_irq(&head->lock);
-       skb = skb_peek(head);
-       if (skb) {
-               struct scm_fp_list *fpl = UNIXCB(skb).fp;
-
-               if (fpl->count < SCM_MAX_FD) {
-                       __skb_unlink(skb, head);
-                       spin_unlock_irq(&head->lock);
-                       fpl->fp[fpl->count] = get_file(file);
-                       unix_inflight(fpl->user, fpl->fp[fpl->count]);
-                       fpl->count++;
-                       spin_lock_irq(&head->lock);
-                       __skb_queue_head(head, skb);
-               } else {
-                       skb = NULL;
+                       goto fail;
                }
-       }
-       spin_unlock_irq(&head->lock);
-
-       if (skb) {
-               fput(file);
-               return 0;
+               file_slot = io_fixed_file_slot(&ctx->file_table, i);
+               io_fixed_file_set(file_slot, file);
+               io_file_bitmap_set(&ctx->file_table, i);
        }
 
-       return __io_sqe_files_scm(ctx, 1, index);
-#else
+       io_rsrc_node_switch(ctx, NULL);
        return 0;
-#endif
+fail:
+       __io_sqe_files_unregister(ctx);
+       return ret;
 }
 
 static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
@@ -8967,12 +10115,11 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
                                 unsigned int issue_flags, u32 slot_index)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
        bool needs_switch = false;
        struct io_fixed_file *file_slot;
        int ret = -EBADF;
 
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
        if (file->f_op == &io_uring_fops)
                goto err;
        ret = -ENXIO;
@@ -8998,22 +10145,20 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
                if (ret)
                        goto err;
                file_slot->file_ptr = 0;
+               io_file_bitmap_clear(&ctx->file_table, slot_index);
                needs_switch = true;
        }
 
-       *io_get_tag_slot(ctx->file_data, slot_index) = 0;
-       io_fixed_file_set(file_slot, file);
-       ret = io_sqe_file_register(ctx, file, slot_index);
-       if (ret) {
-               file_slot->file_ptr = 0;
-               goto err;
+       ret = io_scm_file_account(ctx, file);
+       if (!ret) {
+               *io_get_tag_slot(ctx->file_data, slot_index) = 0;
+               io_fixed_file_set(file_slot, file);
+               io_file_bitmap_set(&ctx->file_table, slot_index);
        }
-
-       ret = 0;
 err:
        if (needs_switch)
                io_rsrc_node_switch(ctx, ctx->file_data);
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
        if (ret)
                fput(file);
        return ret;
@@ -9023,12 +10168,11 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
 {
        unsigned int offset = req->close.file_slot - 1;
        struct io_ring_ctx *ctx = req->ctx;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
        struct io_fixed_file *file_slot;
        struct file *file;
        int ret;
 
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
        ret = -ENXIO;
        if (unlikely(!ctx->file_data))
                goto out;
@@ -9051,10 +10195,11 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
                goto out;
 
        file_slot->file_ptr = 0;
+       io_file_bitmap_clear(&ctx->file_table, offset);
        io_rsrc_node_switch(ctx, ctx->file_data);
        ret = 0;
 out:
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
        return ret;
 }
 
@@ -9100,6 +10245,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
                        if (err)
                                break;
                        file_slot->file_ptr = 0;
+                       io_file_bitmap_clear(&ctx->file_table, i);
                        needs_switch = true;
                }
                if (fd != -1) {
@@ -9121,14 +10267,14 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
                                err = -EBADF;
                                break;
                        }
-                       *io_get_tag_slot(data, i) = tag;
-                       io_fixed_file_set(file_slot, file);
-                       err = io_sqe_file_register(ctx, file, i);
+                       err = io_scm_file_account(ctx, file);
                        if (err) {
-                               file_slot->file_ptr = 0;
                                fput(file);
                                break;
                        }
+                       *io_get_tag_slot(data, i) = tag;
+                       io_fixed_file_set(file_slot, file);
+                       io_file_bitmap_set(&ctx->file_table, i);
                }
        }
 
@@ -9208,7 +10354,7 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
        task->io_uring = tctx;
        spin_lock_init(&tctx->task_lock);
        INIT_WQ_LIST(&tctx->task_list);
-       INIT_WQ_LIST(&tctx->prior_task_list);
+       INIT_WQ_LIST(&tctx->prio_task_list);
        init_task_work(&tctx->task_work, tctx_task_work);
        return 0;
 }
@@ -9386,8 +10532,8 @@ static void *io_mem_alloc(size_t size)
        return (void *) __get_free_pages(gfp, get_order(size));
 }
 
-static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
-                               size_t *sq_offset)
+static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
+                               unsigned int cq_entries, size_t *sq_offset)
 {
        struct io_rings *rings;
        size_t off, sq_array_size;
@@ -9395,6 +10541,10 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
        off = struct_size(rings, cqes, cq_entries);
        if (off == SIZE_MAX)
                return SIZE_MAX;
+       if (ctx->flags & IORING_SETUP_CQE32) {
+               if (check_shl_overflow(off, 1, &off))
+                       return SIZE_MAX;
+       }
 
 #ifdef CONFIG_SMP
        off = ALIGN(off, SMP_CACHE_BYTES);
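
/*
 * Worked example for the CQE32 branch above (illustrative): struct
 * io_uring_cqe is 16 bytes, so IORING_SETUP_CQE32 doubles it to 32; for
 * 256 entries the cqes[] footprint grows from 4096 to 8192 bytes before
 * the SQ array offset is added, and check_shl_overflow() guards the
 * doubling just as struct_size() guards the multiplication.
 */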
@@ -9556,30 +10706,18 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
        return ret;
 }
 
-static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
-                                 struct io_mapped_ubuf **pimu,
-                                 struct page **last_hpage)
+static struct page **io_pin_pages(unsigned long ubuf, unsigned long len,
+                                 int *npages)
 {
-       struct io_mapped_ubuf *imu = NULL;
+       unsigned long start, end, nr_pages;
        struct vm_area_struct **vmas = NULL;
        struct page **pages = NULL;
-       unsigned long off, start, end, ubuf;
-       size_t size;
-       int ret, pret, nr_pages, i;
-
-       if (!iov->iov_base) {
-               *pimu = ctx->dummy_ubuf;
-               return 0;
-       }
+       int i, pret, ret = -ENOMEM;
 
-       ubuf = (unsigned long) iov->iov_base;
-       end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       end = (ubuf + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        start = ubuf >> PAGE_SHIFT;
        nr_pages = end - start;
 
-       *pimu = NULL;
-       ret = -ENOMEM;
-
        pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
        if (!pages)
                goto done;
@@ -9589,10 +10727,6 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
        if (!vmas)
                goto done;
 
-       imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
-       if (!imu)
-               goto done;
-
        ret = 0;
        mmap_read_lock(current->mm);
        pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
@@ -9610,6 +10744,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                                break;
                        }
                }
+               *npages = nr_pages;
        } else {
                ret = pret < 0 ? pret : -EFAULT;
        }
@@ -9623,14 +10758,53 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                        unpin_user_pages(pages, pret);
                goto done;
        }
+       ret = 0;
+done:
+       kvfree(vmas);
+       if (ret < 0) {
+               kvfree(pages);
+               pages = ERR_PTR(ret);
+       }
+       return pages;
+}
+
+static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
+                                 struct io_mapped_ubuf **pimu,
+                                 struct page **last_hpage)
+{
+       struct io_mapped_ubuf *imu = NULL;
+       struct page **pages = NULL;
+       unsigned long off;
+       size_t size;
+       int ret, nr_pages, i;
+
+       if (!iov->iov_base) {
+               *pimu = ctx->dummy_ubuf;
+               return 0;
+       }
+
+       *pimu = NULL;
+       ret = -ENOMEM;
+
+       pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len,
+                               &nr_pages);
+       if (IS_ERR(pages)) {
+               ret = PTR_ERR(pages);
+               pages = NULL;
+               goto done;
+       }
+
+       imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
+       if (!imu)
+               goto done;
 
-       ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage);
+       ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage);
        if (ret) {
-               unpin_user_pages(pages, pret);
+               unpin_user_pages(pages, nr_pages);
                goto done;
        }
 
-       off = ubuf & ~PAGE_MASK;
+       off = (unsigned long) iov->iov_base & ~PAGE_MASK;
        size = iov->iov_len;
        for (i = 0; i < nr_pages; i++) {
                size_t vec_len;
@@ -9643,8 +10817,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                size -= vec_len;
        }
        /* store original address for later verification */
-       imu->ubuf = ubuf;
-       imu->ubuf_end = ubuf + iov->iov_len;
+       imu->ubuf = (unsigned long) iov->iov_base;
+       imu->ubuf_end = imu->ubuf + iov->iov_len;
        imu->nr_bvecs = nr_pages;
        *pimu = imu;
        ret = 0;
@@ -9652,7 +10826,6 @@ done:
        if (ret)
                kvfree(imu);
        kvfree(pages);
-       kvfree(vmas);
        return ret;
 }
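
[editor's note] The hunks above split the page-pinning logic out of io_sqe_buffer_register() into a reusable io_pin_pages() helper that returns the pinned page array (or an ERR_PTR) and reports the page count through an out parameter. A minimal sketch of the resulting calling convention, with illustrative variable names:

	struct page **pages;
	int nr_pages;

	pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len,
			     &nr_pages);
	if (IS_ERR(pages))
		return PTR_ERR(pages);	/* nothing was pinned */
	/* ... build bvecs from pages[0..nr_pages-1] ... */
	unpin_user_pages(pages, nr_pages);
	kvfree(pages);

The same helper is reused by the provided-buffer-ring registration added later in this patch (io_register_pbuf_ring()).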
 
@@ -9711,12 +10884,17 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
        }
 
        for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
-               ret = io_copy_iov(ctx, &iov, arg, i);
-               if (ret)
-                       break;
-               ret = io_buffer_validate(&iov);
-               if (ret)
-                       break;
+               if (arg) {
+                       ret = io_copy_iov(ctx, &iov, arg, i);
+                       if (ret)
+                               break;
+                       ret = io_buffer_validate(&iov);
+                       if (ret)
+                               break;
+               } else {
+                       memset(&iov, 0, sizeof(iov));
+               }
+
                if (!iov.iov_base && *io_get_tag_slot(data, i)) {
                        ret = -EINVAL;
                        break;
@@ -9855,19 +11033,19 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
+       struct io_buffer_list *bl;
+       unsigned long index;
        int i;
 
-       for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
-               struct list_head *list = &ctx->io_buffers[i];
-
-               while (!list_empty(list)) {
-                       struct io_buffer_list *bl;
+       for (i = 0; i < BGID_ARRAY; i++) {
+               if (!ctx->io_bl)
+                       break;
+               __io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
+       }
 
-                       bl = list_first_entry(list, struct io_buffer_list, list);
-                       __io_remove_buffers(ctx, bl, -1U);
-                       list_del(&bl->list);
-                       kfree(bl);
-               }
+       xa_for_each(&ctx->io_bl_xa, index, bl) {
+               xa_erase(&ctx->io_bl_xa, bl->bgid);
+               __io_remove_buffers(ctx, bl, -1U);
        }
 
        while (!list_empty(&ctx->io_buffers_pages)) {
@@ -9887,7 +11065,7 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
        mutex_lock(&ctx->uring_lock);
        io_flush_cached_locked_reqs(ctx, state);
 
-       while (state->free_list.next) {
+       while (!io_req_cache_empty(ctx)) {
                struct io_wq_work_node *node;
                struct io_kiocb *req;
 
@@ -9976,7 +11154,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
                io_wq_put_hash(ctx->hash_map);
        kfree(ctx->cancel_hash);
        kfree(ctx->dummy_ubuf);
-       kfree(ctx->io_buffers);
+       kfree(ctx->io_bl);
+       xa_destroy(&ctx->io_bl_xa);
        kfree(ctx);
 }
 
@@ -10007,7 +11186,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
          * Users may get EPOLLIN while seeing nothing in the cqring; this
          * pushes them to do the flush.
         */
-       if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow))
+       if (io_cqring_events(ctx) ||
+           test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        return mask;
@@ -10139,8 +11319,7 @@ static __cold bool io_kill_timeouts(struct io_ring_ctx *ctx,
                }
        }
        spin_unlock_irq(&ctx->timeout_lock);
-       if (canceled != 0)
-               io_commit_cqring(ctx);
+       io_commit_cqring(ctx);
        spin_unlock(&ctx->completion_lock);
        if (canceled != 0)
                io_cqring_ev_posted(ctx);
@@ -10160,11 +11339,13 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
                io_unregister_personality(ctx, index);
        mutex_unlock(&ctx->uring_lock);
 
-       io_kill_timeouts(ctx, NULL, true);
-       io_poll_remove_all(ctx, NULL, true);
-
-       /* if we failed setting up the ctx, we might not have any rings */
-       io_iopoll_try_reap_events(ctx);
+       /* failed during ring init, it couldn't have issued any requests */
+       if (ctx->rings) {
+               io_kill_timeouts(ctx, NULL, true);
+               io_poll_remove_all(ctx, NULL, true);
+               /* if we failed setting up the ctx, we might not have any rings */
+               io_iopoll_try_reap_events(ctx);
+       }
 
        INIT_WORK(&ctx->exit_work, io_ring_exit_work);
        /*
@@ -10256,6 +11437,10 @@ static __cold void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
        struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
        struct io_uring_task *tctx = task ? task->io_uring : NULL;
 
+       /* failed during ring init, it couldn't have issued any requests */
+       if (!ctx->rings)
+               return;
+
        while (1) {
                enum io_wq_cancel cret;
                bool ret = false;
@@ -10701,6 +11886,19 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
        return 0;
 }
 
+static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz)
+{
+       if (flags & IORING_ENTER_EXT_ARG) {
+               struct io_uring_getevents_arg arg;
+
+               if (argsz != sizeof(arg))
+                       return -EINVAL;
+               if (copy_from_user(&arg, argp, sizeof(arg)))
+                       return -EFAULT;
+       }
+       return 0;
+}
+
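[editor's note] io_validate_ext_arg() only sanity-checks the extended argument on the iopoll path, which otherwise never consumes it. A hedged userspace sketch of the structure being validated, assuming a hypothetical io_uring_enter2() wrapper for the six-argument syscall:

	struct io_uring_getevents_arg arg = {
		.sigmask    = (__u64)(uintptr_t) &sigmask,
		.sigmask_sz = _NSIG / 8,
		.ts         = (__u64)(uintptr_t) &timeout,
	};

	ret = io_uring_enter2(ring_fd, 0, 1,
			      IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
			      &arg, sizeof(arg));
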
 static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
                          struct __kernel_timespec __user **ts,
                          const sigset_t __user **sig)
@@ -10738,7 +11936,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                size_t, argsz)
 {
        struct io_ring_ctx *ctx;
-       int submitted = 0;
        struct fd f;
        long ret;
 
@@ -10801,39 +11998,64 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                        if (ret)
                                goto out;
                }
-               submitted = to_submit;
+               ret = to_submit;
        } else if (to_submit) {
                ret = io_uring_add_tctx_node(ctx);
                if (unlikely(ret))
                        goto out;
-               mutex_lock(&ctx->uring_lock);
-               submitted = io_submit_sqes(ctx, to_submit);
-               mutex_unlock(&ctx->uring_lock);
 
-               if (submitted != to_submit)
+               mutex_lock(&ctx->uring_lock);
+               ret = io_submit_sqes(ctx, to_submit);
+               if (ret != to_submit) {
+                       mutex_unlock(&ctx->uring_lock);
                        goto out;
+               }
+               if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll)
+                       goto iopoll_locked;
+               mutex_unlock(&ctx->uring_lock);
        }
        if (flags & IORING_ENTER_GETEVENTS) {
-               const sigset_t __user *sig;
-               struct __kernel_timespec __user *ts;
-
-               ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
-               if (unlikely(ret))
-                       goto out;
+               int ret2;
+
+               if (ctx->syscall_iopoll) {
+                       /*
+                        * We disallow the app entering submit/complete with
+                        * polling, but we still need to lock the ring to
+                        * prevent racing with polled issue that got punted to
+                        * a workqueue.
+                        */
+                       mutex_lock(&ctx->uring_lock);
+iopoll_locked:
+                       ret2 = io_validate_ext_arg(flags, argp, argsz);
+                       if (likely(!ret2)) {
+                               min_complete = min(min_complete,
+                                                  ctx->cq_entries);
+                               ret2 = io_iopoll_check(ctx, min_complete);
+                       }
+                       mutex_unlock(&ctx->uring_lock);
+               } else {
+                       const sigset_t __user *sig;
+                       struct __kernel_timespec __user *ts;
+
+                       ret2 = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
+                       if (likely(!ret2)) {
+                               min_complete = min(min_complete,
+                                                  ctx->cq_entries);
+                               ret2 = io_cqring_wait(ctx, min_complete, sig,
+                                                     argsz, ts);
+                       }
+               }
 
-               min_complete = min(min_complete, ctx->cq_entries);
+               if (!ret) {
+                       ret = ret2;
 
-               /*
-                * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
-                * space applications don't need to do io completion events
-                * polling again, they can rely on io_sq_thread to do polling
-                * work, which can reduce cpu usage and uring_lock contention.
-                */
-               if (ctx->flags & IORING_SETUP_IOPOLL &&
-                   !(ctx->flags & IORING_SETUP_SQPOLL)) {
-                       ret = io_iopoll_check(ctx, min_complete);
-               } else {
-                       ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
+                       /*
+                        * EBADR indicates that one or more CQEs were dropped.
+                        * Once the user has been informed, we can clear the bit
+                        * as they are obviously OK with those drops.
+                        */
+                       if (unlikely(ret2 == -EBADR))
+                               clear_bit(IO_CHECK_CQ_DROPPED_BIT,
+                                         &ctx->check_cq);
                }
        }
 
@@ -10842,7 +12064,7 @@ out:
 out_fput:
        if (!(flags & IORING_ENTER_REGISTERED_RING))
                fdput(f);
-       return submitted ? submitted : ret;
+       return ret;
 }
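
[editor's note] With the separate submitted variable gone, the return semantics are worth spelling out: ret carries the number of SQEs consumed, min_complete is clamped to cq_entries on both wait paths, and the wait result (ret2) is only surfaced when nothing was submitted in the same call. An illustrative caller, assuming a hypothetical raw-syscall wrapper:

	int ret = io_uring_enter(ring_fd, to_submit, min_complete,
				 IORING_ENTER_GETEVENTS, NULL, 0);
	if (ret == -EBADR) {
		/*
		 * The kernel dropped one or more CQEs; reporting -EBADR
		 * once clears IO_CHECK_CQ_DROPPED_BIT, so later calls
		 * proceed normally.
		 */
	} else if (ret >= 0) {
		/* ret SQEs were submitted; a wait error may be deferred */
	}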
 
 #ifdef CONFIG_PROC_FS
@@ -10889,10 +12111,15 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
        unsigned int sq_tail = READ_ONCE(r->sq.tail);
        unsigned int cq_head = READ_ONCE(r->cq.head);
        unsigned int cq_tail = READ_ONCE(r->cq.tail);
+       unsigned int cq_shift = 0;
        unsigned int sq_entries, cq_entries;
        bool has_lock;
+       bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
        unsigned int i;
 
+       if (is_cqe32)
+               cq_shift = 1;
+
        /*
         * we may get imprecise sqe and cqe info if uring is actively running
         * since we get cached_sq_head and cached_cq_tail without uring_lock
@@ -10925,11 +12152,18 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
        cq_entries = min(cq_tail - cq_head, ctx->cq_entries);
        for (i = 0; i < cq_entries; i++) {
                unsigned int entry = i + cq_head;
-               struct io_uring_cqe *cqe = &r->cqes[entry & cq_mask];
+               struct io_uring_cqe *cqe = &r->cqes[(entry & cq_mask) << cq_shift];
 
-               seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n",
+               if (!is_cqe32) {
+                       seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n",
                           entry & cq_mask, cqe->user_data, cqe->res,
                           cqe->flags);
+               } else {
+                       seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x, "
+                               "extra1:%llu, extra2:%llu\n",
+                               entry & cq_mask, cqe->user_data, cqe->res,
+                               cqe->flags, cqe->big_cqe[0], cqe->big_cqe[1]);
+               }
        }
 
        /*
@@ -11032,7 +12266,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
        ctx->sq_entries = p->sq_entries;
        ctx->cq_entries = p->cq_entries;
 
-       size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
+       size = rings_size(ctx, p->sq_entries, p->cq_entries, &sq_array_offset);
        if (size == SIZE_MAX)
                return -EOVERFLOW;
 
@@ -11047,7 +12281,10 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
        rings->sq_ring_entries = p->sq_entries;
        rings->cq_ring_entries = p->cq_entries;
 
-       size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+       if (p->flags & IORING_SETUP_SQE128)
+               size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries);
+       else
+               size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
        if (size == SIZE_MAX) {
                io_mem_free(ctx->rings);
                ctx->rings = NULL;
@@ -11159,10 +12396,40 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
        ctx = io_ring_ctx_alloc(p);
        if (!ctx)
                return -ENOMEM;
+
+       /*
+        * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
+        * space applications don't need to do io completion events
+        * polling again, they can rely on io_sq_thread to do polling
+        * work, which can reduce cpu usage and uring_lock contention.
+        */
+       if (ctx->flags & IORING_SETUP_IOPOLL &&
+           !(ctx->flags & IORING_SETUP_SQPOLL))
+               ctx->syscall_iopoll = 1;
+
        ctx->compat = in_compat_syscall();
        if (!capable(CAP_IPC_LOCK))
                ctx->user = get_uid(current_user());
 
+       /*
+        * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if
+        * COOP_TASKRUN is set, then IPIs are never needed by the app.
+        */
+       ret = -EINVAL;
+       if (ctx->flags & IORING_SETUP_SQPOLL) {
+               /* IPI related flags don't make sense with SQPOLL */
+               if (ctx->flags & (IORING_SETUP_COOP_TASKRUN |
+                                 IORING_SETUP_TASKRUN_FLAG))
+                       goto err;
+               ctx->notify_method = TWA_SIGNAL_NO_IPI;
+       } else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) {
+               ctx->notify_method = TWA_SIGNAL_NO_IPI;
+       } else {
+               if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+                       goto err;
+               ctx->notify_method = TWA_SIGNAL;
+       }
+
        /*
         * This is just grabbed for accounting purposes. When a process exits,
         * the mm is exited and dropped before the files, hence we need to hang
@@ -11260,10 +12527,12 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
        if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
                        IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
                        IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
-                       IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL))
+                       IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
+                       IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
+                       IORING_SETUP_SQE128 | IORING_SETUP_CQE32))
                return -EINVAL;
 
-       return  io_uring_create(entries, &p, params);
+       return io_uring_create(entries, &p, params);
 }
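
[editor's note] The new notify_method selection encodes a small decision table: SQPOLL always uses wakeup-only notification (TWA_SIGNAL_NO_IPI) and rejects both task-run flags, COOP_TASKRUN alone elides IPIs, and TASKRUN_FLAG without COOP_TASKRUN is invalid. A sketch of creating such a ring, assuming a hypothetical io_uring_setup() wrapper:

	struct io_uring_params p = {
		/*
		 * COOP_TASKRUN: run task_work without IPIs. TASKRUN_FLAG:
		 * have the kernel flag pending task_work in the SQ ring so
		 * the app knows an io_uring_enter() call is needed.
		 */
		.flags = IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG,
	};
	int ring_fd = io_uring_setup(128, &p);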
 
 SYSCALL_DEFINE2(io_uring_setup, u32, entries,
@@ -11476,14 +12745,20 @@ static __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
        memset(&rr, 0, sizeof(rr));
        if (copy_from_user(&rr, arg, size))
                return -EFAULT;
-       if (!rr.nr || rr.resv || rr.resv2)
+       if (!rr.nr || rr.resv2)
+               return -EINVAL;
+       if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE)
                return -EINVAL;
 
        switch (type) {
        case IORING_RSRC_FILE:
+               if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
+                       break;
                return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
                                             rr.nr, u64_to_user_ptr(rr.tags));
        case IORING_RSRC_BUFFER:
+               if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
+                       break;
                return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data),
                                               rr.nr, u64_to_user_ptr(rr.tags));
        }
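
[editor's note] IORING_RSRC_REGISTER_SPARSE reserves a table of empty slots: the branches above reject sparse registration combined with a data pointer, while data == 0 reaches the register helpers with a NULL userspace array (handled by the !arg path added to io_sqe_buffers_register() earlier). A hedged sketch of sparse file registration, assuming a wrapper around the register syscall:

	struct io_uring_rsrc_register rr = {
		.nr    = 64,				/* 64 empty slots */
		.flags = IORING_RSRC_REGISTER_SPARSE,	/* no data/tags */
	};

	ret = io_uring_register(ring_fd, IORING_REGISTER_FILES2,
				&rr, sizeof(rr));	/* size via nr_args */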
@@ -11618,6 +12893,85 @@ err:
        return ret;
 }
 
+static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+{
+       struct io_uring_buf_ring *br;
+       struct io_uring_buf_reg reg;
+       struct io_buffer_list *bl;
+       struct page **pages;
+       int nr_pages;
+
+       if (copy_from_user(&reg, arg, sizeof(reg)))
+               return -EFAULT;
+
+       if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
+               return -EINVAL;
+       if (!reg.ring_addr)
+               return -EFAULT;
+       if (reg.ring_addr & ~PAGE_MASK)
+               return -EINVAL;
+       if (!is_power_of_2(reg.ring_entries))
+               return -EINVAL;
+
+       if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
+               int ret = io_init_bl_list(ctx);
+               if (ret)
+                       return ret;
+       }
+
+       bl = io_buffer_get_list(ctx, reg.bgid);
+       if (bl) {
+               /* if mapped buffer ring OR classic exists, don't allow */
+               if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
+                       return -EEXIST;
+       } else {
+               bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+               if (!bl)
+                       return -ENOMEM;
+       }
+
+       pages = io_pin_pages(reg.ring_addr,
+                            struct_size(br, bufs, reg.ring_entries),
+                            &nr_pages);
+       if (IS_ERR(pages)) {
+               kfree(bl);
+               return PTR_ERR(pages);
+       }
+
+       br = page_address(pages[0]);
+       bl->buf_pages = pages;
+       bl->buf_nr_pages = nr_pages;
+       bl->nr_entries = reg.ring_entries;
+       bl->buf_ring = br;
+       bl->mask = reg.ring_entries - 1;
+       io_buffer_add_list(ctx, bl, reg.bgid);
+       return 0;
+}
+
+static int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+{
+       struct io_uring_buf_reg reg;
+       struct io_buffer_list *bl;
+
+       if (copy_from_user(&reg, arg, sizeof(reg)))
+               return -EFAULT;
+       if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
+               return -EINVAL;
+
+       bl = io_buffer_get_list(ctx, reg.bgid);
+       if (!bl)
+               return -ENOENT;
+       if (!bl->buf_nr_pages)
+               return -EINVAL;
+
+       __io_remove_buffers(ctx, bl, -1U);
+       if (bl->bgid >= BGID_ARRAY) {
+               xa_erase(&ctx->io_bl_xa, bl->bgid);
+               kfree(bl);
+       }
+       return 0;
+}
+
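[editor's note] Putting the two helpers together, a hedged userspace sketch of registering a provided buffer ring; field names follow struct io_uring_buf_reg as used above, and the register wrapper is illustrative:

	struct io_uring_buf_reg reg = { 0 };
	void *ring;

	/* ring_addr must be page aligned, ring_entries a power of two */
	if (posix_memalign(&ring, 4096, 4096))
		return -ENOMEM;
	reg.ring_addr    = (unsigned long) ring;
	reg.ring_entries = 8;
	reg.bgid         = 0;	/* buffer group ID requests will select */

	ret = io_uring_register(ring_fd, IORING_REGISTER_PBUF_RING, &reg, 1);
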
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                               void __user *arg, unsigned nr_args)
        __releases(ctx->uring_lock)
@@ -11643,6 +12997,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 
        switch (opcode) {
        case IORING_REGISTER_BUFFERS:
+               ret = -EFAULT;
+               if (!arg)
+                       break;
                ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
                break;
        case IORING_UNREGISTER_BUFFERS:
@@ -11652,6 +13009,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                ret = io_sqe_buffers_unregister(ctx);
                break;
        case IORING_REGISTER_FILES:
+               ret = -EFAULT;
+               if (!arg)
+                       break;
                ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
                break;
        case IORING_UNREGISTER_FILES:
@@ -11746,6 +13106,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
        case IORING_UNREGISTER_RING_FDS:
                ret = io_ringfd_unregister(ctx, arg, nr_args);
                break;
+       case IORING_REGISTER_PBUF_RING:
+               ret = -EINVAL;
+               if (!arg || nr_args != 1)
+                       break;
+               ret = io_register_pbuf_ring(ctx, arg);
+               break;
+       case IORING_UNREGISTER_PBUF_RING:
+               ret = -EINVAL;
+               if (!arg || nr_args != 1)
+                       break;
+               ret = io_unregister_pbuf_ring(ctx, arg);
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -11822,6 +13194,7 @@ static int __init io_uring_init(void)
        BUILD_BUG_SQE_ELEM(42, __u16,  personality);
        BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
        BUILD_BUG_SQE_ELEM(44, __u32,  file_index);
+       BUILD_BUG_SQE_ELEM(48, __u64,  addr3);
 
        BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
                     sizeof(struct io_uring_rsrc_update));
@@ -11830,6 +13203,10 @@ static int __init io_uring_init(void)
 
        /* ->buf_index is u16 */
        BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
+       BUILD_BUG_ON(BGID_ARRAY * sizeof(struct io_buffer_list) > PAGE_SIZE);
+       BUILD_BUG_ON(offsetof(struct io_uring_buf_ring, bufs) != 0);
+       BUILD_BUG_ON(offsetof(struct io_uring_buf, resv) !=
+                    offsetof(struct io_uring_buf_ring, tail));
 
        /* should fit into one byte */
        BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
@@ -11839,6 +13216,10 @@ static int __init io_uring_init(void)
        BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
        BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
 
+       BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
+
+       BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64);
+
        req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
                                SLAB_ACCOUNT);
        return 0;
index b08f5dc31780da7ae5288c711ce09328a60f46bc..80f9b047aa1b6298523daf76638ed67ae823bc24 100644 (file)
@@ -56,7 +56,8 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter,
 {
        atomic_inc(&dio->ref);
 
-       if (dio->iocb->ki_flags & IOCB_HIPRI) {
+       /* Sync dio can't be polled reliably */
+       if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) {
                bio_set_polled(bio, dio->iocb);
                dio->submit.poll_bio = bio;
        }
@@ -265,8 +266,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
                 * cache flushes on IO completion.
                 */
                if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
-                   (dio->flags & IOMAP_DIO_WRITE_FUA) &&
-                   blk_queue_fua(bdev_get_queue(iomap->bdev)))
+                   (dio->flags & IOMAP_DIO_WRITE_FUA) && bdev_fua(iomap->bdev))
                        use_fua = true;
        }
 
@@ -654,9 +654,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                        if (!READ_ONCE(dio->submit.waiter))
                                break;
 
-                       if (!dio->submit.poll_bio ||
-                           !bio_poll(dio->submit.poll_bio, NULL, 0))
-                               blk_io_schedule();
+                       blk_io_schedule();
                }
                __set_current_state(TASK_RUNNING);
        }
index fcacafa4510d173d144a00b6bc0a5936766f969e..c0cbeeaec2d1aa33d0ff97217ae02e2205c248ba 100644 (file)
@@ -1762,7 +1762,6 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
        unsigned long block, log_offset; /* logical */
        unsigned long long phys_block, block_start, block_stop; /* physical */
        loff_t byte_start, byte_stop, byte_count;
-       struct request_queue *q = bdev_get_queue(journal->j_dev);
 
        /* flags must be set to either discard or zeroout */
        if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
@@ -1770,10 +1769,8 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
                        (flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
                return -EINVAL;
 
-       if (!q)
-               return -ENXIO;
-
-       if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+       if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+           !bdev_max_discard_sectors(journal->j_dev))
                return -EOPNOTSUPP;
 
        /*
@@ -1828,7 +1825,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
                        err = blkdev_issue_discard(journal->j_dev,
                                        byte_start >> SECTOR_SHIFT,
                                        byte_count >> SECTOR_SHIFT,
-                                       GFP_NOFS, 0);
+                                       GFP_NOFS);
                } else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
                        err = blkdev_issue_zeroout(journal->j_dev,
                                        byte_start >> SECTOR_SHIFT,
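
[editor's note] This and the following filesystem hunks track a block-layer interface change: blkdev_issue_discard() lost its flags argument, and discard support is now probed via bdev_max_discard_sectors() instead of blk_queue_discard() on the request queue. The resulting call shape, sketched (secure erase, formerly a flag, is expected to go through its own helper in the same series):

	if (!bdev_max_discard_sectors(bdev))
		return -EOPNOTSUPP;	/* device does not support discard */

	err = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS);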
index 03a845ab4f009c30e6fc2a941bb2196f21a65200..1e7b177ece60579f5d66a461863fdd3f69baab5c 100644 (file)
@@ -110,14 +110,13 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case FITRIM:
        {
                struct super_block *sb = inode->i_sb;
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                s64 ret = 0;
 
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
-               if (!blk_queue_discard(q)) {
+               if (!bdev_max_discard_sectors(sb->s_bdev)) {
                        jfs_warn("FITRIM not supported on device");
                        return -EOPNOTSUPP;
                }
@@ -127,7 +126,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        return -EFAULT;
 
                range.minlen = max_t(unsigned int, range.minlen,
-                       q->limits.discard_granularity);
+                                    bdev_discard_granularity(sb->s_bdev));
 
                ret = jfs_ioc_trim(inode, &range);
                if (ret < 0)
index f1a13a74cddf3df17f6026af202f57de9969506b..85d4f44f2ac4dfa6da236263d2ba326a78b23100 100644 (file)
@@ -372,19 +372,16 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
                }
 
                case Opt_discard:
-               {
-                       struct request_queue *q = bdev_get_queue(sb->s_bdev);
                        /* if set to 1, even copying files will cause
                         * trimming :O
                         * -> user has more control over the online trimming
                         */
                        sbi->minblks_trim = 64;
-                       if (blk_queue_discard(q))
+                       if (bdev_max_discard_sectors(sb->s_bdev))
                                *flag |= JFS_DISCARD;
                        else
                                pr_err("JFS: discard option not supported on device\n");
                        break;
-               }
 
                case Opt_nodiscard:
                        *flag &= ~JFS_DISCARD;
@@ -392,10 +389,9 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 
                case Opt_discard_minblk:
                {
-                       struct request_queue *q = bdev_get_queue(sb->s_bdev);
                        char *minblks_trim = args[0].from;
                        int rc;
-                       if (blk_queue_discard(q)) {
+                       if (bdev_max_discard_sectors(sb->s_bdev)) {
                                *flag |= JFS_DISCARD;
                                rc = kstrtouint(minblks_trim, 0,
                                                &sbi->minblks_trim);
index e2d59bb5e6bbe788953443e6fa54b6ead01c7d7d..9a16897e8dc6b2036c30148f8793604e046097ec 100644 (file)
@@ -517,7 +517,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
                if (result.negated)
                        ctx->flags &= ~NFS_MOUNT_SOFTREVAL;
                else
-                       ctx->flags &= NFS_MOUNT_SOFTREVAL;
+                       ctx->flags |= NFS_MOUNT_SOFTREVAL;
                break;
        case Opt_posix:
                if (result.negated)
index fec194a666f4b771798876a2d6b22a16145047bc..87e1004b606d20d9726a8b58faabe5460eb4b1d7 100644 (file)
@@ -1052,20 +1052,20 @@ out:
 static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp)
 {
        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
-       struct request_queue *q = bdev_get_queue(nilfs->ns_bdev);
        struct fstrim_range range;
        int ret;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(nilfs->ns_bdev))
                return -EOPNOTSUPP;
 
        if (copy_from_user(&range, argp, sizeof(range)))
                return -EFAULT;
 
-       range.minlen = max_t(u64, range.minlen, q->limits.discard_granularity);
+       range.minlen = max_t(u64, range.minlen,
+                            bdev_discard_granularity(nilfs->ns_bdev));
 
        down_read(&nilfs->ns_segctor_sem);
        ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range);
index e385cca2004a7bb9b8b38408989a1e07fd68e6e3..77ff8e95421fa86b3b3a0b0afa1b89936717db91 100644 (file)
@@ -1100,7 +1100,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
                                ret = blkdev_issue_discard(nilfs->ns_bdev,
                                                start * sects_per_block,
                                                nblocks * sects_per_block,
-                                               GFP_NOFS, 0);
+                                               GFP_NOFS);
                                if (ret < 0) {
                                        put_bh(su_bh);
                                        goto out_sem;
@@ -1134,7 +1134,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
                        ret = blkdev_issue_discard(nilfs->ns_bdev,
                                        start * sects_per_block,
                                        nblocks * sects_per_block,
-                                       GFP_NOFS, 0);
+                                       GFP_NOFS);
                        if (!ret)
                                ndiscarded += nblocks;
                }
index dd48a8f74d577c76aa9af5844029405ac9b16684..3b4a079c9617c78438e84c5a91817b329b57b9d8 100644 (file)
@@ -672,7 +672,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
                        ret = blkdev_issue_discard(nilfs->ns_bdev,
                                                   start * sects_per_block,
                                                   nblocks * sects_per_block,
-                                                  GFP_NOFS, 0);
+                                                  GFP_NOFS);
                        if (ret < 0)
                                return ret;
                        nblocks = 0;
@@ -682,7 +682,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
                ret = blkdev_issue_discard(nilfs->ns_bdev,
                                           start * sects_per_block,
                                           nblocks * sects_per_block,
-                                          GFP_NOFS, 0);
+                                          GFP_NOFS);
        return ret;
 }
 
index 9b32b76a9c303c408709dd33f739f2867cec5244..a792e21c530993a0d721921ad945b65fcabc095d 100644 (file)
@@ -1657,6 +1657,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
        else
                mnt = path.mnt;
 
+       /*
+        * FAN_RENAME is not allowed on non-dir (for now).
+        * We shouldn't have allowed setting any dirent events in the mask of
+        * a non-dir, but because we always allowed it, error out only if the
+        * group was initialized with the new flag FAN_REPORT_TARGET_FID.
+        */
+       ret = -ENOTDIR;
+       if (inode && !S_ISDIR(inode->i_mode) &&
+           ((mask & FAN_RENAME) ||
+            ((mask & FANOTIFY_DIRENT_EVENTS) &&
+             FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID))))
+               goto path_put_and_out;
+
        /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
        if (mnt || !S_ISDIR(inode->i_mode)) {
                mask &= ~FAN_EVENT_ON_CHILD;
index 787b53b984ee17ae07aff29f2972319d437387b7..15806eeae217a048eb091d3c5cc7e4f31fef0834 100644 (file)
@@ -22,20 +22,20 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
 {
        struct fstrim_range __user *user_range;
        struct fstrim_range range;
-       struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
        int err;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(sbi->sb->s_bdev))
                return -EOPNOTSUPP;
 
        user_range = (struct fstrim_range __user *)arg;
        if (copy_from_user(&range, user_range, sizeof(range)))
                return -EFAULT;
 
-       range.minlen = max_t(u32, range.minlen, q->limits.discard_granularity);
+       range.minlen = max_t(u32, range.minlen,
+                            bdev_discard_granularity(sbi->sb->s_bdev));
 
        err = ntfs_trim_fs(sbi, &range);
        if (err < 0)
index 278dcf502410231b3af89241b9cc0eef0af382db..5781b9e8e3d85b54e44f682d2cc6b49b89f97669 100644 (file)
@@ -882,7 +882,6 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
        int err;
        struct ntfs_sb_info *sbi = sb->s_fs_info;
        struct block_device *bdev = sb->s_bdev;
-       struct request_queue *rq;
        struct inode *inode;
        struct ntfs_inode *ni;
        size_t i, tt;
@@ -912,15 +911,14 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
                goto out;
        }
 
-       rq = bdev_get_queue(bdev);
-       if (blk_queue_discard(rq) && rq->limits.discard_granularity) {
-               sbi->discard_granularity = rq->limits.discard_granularity;
+       if (bdev_max_discard_sectors(bdev) && bdev_discard_granularity(bdev)) {
+               sbi->discard_granularity = bdev_discard_granularity(bdev);
                sbi->discard_granularity_mask_inv =
                        ~(u64)(sbi->discard_granularity - 1);
        }
 
        /* Parse boot. */
-       err = ntfs_init_from_boot(sb, rq ? queue_logical_block_size(rq) : 512,
+       err = ntfs_init_from_boot(sb, bdev_logical_block_size(bdev),
                                  bdev_nr_bytes(bdev));
        if (err)
                goto out;
@@ -1335,7 +1333,7 @@ int ntfs_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len)
                return 0;
 
        err = blkdev_issue_discard(sb->s_bdev, start >> 9, (end - start) >> 9,
-                                  GFP_NOFS, 0);
+                                  GFP_NOFS);
 
        if (err == -EOPNOTSUPP)
                sbi->flags |= NTFS_FLAGS_NODISCARD;
index f59461d85da4595dafd9119f9df09a77fefa8ead..afd54ec661030d08587764ee48c49c0359bc22c4 100644 (file)
@@ -903,20 +903,19 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case FITRIM:
        {
                struct super_block *sb = inode->i_sb;
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
 
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
-               if (!blk_queue_discard(q))
+               if (!bdev_max_discard_sectors(sb->s_bdev))
                        return -EOPNOTSUPP;
 
                if (copy_from_user(&range, argp, sizeof(range)))
                        return -EFAULT;
 
-               range.minlen = max_t(u64, q->limits.discard_granularity,
+               range.minlen = max_t(u64, bdev_discard_granularity(sb->s_bdev),
                                     range.minlen);
                ret = ocfs2_trim_fs(sb, &range);
                if (ret < 0)
index 419760fd77bdd82b1fa1d1565bbcc8d13ae25967..f38bda5b83ec4418091fa52ea7e9758de56a6efe 100644 (file)
@@ -5,14 +5,10 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-__weak void arch_freq_prepare_all(void)
-{
-}
-
 extern const struct seq_operations cpuinfo_op;
+
 static int cpuinfo_open(struct inode *inode, struct file *file)
 {
-       arch_freq_prepare_all();
        return seq_open(file, &cpuinfo_op);
 }
 
index 172c86270b3127571e745528544583ae55ff449c..913bef0d2a36c4ed0d49dfba1b18cac612cdbe5f 100644 (file)
@@ -72,7 +72,7 @@ out:
        return 0;
 }
 
-static int seq_fdinfo_open(struct inode *inode, struct file *file)
+static int proc_fdinfo_access_allowed(struct inode *inode)
 {
        bool allowed = false;
        struct task_struct *task = get_proc_task(inode);
@@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file)
        if (!allowed)
                return -EACCES;
 
+       return 0;
+}
+
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
+{
+       int ret = proc_fdinfo_access_allowed(inode);
+
+       if (ret)
+               return ret;
+
        return single_open(file, seq_show, inode);
 }
 
@@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx)
                                  proc_fdinfo_instantiate);
 }
 
+static int proc_open_fdinfo(struct inode *inode, struct file *file)
+{
+       int ret = proc_fdinfo_access_allowed(inode);
+
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
 const struct inode_operations proc_fdinfo_inode_operations = {
        .lookup         = proc_lookupfdinfo,
        .setattr        = proc_setattr,
 };
 
 const struct file_operations proc_fdinfo_operations = {
+       .open           = proc_open_fdinfo,
        .read           = generic_read_dir,
        .iterate_shared = proc_readfdinfo,
        .llseek         = generic_file_llseek,
index 622c844f6d118650ba573ebf78fa1fd07027af74..8879d052f96c6a0053096e61cb3d400beee78b38 100644 (file)
@@ -86,17 +86,10 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
        int error, i;
        struct bio *bio;
 
-       if (page_count <= BIO_MAX_VECS) {
-               bio = bio_alloc(sb->s_bdev, page_count, REQ_OP_READ, GFP_NOIO);
-       } else {
-               bio = bio_kmalloc(GFP_NOIO, page_count);
-               bio_set_dev(bio, sb->s_bdev);
-               bio->bi_opf = REQ_OP_READ;
-       }
-
+       bio = bio_kmalloc(page_count, GFP_NOIO);
        if (!bio)
                return -ENOMEM;
-
+       bio_init(bio, sb->s_bdev, bio->bi_inline_vecs, page_count, REQ_OP_READ);
        bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT);
 
        for (i = 0; i < page_count; ++i) {
@@ -126,7 +119,8 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
 
 out_free_bio:
        bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
        return error;
 }
 
@@ -190,7 +184,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
                        length |= data[0] << 8;
                }
                bio_free_pages(bio);
-               bio_put(bio);
+               bio_uninit(bio);
+               kfree(bio);
 
                compressed = SQUASHFS_COMPRESSED(length);
                length = SQUASHFS_COMPRESSED_SIZE(length);
@@ -224,7 +219,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
 
 out_free_bio:
        bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
 out:
        if (res < 0) {
                ERROR("Failed to read block 0x%llx: %d\n", index, res);
index f1d4a193602d673b8aaf435d9d2d1c25388221b1..60f57c7bc0a69a0cb931a957c354ed740aaba9f0 100644 (file)
@@ -1204,7 +1204,7 @@ static int set_bdev_super(struct super_block *s, void *data)
        s->s_dev = s->s_bdev->bd_dev;
        s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
 
-       if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue))
+       if (bdev_stable_writes(s->s_bdev))
                s->s_iflags |= SB_I_STABLE_WRITES;
        return 0;
 }
index 008fa46ef61e728c115979c7d8213de6c1ff33ba..7d6d2f152e039ccfaf08c01f673dd8624a50cf15 100644 (file)
 #define WORST_COMPR_FACTOR 2
 
 #ifdef CONFIG_FS_ENCRYPTION
-#define UBIFS_CIPHER_BLOCK_SIZE FS_CRYPTO_BLOCK_SIZE
+#define UBIFS_CIPHER_BLOCK_SIZE FSCRYPT_CONTENTS_ALIGNMENT
 #else
 #define UBIFS_CIPHER_BLOCK_SIZE 0
 #endif
index 0ed4861b038f6a3dc06fcf3d7eb26444981aa21b..b3d5f97f16cdb174b2e91ff4514caae2f96dfe26 100644 (file)
@@ -75,11 +75,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
 
        if (fileident) {
                if (adinicb || (offset + lfi < 0)) {
-                       memcpy(udf_get_fi_ident(sfi), fileident, lfi);
+                       memcpy(sfi->impUse + liu, fileident, lfi);
                } else if (offset >= 0) {
                        memcpy(fibh->ebh->b_data + offset, fileident, lfi);
                } else {
-                       memcpy(udf_get_fi_ident(sfi), fileident, -offset);
+                       memcpy(sfi->impUse + liu, fileident, -offset);
                        memcpy(fibh->ebh->b_data, fileident - offset,
                                lfi + offset);
                }
@@ -88,11 +88,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
        offset += lfi;
 
        if (adinicb || (offset + padlen < 0)) {
-               memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen);
+               memset(sfi->impUse + liu + lfi, 0x00, padlen);
        } else if (offset >= 0) {
                memset(fibh->ebh->b_data + offset, 0x00, padlen);
        } else {
-               memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset);
+               memset(sfi->impUse + liu + lfi, 0x00, -offset);
                memset(fibh->ebh->b_data, 0x00, padlen + offset);
        }
 
index 24d1b54de8079f8df6ff26924468685449b033e7..54598cd8014570c91786d158936beb39a45f15ad 100644 (file)
@@ -3,6 +3,7 @@
 config FS_VERITY
        bool "FS Verity (read-only file-based authenticity protection)"
        select CRYPTO
+       select CRYPTO_HASH_INFO
        # SHA-256 is implied as it's intended to be the default hash algorithm.
        # To avoid bloat, other wanted algorithms must be selected explicitly.
        # Note that CRYPTO_SHA256 denotes the generic C implementation, but
index 60a4372aa4d75f5157865bbf525bfb1a2cd4bb7e..d52872c808fffed4024397bbf3511c35d0f6c9da 100644 (file)
@@ -202,7 +202,7 @@ static int enable_verity(struct file *filp,
        const struct fsverity_operations *vops = inode->i_sb->s_vop;
        struct merkle_tree_params params = { };
        struct fsverity_descriptor *desc;
-       size_t desc_size = sizeof(*desc) + arg->sig_size;
+       size_t desc_size = struct_size(desc, signature, arg->sig_size);
        struct fsverity_info *vi;
        int err;
 
@@ -281,7 +281,7 @@ static int enable_verity(struct file *filp,
         * from disk.  This is simpler, and it serves as an extra check that the
         * metadata we're writing is valid before actually enabling verity.
         */
-       vi = fsverity_create_info(inode, desc, desc_size);
+       vi = fsverity_create_info(inode, desc);
        if (IS_ERR(vi)) {
                err = PTR_ERR(vi);
                goto rollback;
index a7920434bae50c3112ffb1c870549fe6aa0477d1..629785c95007641865a2b32165f382774ac18753 100644 (file)
@@ -14,7 +14,6 @@
 
 #define pr_fmt(fmt) "fs-verity: " fmt
 
-#include <crypto/sha2.h>
 #include <linux/fsverity.h>
 #include <linux/mempool.h>
 
@@ -26,12 +25,6 @@ struct ahash_request;
  */
 #define FS_VERITY_MAX_LEVELS           8
 
-/*
- * Largest digest size among all hash algorithms supported by fs-verity.
- * Currently assumed to be <= size of fsverity_descriptor::root_hash.
- */
-#define FS_VERITY_MAX_DIGEST_SIZE      SHA512_DIGEST_SIZE
-
 /* A hash algorithm supported by fs-verity */
 struct fsverity_hash_alg {
        struct crypto_ahash *tfm; /* hash tfm, allocated on demand */
@@ -122,16 +115,14 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
                                     const u8 *salt, size_t salt_size);
 
 struct fsverity_info *fsverity_create_info(const struct inode *inode,
-                                          struct fsverity_descriptor *desc,
-                                          size_t desc_size);
+                                          struct fsverity_descriptor *desc);
 
 void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
 
 void fsverity_free_info(struct fsverity_info *vi);
 
 int fsverity_get_descriptor(struct inode *inode,
-                           struct fsverity_descriptor **desc_ret,
-                           size_t *desc_size_ret);
+                           struct fsverity_descriptor **desc_ret);
 
 int __init fsverity_init_info_cache(void);
 void __init fsverity_exit_info_cache(void);
index f0d7b30c62db2bdf31db764a8d8966808993add2..e99c00350c28dc16ff1fa605848258945efe410b 100644 (file)
@@ -57,3 +57,46 @@ int fsverity_ioctl_measure(struct file *filp, void __user *_uarg)
        return 0;
 }
 EXPORT_SYMBOL_GPL(fsverity_ioctl_measure);
+
+/**
+ * fsverity_get_digest() - get a verity file's digest
+ * @inode: inode to get digest of
+ * @digest: (out) pointer to the digest
+ * @alg: (out) pointer to the hash algorithm enumeration
+ *
+ * Return the file hash algorithm and digest of an fsverity protected file.
+ * Assumption: before calling fsverity_get_digest(), the file must have been
+ * opened.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_get_digest(struct inode *inode,
+                       u8 digest[FS_VERITY_MAX_DIGEST_SIZE],
+                       enum hash_algo *alg)
+{
+       const struct fsverity_info *vi;
+       const struct fsverity_hash_alg *hash_alg;
+       int i;
+
+       vi = fsverity_get_info(inode);
+       if (!vi)
+               return -ENODATA; /* not a verity file */
+
+       hash_alg = vi->tree_params.hash_alg;
+       memset(digest, 0, FS_VERITY_MAX_DIGEST_SIZE);
+
+       /* convert the verity hash algorithm name to a hash_algo_name enum */
+       i = match_string(hash_algo_name, HASH_ALGO__LAST, hash_alg->name);
+       if (i < 0)
+               return -EINVAL;
+       *alg = i;
+
+       if (WARN_ON_ONCE(hash_alg->digest_size != hash_digest_size[*alg]))
+               return -EINVAL;
+       memcpy(digest, vi->file_digest, hash_alg->digest_size);
+
+       pr_debug("file digest %s:%*phN\n", hash_algo_name[*alg],
+                hash_digest_size[*alg], digest);
+
+       return 0;
+}
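
[editor's note] fsverity_get_digest() is exported for in-kernel consumers that need the file digest without the measure ioctl (an integrity subsystem, for instance). A hedged sketch of a caller:

	u8 digest[FS_VERITY_MAX_DIGEST_SIZE];
	enum hash_algo alg;
	int err;

	err = fsverity_get_digest(inode, digest, &alg);
	if (err)
		return err;	/* -ENODATA: not a verity file */
	pr_info("verity digest %s:%*phN\n", hash_algo_name[alg],
		hash_digest_size[alg], digest);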
index 92df87f5fa3881abec4f18d20686cc991558ced4..81ff94442f7b48daabd0d1e6604cda422735f413 100644 (file)
@@ -147,8 +147,7 @@ static int compute_file_digest(struct fsverity_hash_alg *hash_alg,
  * fsverity_descriptor must have already undergone basic validation.
  */
 struct fsverity_info *fsverity_create_info(const struct inode *inode,
-                                          struct fsverity_descriptor *desc,
-                                          size_t desc_size)
+                                          struct fsverity_descriptor *desc)
 {
        struct fsverity_info *vi;
        int err;
@@ -264,8 +263,7 @@ static bool validate_fsverity_descriptor(struct inode *inode,
  * the filesystem, and do basic validation of it.
  */
 int fsverity_get_descriptor(struct inode *inode,
-                           struct fsverity_descriptor **desc_ret,
-                           size_t *desc_size_ret)
+                           struct fsverity_descriptor **desc_ret)
 {
        int res;
        struct fsverity_descriptor *desc;
@@ -297,7 +295,6 @@ int fsverity_get_descriptor(struct inode *inode,
        }
 
        *desc_ret = desc;
-       *desc_size_ret = res;
        return 0;
 }
 
@@ -306,17 +303,16 @@ static int ensure_verity_info(struct inode *inode)
 {
        struct fsverity_info *vi = fsverity_get_info(inode);
        struct fsverity_descriptor *desc;
-       size_t desc_size;
        int err;
 
        if (vi)
                return 0;
 
-       err = fsverity_get_descriptor(inode, &desc, &desc_size);
+       err = fsverity_get_descriptor(inode, &desc);
        if (err)
                return err;
 
-       vi = fsverity_create_info(inode, desc, desc_size);
+       vi = fsverity_create_info(inode, desc);
        if (IS_ERR(vi)) {
                err = PTR_ERR(vi);
                goto out_free_desc;
index 7e2d0c7bdf0de36290dd226252e0ad51ba250155..6ee849dc7bc183c0f55212cb5a5639fe2d134e19 100644 (file)
@@ -101,7 +101,7 @@ static int fsverity_read_descriptor(struct inode *inode,
        size_t desc_size;
        int res;
 
-       res = fsverity_get_descriptor(inode, &desc, &desc_size);
+       res = fsverity_get_descriptor(inode, &desc);
        if (res)
                return res;
 
@@ -119,10 +119,9 @@ static int fsverity_read_signature(struct inode *inode,
                                   void __user *buf, u64 offset, int length)
 {
        struct fsverity_descriptor *desc;
-       size_t desc_size;
        int res;
 
-       res = fsverity_get_descriptor(inode, &desc, &desc_size);
+       res = fsverity_get_descriptor(inode, &desc);
        if (res)
                return res;
 
index 998045165916edcd1f5d4215011dea8f8cb08d7b..e8dd03e4561e81a812e52f7730771d389d96169f 100644 (file)
@@ -25,6 +25,8 @@
 
 #include <linux/uaccess.h>
 
+#include "internal.h"
+
 static const char *
 strcmp_prefix(const char *a, const char *a_prefix)
 {
@@ -539,44 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
 /*
  * Extended attribute SET operations
  */
-static long
-setxattr(struct user_namespace *mnt_userns, struct dentry *d,
-        const char __user *name, const void __user *value, size_t size,
-        int flags)
+
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
 {
        int error;
-       void *kvalue = NULL;
-       char kname[XATTR_NAME_MAX + 1];
 
-       if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
+       if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
                return -EINVAL;
 
-       error = strncpy_from_user(kname, name, sizeof(kname));
-       if (error == 0 || error == sizeof(kname))
-               error = -ERANGE;
+       error = strncpy_from_user(ctx->kname->name, name,
+                               sizeof(ctx->kname->name));
+       if (error == 0 || error == sizeof(ctx->kname->name))
+               return -ERANGE;
        if (error < 0)
                return error;
 
-       if (size) {
-               if (size > XATTR_SIZE_MAX)
+       error = 0;
+       if (ctx->size) {
+               if (ctx->size > XATTR_SIZE_MAX)
                        return -E2BIG;
-               kvalue = kvmalloc(size, GFP_KERNEL);
-               if (!kvalue)
-                       return -ENOMEM;
-               if (copy_from_user(kvalue, value, size)) {
-                       error = -EFAULT;
-                       goto out;
+
+               ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
+               if (IS_ERR(ctx->kvalue)) {
+                       error = PTR_ERR(ctx->kvalue);
+                       ctx->kvalue = NULL;
                }
-               if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
-                   (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
-                       posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
-                                                     kvalue, size);
        }
 
-       error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
-out:
-       kvfree(kvalue);
+       return error;
+}
+
+static void setxattr_convert(struct user_namespace *mnt_userns,
+                            struct dentry *d, struct xattr_ctx *ctx)
+{
+       if (ctx->size &&
+               ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+               (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
+               posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
+                                               ctx->kvalue, ctx->size);
+}
+
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct xattr_ctx *ctx)
+{
+       setxattr_convert(mnt_userns, dentry, ctx);
+       return vfs_setxattr(mnt_userns, dentry, ctx->kname->name,
+                       ctx->kvalue, ctx->size, ctx->flags);
+}
+
+static long
+setxattr(struct user_namespace *mnt_userns, struct dentry *d,
+       const char __user *name, const void __user *value, size_t size,
+       int flags)
+{
+       struct xattr_name kname;
+       struct xattr_ctx ctx = {
+               .cvalue   = value,
+               .kvalue   = NULL,
+               .size     = size,
+               .kname    = &kname,
+               .flags    = flags,
+       };
+       int error;
 
+       error = setxattr_copy(name, &ctx);
+       if (error)
+               return error;
+
+       error = do_setxattr(mnt_userns, d, &ctx);
+
+       kvfree(ctx.kvalue);
        return error;
 }
 
@@ -642,44 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
 /*
  * Extended attribute GET operations
  */
-static ssize_t
-getxattr(struct user_namespace *mnt_userns, struct dentry *d,
-        const char __user *name, void __user *value, size_t size)
+ssize_t
+do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+       struct xattr_ctx *ctx)
 {
        ssize_t error;
-       void *kvalue = NULL;
-       char kname[XATTR_NAME_MAX + 1];
-
-       error = strncpy_from_user(kname, name, sizeof(kname));
-       if (error == 0 || error == sizeof(kname))
-               error = -ERANGE;
-       if (error < 0)
-               return error;
+       char *kname = ctx->kname->name;
 
-       if (size) {
-               if (size > XATTR_SIZE_MAX)
-                       size = XATTR_SIZE_MAX;
-               kvalue = kvzalloc(size, GFP_KERNEL);
-               if (!kvalue)
+       if (ctx->size) {
+               if (ctx->size > XATTR_SIZE_MAX)
+                       ctx->size = XATTR_SIZE_MAX;
+               ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
+               if (!ctx->kvalue)
                        return -ENOMEM;
        }
 
-       error = vfs_getxattr(mnt_userns, d, kname, kvalue, size);
+       error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
        if (error > 0) {
                if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
                    (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
                        posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
-                                                   kvalue, error);
-               if (size && copy_to_user(value, kvalue, error))
+                                                       ctx->kvalue, error);
+               if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
                        error = -EFAULT;
-       } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
+       } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
                /* The file system tried to return a value bigger
                   than XATTR_SIZE_MAX bytes. Not possible. */
                error = -E2BIG;
        }
 
-       kvfree(kvalue);
+       return error;
+}
+
+static ssize_t
+getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+        const char __user *name, void __user *value, size_t size)
+{
+       ssize_t error;
+       struct xattr_name kname;
+       struct xattr_ctx ctx = {
+               .value    = value,
+               .kvalue   = NULL,
+               .size     = size,
+               .kname    = &kname,
+               .flags    = 0,
+       };
+
+       error = strncpy_from_user(kname.name, name, sizeof(kname.name));
+       if (error == 0 || error == sizeof(kname.name))
+               error = -ERANGE;
+       if (error < 0)
+               return error;
+
+       error = do_getxattr(mnt_userns, d, &ctx);
 
+       kvfree(ctx.kvalue);
        return error;
 }
 
index 0191de8ce9cedd822ff2ec3a2191fefb37296cd2..c6fe3f6ebb6b01be44ec4a4d73de9ee669348204 100644 (file)
@@ -114,7 +114,7 @@ xfs_trim_extents(
                }
 
                trace_xfs_discard_extent(mp, agno, fbno, flen);
-               error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
+               error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
                if (error)
                        goto out_del_cursor;
                *blocks_trimmed += flen;
@@ -152,8 +152,8 @@ xfs_ioc_trim(
        struct xfs_mount                *mp,
        struct fstrim_range __user      *urange)
 {
-       struct request_queue    *q = bdev_get_queue(mp->m_ddev_targp->bt_bdev);
-       unsigned int            granularity = q->limits.discard_granularity;
+       unsigned int            granularity =
+               bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
        struct fstrim_range     range;
        xfs_daddr_t             start, end, minlen;
        xfs_agnumber_t          start_agno, end_agno, agno;
@@ -162,7 +162,7 @@ xfs_ioc_trim(
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev))
                return -EOPNOTSUPP;
 
        /*
index ba57323bfdcea38d0c02cc308c43260cd89d5e29..c9f55e4f095710219677be4b1478c160fa90fb57 100644 (file)
@@ -605,7 +605,7 @@ xlog_discard_busy_extents(
                error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
                                XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
                                XFS_FSB_TO_BB(mp, busyp->length),
-                               GFP_NOFS, 0, &bio);
+                               GFP_NOFS, &bio);
                if (error && error != -EOPNOTSUPP) {
                        xfs_info(mp,
         "discard failed for extent [0x%llx,%u], error %d",
index 54be9d64093edacfaea509ff2d1e73cf61a58493..a276b8111f636b72415dcb8262579c04c8ec8bbe 100644 (file)
@@ -1608,14 +1608,10 @@ xfs_fs_fill_super(
                        goto out_filestream_unmount;
        }
 
-       if (xfs_has_discard(mp)) {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
-
-               if (!blk_queue_discard(q)) {
-                       xfs_warn(mp, "mounting with \"discard\" option, but "
-                                       "the device does not support discard");
-                       mp->m_features &= ~XFS_FEAT_DISCARD;
-               }
+       if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
+               xfs_warn(mp,
+       "mounting with \"discard\" option, but the device does not support discard");
+               mp->m_features &= ~XFS_FEAT_DISCARD;
        }
 
        if (xfs_has_reflink(mp)) {
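
All three XFS hunks above follow the same conversion: discard capability and
granularity are now read from the block device itself rather than from its request
queue, and blkdev_issue_discard()/__blkdev_issue_discard() lose their flags
argument. The resulting idiom, condensed (bdev, start_sector and nr_sectors are
placeholders):

	if (!bdev_max_discard_sectors(bdev))
		return -EOPNOTSUPP;		/* device has no discard support */

	granularity = bdev_discard_granularity(bdev);

	error = blkdev_issue_discard(bdev, start_sector, nr_sectors, GFP_NOFS);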
index 33c1a4f1132e7d444d2a3c402d61c05605d0bd40..9fe54f5319f2226d5a149b98e2f47a5fb7591e05 100644 (file)
@@ -3,4 +3,4 @@ ccflags-y                               += -I$(src)
 
 obj-$(CONFIG_ZONEFS_FS) += zonefs.o
 
-zonefs-y       := super.o
+zonefs-y       := super.o sysfs.o
index e20e7c8414896c0fde07647c5065799a927ab03c..b3b0b71fdf6c4c373d6d627dd036c233abc592ad 100644 (file)
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+/*
+ * Manage the active zone count. Called with zi->i_truncate_mutex held.
+ */
+static void zonefs_account_active(struct inode *inode)
+{
+       struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+       struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+       lockdep_assert_held(&zi->i_truncate_mutex);
+
+       if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+               return;
+
+       /*
+        * If the zone is active, that is, if it is explicitly open or
+        * partially written, check if it was already accounted as active.
+        */
+       if ((zi->i_flags & ZONEFS_ZONE_OPEN) ||
+           (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size)) {
+               if (!(zi->i_flags & ZONEFS_ZONE_ACTIVE)) {
+                       zi->i_flags |= ZONEFS_ZONE_ACTIVE;
+                       atomic_inc(&sbi->s_active_seq_files);
+               }
+               return;
+       }
+
+       /* The zone is not active. If it was, update the active count */
+       if (zi->i_flags & ZONEFS_ZONE_ACTIVE) {
+               zi->i_flags &= ~ZONEFS_ZONE_ACTIVE;
+               atomic_dec(&sbi->s_active_seq_files);
+       }
+}
+
 static inline int zonefs_zone_mgmt(struct inode *inode,
                                   enum req_opf op)
 {
@@ -68,8 +101,13 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize)
         * A full zone is no longer open/active and does not need
         * explicit closing.
         */
-       if (isize >= zi->i_max_size)
-               zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+       if (isize >= zi->i_max_size) {
+               struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+
+               if (zi->i_flags & ZONEFS_ZONE_ACTIVE)
+                       atomic_dec(&sbi->s_active_seq_files);
+               zi->i_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
+       }
 }
 
 static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
@@ -397,6 +435,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
        zonefs_update_stats(inode, data_size);
        zonefs_i_size_write(inode, data_size);
        zi->i_wpoffset = data_size;
+       zonefs_account_active(inode);
 
        return 0;
 }
@@ -508,6 +547,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
        zonefs_update_stats(inode, isize);
        truncate_setsize(inode, isize);
        zi->i_wpoffset = isize;
+       zonefs_account_active(inode);
 
 unlock:
        mutex_unlock(&zi->i_truncate_mutex);
@@ -689,13 +729,12 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
        struct inode *inode = file_inode(iocb->ki_filp);
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
        struct block_device *bdev = inode->i_sb->s_bdev;
-       unsigned int max;
+       unsigned int max = bdev_max_zone_append_sectors(bdev);
        struct bio *bio;
        ssize_t size;
        int nr_pages;
        ssize_t ret;
 
-       max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
        max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
        iov_iter_truncate(from, max);
 
@@ -866,8 +905,15 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
            (ret > 0 || ret == -EIOCBQUEUED)) {
                if (ret > 0)
                        count = ret;
+
+               /*
+                * Update the zone write pointer offset assuming the write
+                * operation succeeded. If it did not, the error recovery path
+                * will correct it. Also do active seq file accounting.
+                */
                mutex_lock(&zi->i_truncate_mutex);
                zi->i_wpoffset += count;
+               zonefs_account_active(inode);
                mutex_unlock(&zi->i_truncate_mutex);
        }
 
@@ -1009,13 +1055,13 @@ inode_unlock:
        return ret;
 }
 
-static inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *file)
+/*
+ * Write open accounting is done only for sequential files.
+ */
+static inline bool zonefs_seq_file_need_wro(struct inode *inode,
+                                           struct file *file)
 {
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
-       struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
-
-       if (!(sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN))
-               return false;
 
        if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
                return false;
@@ -1026,28 +1072,34 @@ static inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *fi
        return true;
 }
 
-static int zonefs_open_zone(struct inode *inode)
+static int zonefs_seq_file_write_open(struct inode *inode)
 {
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
-       struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
        int ret = 0;
 
        mutex_lock(&zi->i_truncate_mutex);
 
        if (!zi->i_wr_refcnt) {
-               if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) {
-                       atomic_dec(&sbi->s_open_zones);
-                       ret = -EBUSY;
-                       goto unlock;
-               }
+               struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+               unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files);
 
-               if (i_size_read(inode) < zi->i_max_size) {
-                       ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
-                       if (ret) {
-                               atomic_dec(&sbi->s_open_zones);
+               if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
+
+                       if (wro > sbi->s_max_wro_seq_files) {
+                               atomic_dec(&sbi->s_wro_seq_files);
+                               ret = -EBUSY;
                                goto unlock;
                        }
-                       zi->i_flags |= ZONEFS_ZONE_OPEN;
+
+                       if (i_size_read(inode) < zi->i_max_size) {
+                               ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+                               if (ret) {
+                                       atomic_dec(&sbi->s_wro_seq_files);
+                                       goto unlock;
+                               }
+                               zi->i_flags |= ZONEFS_ZONE_OPEN;
+                               zonefs_account_active(inode);
+                       }
                }
        }
 
@@ -1067,30 +1119,31 @@ static int zonefs_file_open(struct inode *inode, struct file *file)
        if (ret)
                return ret;
 
-       if (zonefs_file_use_exp_open(inode, file))
-               return zonefs_open_zone(inode);
+       if (zonefs_seq_file_need_wro(inode, file))
+               return zonefs_seq_file_write_open(inode);
 
        return 0;
 }
 
-static void zonefs_close_zone(struct inode *inode)
+static void zonefs_seq_file_write_close(struct inode *inode)
 {
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
+       struct super_block *sb = inode->i_sb;
+       struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
        int ret = 0;
 
        mutex_lock(&zi->i_truncate_mutex);
-       zi->i_wr_refcnt--;
-       if (!zi->i_wr_refcnt) {
-               struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
-               struct super_block *sb = inode->i_sb;
 
-               /*
-                * If the file zone is full, it is not open anymore and we only
-                * need to decrement the open count.
-                */
-               if (!(zi->i_flags & ZONEFS_ZONE_OPEN))
-                       goto dec;
+       zi->i_wr_refcnt--;
+       if (zi->i_wr_refcnt)
+               goto unlock;
 
+       /*
+        * The file zone may not be open anymore (e.g. the file was truncated to
+        * its maximum size or it was fully written). In that case, we only
+        * need to decrement the write open count.
+        */
+       if (zi->i_flags & ZONEFS_ZONE_OPEN) {
                ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
                if (ret) {
                        __zonefs_io_error(inode, false);
@@ -1102,14 +1155,23 @@ static void zonefs_close_zone(struct inode *inode)
                         */
                        if (zi->i_flags & ZONEFS_ZONE_OPEN &&
                            !(sb->s_flags & SB_RDONLY)) {
-                               zonefs_warn(sb, "closing zone failed, remounting filesystem read-only\n");
+                               zonefs_warn(sb,
+                                       "closing zone at %llu failed %d\n",
+                                       zi->i_zsector, ret);
+                               zonefs_warn(sb,
+                                       "remounting filesystem read-only\n");
                                sb->s_flags |= SB_RDONLY;
                        }
+                       goto unlock;
                }
+
                zi->i_flags &= ~ZONEFS_ZONE_OPEN;
-dec:
-               atomic_dec(&sbi->s_open_zones);
+               zonefs_account_active(inode);
        }
+
+       atomic_dec(&sbi->s_wro_seq_files);
+
+unlock:
        mutex_unlock(&zi->i_truncate_mutex);
 }
 
@@ -1121,8 +1183,8 @@ static int zonefs_file_release(struct inode *inode, struct file *file)
         * the zone has gone offline or read-only). Make sure we don't fail the
         * close(2) for user-space.
         */
-       if (zonefs_file_use_exp_open(inode, file))
-               zonefs_close_zone(inode);
+       if (zonefs_seq_file_need_wro(inode, file))
+               zonefs_seq_file_write_close(inode);
 
        return 0;
 }
@@ -1311,7 +1373,7 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
        struct super_block *sb = inode->i_sb;
        struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
-       int ret = 0;
+       int ret;
 
        inode->i_ino = zone->start >> sbi->s_zone_sectors_shift;
        inode->i_mode = S_IFREG | sbi->s_perm;
@@ -1337,6 +1399,8 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
        sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits;
        sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits;
 
+       mutex_lock(&zi->i_truncate_mutex);
+
        /*
         * For sequential zones, make sure that any open zone is closed first
         * to ensure that the initial number of open zones is 0, in sync with
@@ -1346,12 +1410,17 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
        if (type == ZONEFS_ZTYPE_SEQ &&
            (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
             zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
-               mutex_lock(&zi->i_truncate_mutex);
                ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
-               mutex_unlock(&zi->i_truncate_mutex);
+               if (ret)
+                       goto unlock;
        }
 
-       return ret;
+       zonefs_account_active(inode);
+
+unlock:
+       mutex_unlock(&zi->i_truncate_mutex);
+
+       return 0;
 }
 
 static struct dentry *zonefs_create_inode(struct dentry *parent,
@@ -1688,14 +1757,18 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_gid = GLOBAL_ROOT_GID;
        sbi->s_perm = 0640;
        sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
-       sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev);
-       atomic_set(&sbi->s_open_zones, 0);
-       if (!sbi->s_max_open_zones &&
+
+       atomic_set(&sbi->s_wro_seq_files, 0);
+       sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev);
+       if (!sbi->s_max_wro_seq_files &&
            sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
                zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n");
                sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
        }
 
+       atomic_set(&sbi->s_active_seq_files, 0);
+       sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev);
+
        ret = zonefs_read_super(sb);
        if (ret)
                return ret;
@@ -1710,6 +1783,10 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
        if (ret)
                goto cleanup;
 
+       ret = zonefs_sysfs_register(sb);
+       if (ret)
+               goto cleanup;
+
        zonefs_info(sb, "Mounting %u zones",
                    blkdev_nr_zones(sb->s_bdev->bd_disk));
 
@@ -1755,6 +1832,8 @@ static void zonefs_kill_super(struct super_block *sb)
 
        if (sb->s_root)
                d_genocide(sb->s_root);
+
+       zonefs_sysfs_unregister(sb);
        kill_block_super(sb);
        kfree(sbi);
 }
@@ -1802,16 +1881,26 @@ static int __init zonefs_init(void)
                return ret;
 
        ret = register_filesystem(&zonefs_type);
-       if (ret) {
-               zonefs_destroy_inodecache();
-               return ret;
-       }
+       if (ret)
+               goto destroy_inodecache;
+
+       ret = zonefs_sysfs_init();
+       if (ret)
+               goto unregister_fs;
 
        return 0;
+
+unregister_fs:
+       unregister_filesystem(&zonefs_type);
+destroy_inodecache:
+       zonefs_destroy_inodecache();
+
+       return ret;
 }
 
 static void __exit zonefs_exit(void)
 {
+       zonefs_sysfs_exit();
        zonefs_destroy_inodecache();
        unregister_filesystem(&zonefs_type);
 }
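
Taken together, the hunks above keep s_active_seq_files equal to the number of
sequential zone files the device would consider active. The condition that
zonefs_account_active() tests, restated as a standalone predicate (a sketch for
readability only; no such helper is added by the patch):

	/* A zone is active when it is explicitly open or partially written. */
	static bool zone_is_active(struct zonefs_inode_info *zi)
	{
		return (zi->i_flags & ZONEFS_ZONE_OPEN) ||
		       (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size);
	}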
diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c
new file mode 100644 (file)
index 0000000..9cb6755
--- /dev/null
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple file system for zoned block devices exposing zones as files.
+ *
+ * Copyright (C) 2022 Western Digital Corporation or its affiliates.
+ */
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/blkdev.h>
+
+#include "zonefs.h"
+
+struct zonefs_sysfs_attr {
+       struct attribute attr;
+       ssize_t (*show)(struct zonefs_sb_info *sbi, char *buf);
+};
+
+static inline struct zonefs_sysfs_attr *to_attr(struct attribute *attr)
+{
+       return container_of(attr, struct zonefs_sysfs_attr, attr);
+}
+
+#define ZONEFS_SYSFS_ATTR_RO(name) \
+static struct zonefs_sysfs_attr zonefs_sysfs_attr_##name = __ATTR_RO(name)
+
+#define ATTR_LIST(name) &zonefs_sysfs_attr_##name.attr
+
+static ssize_t zonefs_sysfs_attr_show(struct kobject *kobj,
+                                     struct attribute *attr, char *buf)
+{
+       struct zonefs_sb_info *sbi =
+               container_of(kobj, struct zonefs_sb_info, s_kobj);
+       struct zonefs_sysfs_attr *zonefs_attr =
+               container_of(attr, struct zonefs_sysfs_attr, attr);
+
+       if (!zonefs_attr->show)
+               return 0;
+
+       return zonefs_attr->show(sbi, buf);
+}
+
+static ssize_t max_wro_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+       return sysfs_emit(buf, "%u\n", sbi->s_max_wro_seq_files);
+}
+ZONEFS_SYSFS_ATTR_RO(max_wro_seq_files);
+
+static ssize_t nr_wro_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+       return sysfs_emit(buf, "%d\n", atomic_read(&sbi->s_wro_seq_files));
+}
+ZONEFS_SYSFS_ATTR_RO(nr_wro_seq_files);
+
+static ssize_t max_active_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+       return sysfs_emit(buf, "%u\n", sbi->s_max_active_seq_files);
+}
+ZONEFS_SYSFS_ATTR_RO(max_active_seq_files);
+
+static ssize_t nr_active_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+       return sysfs_emit(buf, "%d\n", atomic_read(&sbi->s_active_seq_files));
+}
+ZONEFS_SYSFS_ATTR_RO(nr_active_seq_files);
+
+static struct attribute *zonefs_sysfs_attrs[] = {
+       ATTR_LIST(max_wro_seq_files),
+       ATTR_LIST(nr_wro_seq_files),
+       ATTR_LIST(max_active_seq_files),
+       ATTR_LIST(nr_active_seq_files),
+       NULL,
+};
+ATTRIBUTE_GROUPS(zonefs_sysfs);
+
+static void zonefs_sysfs_sb_release(struct kobject *kobj)
+{
+       struct zonefs_sb_info *sbi =
+               container_of(kobj, struct zonefs_sb_info, s_kobj);
+
+       complete(&sbi->s_kobj_unregister);
+}
+
+static const struct sysfs_ops zonefs_sysfs_attr_ops = {
+       .show   = zonefs_sysfs_attr_show,
+};
+
+static struct kobj_type zonefs_sb_ktype = {
+       .default_groups = zonefs_sysfs_groups,
+       .sysfs_ops      = &zonefs_sysfs_attr_ops,
+       .release        = zonefs_sysfs_sb_release,
+};
+
+static struct kobject *zonefs_sysfs_root;
+
+int zonefs_sysfs_register(struct super_block *sb)
+{
+       struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+       int ret;
+
+       init_completion(&sbi->s_kobj_unregister);
+       ret = kobject_init_and_add(&sbi->s_kobj, &zonefs_sb_ktype,
+                                  zonefs_sysfs_root, "%s", sb->s_id);
+       if (ret) {
+               kobject_put(&sbi->s_kobj);
+               wait_for_completion(&sbi->s_kobj_unregister);
+               return ret;
+       }
+
+       sbi->s_sysfs_registered = true;
+
+       return 0;
+}
+
+void zonefs_sysfs_unregister(struct super_block *sb)
+{
+       struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+
+       if (!sbi || !sbi->s_sysfs_registered)
+               return;
+
+       kobject_del(&sbi->s_kobj);
+       kobject_put(&sbi->s_kobj);
+       wait_for_completion(&sbi->s_kobj_unregister);
+}
+
+int __init zonefs_sysfs_init(void)
+{
+       zonefs_sysfs_root = kobject_create_and_add("zonefs", fs_kobj);
+       if (!zonefs_sysfs_root)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void zonefs_sysfs_exit(void)
+{
+       kobject_put(zonefs_sysfs_root);
+       zonefs_sysfs_root = NULL;
+}
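
zonefs_sysfs_init() creates the "zonefs" kobject under fs_kobj, and
zonefs_sysfs_register() adds one child per super block named after sb->s_id, so
the four read-only counters should surface as (the device name is illustrative):

	/sys/fs/zonefs/<dev>/max_wro_seq_files
	/sys/fs/zonefs/<dev>/nr_wro_seq_files
	/sys/fs/zonefs/<dev>/max_active_seq_files
	/sys/fs/zonefs/<dev>/nr_active_seq_files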
index 7b147907c328ed2615b718c4cb17e2e18f148dad..4b3de66c323342ab8ac10f4691e492197e7e6998 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/uuid.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
+#include <linux/kobject.h>
 
 /*
  * Maximum length of file names: this only needs to be large enough to fit
@@ -39,6 +40,7 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone)
 }
 
 #define ZONEFS_ZONE_OPEN       (1 << 0)
+#define ZONEFS_ZONE_ACTIVE     (1 << 1)
 
 /*
  * In-memory inode data.
@@ -182,8 +184,15 @@ struct zonefs_sb_info {
        loff_t                  s_blocks;
        loff_t                  s_used_blocks;
 
-       unsigned int            s_max_open_zones;
-       atomic_t                s_open_zones;
+       unsigned int            s_max_wro_seq_files;
+       atomic_t                s_wro_seq_files;
+
+       unsigned int            s_max_active_seq_files;
+       atomic_t                s_active_seq_files;
+
+       bool                    s_sysfs_registered;
+       struct kobject          s_kobj;
+       struct completion       s_kobj_unregister;
 };
 
 static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb)
@@ -198,4 +207,9 @@ static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb)
 #define zonefs_warn(sb, format, args...)       \
        pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args)
 
+int zonefs_sysfs_register(struct super_block *sb);
+void zonefs_sysfs_unregister(struct super_block *sb);
+int zonefs_sysfs_init(void);
+void zonefs_sysfs_exit(void);
+
 #endif
index 3e8d969b22fe87737a8a487e3e67a01c9d737163..8cbfcbca7b7eebbc07cb3dff9d74275224afa3f1 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acbuffer.h - Support for buffers returned by ACPI predefined names
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 0362cbb7235915315dbf9979353c8fdec914ae80..c3ae3ea88e178b8f965dfe0c5d490a360f5258ce 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acconfig.h - Global configuration constants
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index ea3b1c41bc791da07776befe34474e1685636b78..28943c900be79a1ee80866a172f9a85e01fb07bd 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acexcep.h - Exception codes returned by the ACPI subsystem
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 30869ab77fba57c829ac2d20f513bc10ac1409f5..6f22e92b17445b40747a8a9d5730a14cdc544c63 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acnames.h - Global names and strings
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 5a38757446785d03025bfab152099f2c0121a997..73781aae2119296c8f31e8da2b02dd78b932e971 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acoutput.h -- debug output
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 6f6282a862bcdc399a986a38626a391a0791647e..416e59bcf149d8674f131db42ccc22dd083f981a 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acpi.h - Master public include file used to interface to ACPICA
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index c4b78c21d793056f8087711b83bfcdedc801f90c..772590e2eddb8eaca3789a741b59a0847eb724da 100644 (file)
@@ -481,6 +481,8 @@ void acpi_initialize_hp_context(struct acpi_device *adev,
 extern struct bus_type acpi_bus_type;
 
 int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data);
+int acpi_dev_for_each_child(struct acpi_device *adev,
+                           int (*fn)(struct acpi_device *, void *), void *data);
 
 /*
  * Events
@@ -522,6 +524,7 @@ int acpi_device_fix_up_power(struct acpi_device *device);
 int acpi_bus_update_power(acpi_handle handle, int *state_p);
 int acpi_device_update_power(struct acpi_device *device, int *state_p);
 bool acpi_bus_power_manageable(acpi_handle handle);
+void acpi_dev_power_up_children_with_adr(struct acpi_device *adev);
 int acpi_device_power_add_dependent(struct acpi_device *adev,
                                    struct device *dev);
 void acpi_device_power_remove_dependent(struct acpi_device *adev,
index 690c369b717ad108918441ad8add5aabb0fd58f2..52844cc5eeb53f62c1ce4384240a99c7e2f9c38b 100644 (file)
@@ -5,7 +5,7 @@
  *                    interfaces must be implemented by OSL to interface the
  *                    ACPI components to the host operating system.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 7417731472b7a1ea8d7cacb4d618e41338674e08..67c0b9e734b646801e8e62337e09b225e34c8db6 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acpixf.h - External interfaces to the ACPI subsystem
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20211217
+#define ACPI_CA_VERSION                 0x20220331
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
index 8e2319bbd0a22a7d273eb8bbeb9f49c0126d3eed..a7fb8ddb3dc6b7c05fc8700fc9dc5473c56f2c36 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acrestyp.h - Defines, types, and structures for resource descriptors
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index f9cda909f92ccb8c4e1a88c510e3952884e58547..c6af579f74f44baafe85d87366097847c392c52f 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: actbl.h - Basic ACPI Table Definitions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 159070edd031bedcf15d359e09fc90fd985673f3..15c78678c5d3ee4b84c4c62d72f42b5bc23934ea 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: actbl1.h - Additional ACPI table definitions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -373,17 +373,21 @@ struct acpi_cedt_cfmws {
        u32 interleave_targets[];
 };
 
+struct acpi_cedt_cfmws_target_element {
+       u32 interleave_target;
+};
+
 /* Values for Interleave Arithmetic field above */
 
-#define ACPI_CEDT_CFMWS_ARITHMETIC_MODULO      (0)
+#define ACPI_CEDT_CFMWS_ARITHMETIC_MODULO   (0)
 
 /* Values for Restrictions field above */
 
-#define ACPI_CEDT_CFMWS_RESTRICT_TYPE2         (1)
-#define ACPI_CEDT_CFMWS_RESTRICT_TYPE3         (1<<1)
-#define ACPI_CEDT_CFMWS_RESTRICT_VOLATILE      (1<<2)
-#define ACPI_CEDT_CFMWS_RESTRICT_PMEM          (1<<3)
-#define ACPI_CEDT_CFMWS_RESTRICT_FIXED         (1<<4)
+#define ACPI_CEDT_CFMWS_RESTRICT_TYPE2      (1)
+#define ACPI_CEDT_CFMWS_RESTRICT_TYPE3      (1<<1)
+#define ACPI_CEDT_CFMWS_RESTRICT_VOLATILE   (1<<2)
+#define ACPI_CEDT_CFMWS_RESTRICT_PMEM       (1<<3)
+#define ACPI_CEDT_CFMWS_RESTRICT_FIXED      (1<<4)
 
 /*******************************************************************************
  *
index 16847c8d9d5f37357011f65ce31f82e7bf937da7..655102bc6d1430e7c201006fcc535dba7fee1a33 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: actbl2.h - ACPI Table Definitions (tables not in ACPI spec)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -25,6 +25,7 @@
  * the wrong signature.
  */
 #define ACPI_SIG_AGDI           "AGDI" /* Arm Generic Diagnostic Dump and Reset Device Interface */
+#define ACPI_SIG_APMT           "APMT" /* Arm Performance Monitoring Unit table */
 #define ACPI_SIG_BDAT           "BDAT" /* BIOS Data ACPI Table */
 #define ACPI_SIG_IORT           "IORT" /* IO Remapping Table */
 #define ACPI_SIG_IVRS           "IVRS" /* I/O Virtualization Reporting Structure */
@@ -258,6 +259,85 @@ struct acpi_table_agdi {
 
 #define ACPI_AGDI_SIGNALING_MODE (1)
 
+/*******************************************************************************
+ *
+ * APMT - ARM Performance Monitoring Unit Table
+ *
+ * Conforms to:
+ * ARM Performance Monitoring Unit Architecture 1.0 Platform Design Document
+ * ARM DEN0117 v1.0 November 25, 2021
+ *
+ ******************************************************************************/
+
+struct acpi_table_apmt {
+       struct acpi_table_header header;        /* Common ACPI table header */
+};
+
+#define ACPI_APMT_NODE_ID_LENGTH                4
+
+/*
+ * APMT subtables
+ */
+struct acpi_apmt_node {
+       u16 length;
+       u8 flags;
+       u8 type;
+       u32 id;
+       u64 inst_primary;
+       u32 inst_secondary;
+       u64 base_address0;
+       u64 base_address1;
+       u32 ovflw_irq;
+       u32 reserved;
+       u32 ovflw_irq_flags;
+       u32 proc_affinity;
+       u32 impl_id;
+};
+
+/* Masks for Flags field above */
+
+#define ACPI_APMT_FLAGS_DUAL_PAGE               (1<<0)
+#define ACPI_APMT_FLAGS_AFFINITY                (1<<1)
+#define ACPI_APMT_FLAGS_ATOMIC                  (1<<2)
+
+/* Values for Flags dual page field above */
+
+#define ACPI_APMT_FLAGS_DUAL_PAGE_NSUPP         (0<<0)
+#define ACPI_APMT_FLAGS_DUAL_PAGE_SUPP          (1<<0)
+
+/* Values for Flags processor affinity field above */
+#define ACPI_APMT_FLAGS_AFFINITY_PROC           (0<<1)
+#define ACPI_APMT_FLAGS_AFFINITY_PROC_CONTAINER (1<<1)
+
+/* Values for Flags 64-bit atomic field above */
+#define ACPI_APMT_FLAGS_ATOMIC_NSUPP            (0<<2)
+#define ACPI_APMT_FLAGS_ATOMIC_SUPP             (1<<2)
+
+/* Values for Type field above */
+
+enum acpi_apmt_node_type {
+       ACPI_APMT_NODE_TYPE_MC = 0x00,
+       ACPI_APMT_NODE_TYPE_SMMU = 0x01,
+       ACPI_APMT_NODE_TYPE_PCIE_ROOT = 0x02,
+       ACPI_APMT_NODE_TYPE_ACPI = 0x03,
+       ACPI_APMT_NODE_TYPE_CACHE = 0x04,
+       ACPI_APMT_NODE_TYPE_COUNT
+};
+
+/* Masks for ovflw_irq_flags field above */
+
+#define ACPI_APMT_OVFLW_IRQ_FLAGS_MODE          (1<<0)
+#define ACPI_APMT_OVFLW_IRQ_FLAGS_TYPE          (1<<1)
+
+/* Values for ovflw_irq_flags mode field above */
+
+#define ACPI_APMT_OVFLW_IRQ_FLAGS_MODE_LEVEL    (0<<0)
+#define ACPI_APMT_OVFLW_IRQ_FLAGS_MODE_EDGE     (1<<0)
+
+/* Values for ovflw_irq_flags type field above */
+
+#define ACPI_APMT_OVFLW_IRQ_FLAGS_TYPE_WIRED    (0<<1)
+
 /*******************************************************************************
  *
  * BDAT - BIOS Data ACPI Table
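
A sketch of how a consumer might decode the new APMT node fields defined above;
the helper is illustrative and not part of this patch, and the comments restate
only what the flag names and value macros say:

	static void apmt_decode_node(const struct acpi_apmt_node *node)
	{
		/* Dual-page: (1<<0) set means SUPP, clear means NSUPP */
		bool dual_page = node->flags & ACPI_APMT_FLAGS_DUAL_PAGE;

		/* Affinity: (1<<1) set means proc_affinity is a processor container */
		bool container = node->flags & ACPI_APMT_FLAGS_AFFINITY;

		/* Overflow IRQ mode: (1<<0) set means edge, clear means level */
		bool edge_irq = node->ovflw_irq_flags & ACPI_APMT_OVFLW_IRQ_FLAGS_MODE;
	}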
@@ -277,7 +357,7 @@ struct acpi_table_bdat {
  * IORT - IO Remapping Table
  *
  * Conforms to "IO Remapping Table System Software on ARM Platforms",
- * Document number: ARM DEN 0049E.b, Feb 2021
+ * Document number: ARM DEN 0049E.d, Feb 2022
  *
  ******************************************************************************/
 
@@ -374,7 +454,8 @@ struct acpi_iort_root_complex {
        u32 ats_attribute;
        u32 pci_segment_number;
        u8 memory_address_limit;        /* Memory address size limit */
-       u8 reserved[3];         /* Reserved, must be zero */
+       u16 pasid_capabilities; /* PASID Capabilities */
+       u8 reserved[1];         /* Reserved, must be zero */
 };
 
 /* Masks for ats_attribute field above */
@@ -383,6 +464,9 @@ struct acpi_iort_root_complex {
 #define ACPI_IORT_PRI_SUPPORTED         (1<<1) /* The root complex PRI support */
 #define ACPI_IORT_PASID_FWD_SUPPORTED   (1<<2) /* The root complex PASID forward support */
 
+/* Masks for pasid_capabilities field above */
+#define ACPI_IORT_PASID_MAX_WIDTH       (0x1F) /* Bits 0-4 */
+
 struct acpi_iort_smmu {
        u64 base_address;       /* SMMU base address */
        u64 span;               /* Length of memory range */
@@ -458,6 +542,25 @@ struct acpi_iort_rmr {
        u32 rmr_offset;
 };
 
+/* Masks for Flags field above */
+#define ACPI_IORT_RMR_REMAP_PERMITTED      (1)
+#define ACPI_IORT_RMR_ACCESS_PRIVILEGE     (1<<1)
+
+/*
+ * Macro to access the Access Attributes in flags field above:
+ *  Access Attributes is encoded in bits 9:2
+ */
+#define ACPI_IORT_RMR_ACCESS_ATTRIBUTES(flags)          (((flags) >> 2) & 0xFF)
+
+/* Values for above Access Attributes */
+
+#define ACPI_IORT_RMR_ATTR_DEVICE_NGNRNE   0x00
+#define ACPI_IORT_RMR_ATTR_DEVICE_NGNRE    0x01
+#define ACPI_IORT_RMR_ATTR_DEVICE_NGRE     0x02
+#define ACPI_IORT_RMR_ATTR_DEVICE_GRE      0x03
+#define ACPI_IORT_RMR_ATTR_NORMAL_NC       0x04
+#define ACPI_IORT_RMR_ATTR_NORMAL_IWB_OWB  0x05
+
 struct acpi_iort_rmr_desc {
        u64 base_address;
        u64 length;
@@ -762,7 +865,8 @@ enum acpi_madt_type {
        ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR = 14,
        ACPI_MADT_TYPE_GENERIC_TRANSLATOR = 15,
        ACPI_MADT_TYPE_MULTIPROC_WAKEUP = 16,
-       ACPI_MADT_TYPE_RESERVED = 17    /* 17 and greater are reserved */
+       ACPI_MADT_TYPE_RESERVED = 17,   /* 17 to 0x7F are reserved */
+       ACPI_MADT_TYPE_OEM_RESERVED = 0x80      /* 0x80 to 0xFF are reserved for OEM use */
 };
 
 /*
@@ -978,8 +1082,8 @@ struct acpi_madt_multiproc_wakeup {
        u64 base_address;
 };
 
-#define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE       2032
-#define ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE 2048
+#define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE        2032
+#define ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE  2048
 
 struct acpi_madt_multiproc_wakeup_mailbox {
        u16 command;
@@ -992,6 +1096,12 @@ struct acpi_madt_multiproc_wakeup_mailbox {
 
 #define ACPI_MP_WAKE_COMMAND_WAKEUP    1
 
+/* 17: OEM data */
+
+struct acpi_madt_oem_data {
+       u8 oem_data[0];
+};
+
 /*
  * Common flags fields for MADT subtables
  */
@@ -1597,7 +1707,7 @@ struct acpi_nhlt_mic_device_specific_config {
 
 /* Values for array_type_ext above */
 
-#define ACPI_NHLT_ARRAY_TYPE_RESERVED               0x09       // 9 and below are reserved
+#define ACPI_NHLT_ARRAY_TYPE_RESERVED               0x09       /* 9 and below are reserved */
 #define ACPI_NHLT_SMALL_LINEAR_2ELEMENT             0x0A
 #define ACPI_NHLT_BIG_LINEAR_2ELEMENT               0x0B
 #define ACPI_NHLT_FIRST_GEOMETRY_LINEAR_4ELEMENT    0x0C
@@ -1617,17 +1727,17 @@ struct acpi_nhlt_vendor_mic_count {
 struct acpi_nhlt_vendor_mic_config {
        u8 type;
        u8 panel;
-       u16 speaker_position_distance;  // mm
-       u16 horizontal_offset;  // mm
-       u16 vertical_offset;    // mm
-       u8 frequency_low_band;  // 5*hz
-       u8 frequency_high_band; // 500*hz
-       u16 direction_angle;    // -180 - + 180
-       u16 elevation_angle;    // -180 - + 180
-       u16 work_vertical_angle_begin;  // -180 - + 180 with 2 deg step
-       u16 work_vertical_angle_end;    // -180 - + 180 with 2 deg step
-       u16 work_horizontal_angle_begin;        // -180 - + 180 with 2 deg step
-       u16 work_horizontal_angle_end;  // -180 - + 180 with 2 deg step
+       u16 speaker_position_distance;  /* mm */
+       u16 horizontal_offset;  /* mm */
+       u16 vertical_offset;    /* mm */
+       u8 frequency_low_band;  /* 5*Hz */
+       u8 frequency_high_band; /* 500*Hz */
+       u16 direction_angle;    /* -180 - + 180 */
+       u16 elevation_angle;    /* -180 - + 180 */
+       u16 work_vertical_angle_begin;  /* -180 - + 180 with 2 deg step */
+       u16 work_vertical_angle_end;    /* -180 - + 180 with 2 deg step */
+       u16 work_horizontal_angle_begin;        /* -180 - + 180 with 2 deg step */
+       u16 work_horizontal_angle_end;  /* -180 - + 180 with 2 deg step */
 };
 
 /* Values for Type field above */
@@ -1638,9 +1748,9 @@ struct acpi_nhlt_vendor_mic_config {
 #define ACPI_NHLT_MIC_SUPER_CARDIOID        3
 #define ACPI_NHLT_MIC_HYPER_CARDIOID        4
 #define ACPI_NHLT_MIC_8_SHAPED              5
-#define ACPI_NHLT_MIC_RESERVED6             6  // 6 is reserved
+#define ACPI_NHLT_MIC_RESERVED6             6  /* 6 is reserved */
 #define ACPI_NHLT_MIC_VENDOR_DEFINED        7
-#define ACPI_NHLT_MIC_RESERVED              8  // 8 and above are reserved
+#define ACPI_NHLT_MIC_RESERVED              8  /* 8 and above are reserved */
 
 /* Values for Panel field above */
 
@@ -1650,12 +1760,12 @@ struct acpi_nhlt_vendor_mic_config {
 #define ACPI_NHLT_MIC_POSITION_RIGHT        3
 #define ACPI_NHLT_MIC_POSITION_FRONT        4
 #define ACPI_NHLT_MIC_POSITION_BACK         5
-#define ACPI_NHLT_MIC_POSITION_RESERVED     6  // 6 and above are reserved
+#define ACPI_NHLT_MIC_POSITION_RESERVED     6  /* 6 and above are reserved */
 
 struct acpi_nhlt_vendor_mic_device_specific_config {
        struct acpi_nhlt_mic_device_specific_config mic_array_device_config;
        u8 number_of_microphones;
-       struct acpi_nhlt_vendor_mic_config mic_config[];        // indexed by number_of_microphones
+       struct acpi_nhlt_vendor_mic_config mic_config[];        /* Indexed by number_of_microphones */
 };
 
 /* Microphone SNR and Sensitivity extension */
@@ -1668,32 +1778,23 @@ struct acpi_nhlt_mic_snr_sensitivity_extension {
 /* Render device with feedback */
 
 struct acpi_nhlt_render_feedback_device_specific_config {
-       u8 feedback_virtual_slot;       // render slot in case of capture
-       u16 feedback_channels;  // informative only
+       u8 feedback_virtual_slot;       /* Render slot in case of capture */
+       u16 feedback_channels;  /* Informative only */
        u16 feedback_valid_bits_per_sample;
 };
 
-/* Linux-specific structures */
+/* Non-documented structures */
 
-struct acpi_nhlt_linux_specific_count {
+struct acpi_nhlt_device_info_count {
        u8 structure_count;
 };
 
-struct acpi_nhlt_linux_specific_data {
+struct acpi_nhlt_device_info {
        u8 device_id[16];
        u8 device_instance_id;
        u8 device_port_id;
 };
 
-struct acpi_nhlt_linux_specific_data_b {
-       u8 specific_data[18];
-};
-
-struct acpi_nhlt_table_terminator {
-       u32 terminator_value;
-       u32 terminator_signature;
-};
-
 /*******************************************************************************
  *
  * PCCT - Platform Communications Channel Table (ACPI 5.0)
@@ -2319,7 +2420,7 @@ struct acpi_table_rgrt {
        u16 version;
        u8 image_type;
        u8 reserved;
-       u8 image[0];
+       u8 image[];
 };
 
 /* image_type values */
index edbf1ad8206d31c00862fddbf6400c65c713d0ea..7b9571e00cc4b0362801c395e884e8963ae895c3 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: actbl3.h - ACPI Table Definitions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 02c1fa16e6388fb928bc268c09854de2a1597697..3491e454b2abfe9951e7bd4e41adcb5d0a9a76ab 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: actypes.h - Common data types for the entire ACPI subsystem
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
@@ -539,14 +539,14 @@ typedef u64 acpi_integer;
  * Can be used with access_width of struct acpi_generic_address and access_size of
  * struct acpi_resource_generic_register.
  */
-#define ACPI_ACCESS_BIT_SHIFT          2
-#define ACPI_ACCESS_BYTE_SHIFT         -1
-#define ACPI_ACCESS_BIT_MAX            (31 - ACPI_ACCESS_BIT_SHIFT)
-#define ACPI_ACCESS_BYTE_MAX           (31 - ACPI_ACCESS_BYTE_SHIFT)
-#define ACPI_ACCESS_BIT_DEFAULT                (8 - ACPI_ACCESS_BIT_SHIFT)
-#define ACPI_ACCESS_BYTE_DEFAULT       (8 - ACPI_ACCESS_BYTE_SHIFT)
-#define ACPI_ACCESS_BIT_WIDTH(size)    (1 << ((size) + ACPI_ACCESS_BIT_SHIFT))
-#define ACPI_ACCESS_BYTE_WIDTH(size)   (1 << ((size) + ACPI_ACCESS_BYTE_SHIFT))
+#define ACPI_ACCESS_BIT_SHIFT           2
+#define ACPI_ACCESS_BYTE_SHIFT          -1
+#define ACPI_ACCESS_BIT_MAX             (31 - ACPI_ACCESS_BIT_SHIFT)
+#define ACPI_ACCESS_BYTE_MAX            (31 - ACPI_ACCESS_BYTE_SHIFT)
+#define ACPI_ACCESS_BIT_DEFAULT         (8 - ACPI_ACCESS_BIT_SHIFT)
+#define ACPI_ACCESS_BYTE_DEFAULT        (8 - ACPI_ACCESS_BYTE_SHIFT)
+#define ACPI_ACCESS_BIT_WIDTH(size)     (1 << ((size) + ACPI_ACCESS_BIT_SHIFT))
+#define ACPI_ACCESS_BYTE_WIDTH(size)    (1 << ((size) + ACPI_ACCESS_BYTE_SHIFT))
 
 /*******************************************************************************
  *
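
Plugging values into the realigned macros confirms nothing but whitespace changed:

	/*
	 * Worked check (derived directly from the definitions above):
	 *   ACPI_ACCESS_BIT_WIDTH(1)  == 1 << (1 + 2) == 8 bits
	 *   ACPI_ACCESS_BYTE_WIDTH(1) == 1 << (1 - 1) == 1 byte
	 *   ACPI_ACCESS_BIT_WIDTH(3)  == 1 << (3 + 2) == 32 bits
	 *   ACPI_ACCESS_BYTE_WIDTH(3) == 1 << (3 - 1) == 4 bytes
	 *   ACPI_ACCESS_BIT_MAX       == 31 - 2       == 29
	 */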
@@ -1303,6 +1303,7 @@ typedef enum {
 #define ACPI_OSI_WIN_10_RS5             0x13
 #define ACPI_OSI_WIN_10_19H1            0x14
 #define ACPI_OSI_WIN_10_20H1            0x15
+#define ACPI_OSI_WIN_11                 0x16
 
 /* Definitions of getopt */
 
index bc24388ce94e246649ea5d644817c17250764591..8f1e7c489df57b45521f480f501d6c3a938b86cd 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acuuid.h - ACPI-related UUID/GUID definitions
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index afaca3a075e86ac5564ea94d403e4c543bb75329..dc60f7db5524f2054024f750ad878fe236f20388 100644 (file)
@@ -46,6 +46,8 @@ int erst_get_record_id_next(int *pos, u64 *record_id);
 void erst_get_record_id_end(void);
 ssize_t erst_read(u64 record_id, struct cper_record_header *record,
                  size_t buflen);
+ssize_t erst_read_record(u64 record_id, struct cper_record_header *record,
+               size_t buflen, size_t recordlen, const guid_t *creatorid);
 int erst_clear(u64 record_id);
 
 int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
index 92b7ea8d8f5e1a9c2e359c96f3d7e43da3c3c1dc..c6108581d97dc66dd5b3afc94460651d2b9dc510 100644 (file)
@@ -141,6 +141,7 @@ extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
 extern int cppc_set_enable(int cpu, bool enable);
 extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
 extern bool acpi_cpc_valid(void);
+extern bool cppc_allow_fast_switch(void);
 extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data);
 extern unsigned int cppc_get_transition_latency(int cpu);
 extern bool cpc_ffh_supported(void);
@@ -175,6 +176,10 @@ static inline bool acpi_cpc_valid(void)
 {
        return false;
 }
+static inline bool cppc_allow_fast_switch(void)
+{
+       return false;
+}
 static inline unsigned int cppc_get_transition_latency(int cpu)
 {
        return CPUFREQ_ETERNAL;
index e8958e0d1646060f6cb11ca9afee4666324f4a9a..03eb3d9770759cc1826e47d8c55621c0f046fbfd 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acenv.h - Host and compiler configuration
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 277fe2fa4d9b38289890d5688f88cab678d7f60d..3a6b1db9a984d5f38173d6fd7b52703195d56c88 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acenvex.h - Extra host and compiler configuration
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 33ad282bd3389e446ceae951b5cd4a36d30540d9..ac80111f503c6e85fcb448f3a7a6bc9940ad75db 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acgcc.h - GCC specific defines, etc.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 738d52865e0a441238324a59779c013ce891b9a4..302ea1b724b992a6a627f92723ce357ca3a4bddd 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acgccex.h - Extra GCC specific defines, etc.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 550fe9a8cd6cdc7ff5873ff620c104d26f9db5b1..85b1ae86ee636fd63fe333b1c4092a7f5dcce50d 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: acintel.h - VC specific defines, etc.
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index cec41e004ecf70c00fe749f57b9175c67276e62f..a5550dd4d507001efe1e81b6638af9a4a3c9043f 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: aclinux.h - OS specific defines, etc. for Linux
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 5f642b07ad6479f5dea65023e68d471392229873..28c72744decf3dec404347c458317409deefe87a 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Name: aclinuxex.h - Extra OS specific defines, etc. for Linux
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 7ae0ece07b4e4968158980a5ce71922e0cdd120c..d4cd4cc4389c0682ef5e164b56be1afabb3b323a 100644 (file)
@@ -33,8 +33,8 @@ extern void queued_read_lock_slowpath(struct qrwlock *lock);
 extern void queued_write_lock_slowpath(struct qrwlock *lock);
 
 /**
- * queued_read_trylock - try to acquire read lock of a queue rwlock
- * @lock : Pointer to queue rwlock structure
+ * queued_read_trylock - try to acquire read lock of a queued rwlock
+ * @lock : Pointer to queued rwlock structure
  * Return: 1 if lock acquired, 0 if failed
  */
 static inline int queued_read_trylock(struct qrwlock *lock)
@@ -52,8 +52,8 @@ static inline int queued_read_trylock(struct qrwlock *lock)
 }
 
 /**
- * queued_write_trylock - try to acquire write lock of a queue rwlock
- * @lock : Pointer to queue rwlock structure
+ * queued_write_trylock - try to acquire write lock of a queued rwlock
+ * @lock : Pointer to queued rwlock structure
  * Return: 1 if lock acquired, 0 if failed
  */
 static inline int queued_write_trylock(struct qrwlock *lock)
@@ -68,8 +68,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
                                _QW_LOCKED));
 }
 /**
- * queued_read_lock - acquire read lock of a queue rwlock
- * @lock: Pointer to queue rwlock structure
+ * queued_read_lock - acquire read lock of a queued rwlock
+ * @lock: Pointer to queued rwlock structure
  */
 static inline void queued_read_lock(struct qrwlock *lock)
 {
@@ -84,8 +84,8 @@ static inline void queued_read_lock(struct qrwlock *lock)
 }
 
 /**
- * queued_write_lock - acquire write lock of a queue rwlock
- * @lock : Pointer to queue rwlock structure
+ * queued_write_lock - acquire write lock of a queued rwlock
+ * @lock : Pointer to queued rwlock structure
  */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
@@ -98,8 +98,8 @@ static inline void queued_write_lock(struct qrwlock *lock)
 }
 
 /**
- * queued_read_unlock - release read lock of a queue rwlock
- * @lock : Pointer to queue rwlock structure
+ * queued_read_unlock - release read lock of a queued rwlock
+ * @lock : Pointer to queued rwlock structure
  */
 static inline void queued_read_unlock(struct qrwlock *lock)
 {
@@ -110,8 +110,8 @@ static inline void queued_read_unlock(struct qrwlock *lock)
 }
 
 /**
- * queued_write_unlock - release write lock of a queue rwlock
- * @lock : Pointer to queue rwlock structure
+ * queued_write_unlock - release write lock of a queued rwlock
+ * @lock : Pointer to queued rwlock structure
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
@@ -120,7 +120,7 @@ static inline void queued_write_unlock(struct qrwlock *lock)
 
 /**
  * queued_rwlock_is_contended - check if the lock is contended
- * @lock : Pointer to queue rwlock structure
+ * @lock : Pointer to queued rwlock structure
  * Return: 1 if lock contended, 0 otherwise
  */
 static inline int queued_rwlock_is_contended(struct qrwlock *lock)
@@ -130,7 +130,7 @@ static inline int queued_rwlock_is_contended(struct qrwlock *lock)
 
 /*
  * Remapping rwlock architecture specific functions to the corresponding
- * queue rwlock functions.
+ * queued rwlock functions.
  */
 #define arch_read_lock(l)              queued_read_lock(l)
 #define arch_write_lock(l)             queued_write_lock(l)
index c36f1d5a2572cdbc01106730a815637419acbf35..12392c14c4d0fba6ce09e589d164cfed5e42b002 100644 (file)
@@ -7,7 +7,7 @@
 #include <asm/spinlock_types.h>
 
 /*
- * The queue read/write lock data structure
+ * The queued read/write lock data structure
  */
 
 typedef struct qrwlock {
index 69138e9db787f766ed9c112c46ea202df96854de..7515a465ec03a64f1a28db6b917e1d259d2aaade 100644 (file)
  */
 #define SCHED_DATA                             \
        STRUCT_ALIGN();                         \
-       __begin_sched_classes = .;              \
-       *(__idle_sched_class)                   \
-       *(__fair_sched_class)                   \
-       *(__rt_sched_class)                     \
-       *(__dl_sched_class)                     \
+       __sched_class_highest = .;              \
        *(__stop_sched_class)                   \
-       __end_sched_classes = .;
+       *(__dl_sched_class)                     \
+       *(__rt_sched_class)                     \
+       *(__fair_sched_class)                   \
+       *(__idle_sched_class)                   \
+       __sched_class_lowest = .;
 
 /* The actual configuration determines if the init/exit sections
  * are handled as text/data or if they can be discarded (which
diff --git a/include/clocksource/timer-goldfish.h b/include/clocksource/timer-goldfish.h
new file mode 100644 (file)
index 0000000..05a3a4f
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * goldfish-timer clocksource
+ * Registers definition for the goldfish-timer device
+ */
+
+#ifndef _CLOCKSOURCE_TIMER_GOLDFISH_H
+#define _CLOCKSOURCE_TIMER_GOLDFISH_H
+
+/*
+ * TIMER_TIME_LOW       get low bits of current time and update TIMER_TIME_HIGH
+ * TIMER_TIME_HIGH      get high bits of time at last TIMER_TIME_LOW read
+ * TIMER_ALARM_LOW      set low bits of alarm and activate it
+ * TIMER_ALARM_HIGH     set high bits of next alarm
+ * TIMER_IRQ_ENABLED    enable alarm interrupt
+ * TIMER_CLEAR_ALARM    disarm an existing alarm
+ * TIMER_ALARM_STATUS   alarm status (running or not)
+ * TIMER_CLEAR_INTERRUPT clear interrupt
+ */
+#define TIMER_TIME_LOW         0x00
+#define TIMER_TIME_HIGH                0x04
+#define TIMER_ALARM_LOW                0x08
+#define TIMER_ALARM_HIGH       0x0c
+#define TIMER_IRQ_ENABLED      0x10
+#define TIMER_CLEAR_ALARM      0x14
+#define TIMER_ALARM_STATUS     0x18
+#define TIMER_CLEAR_INTERRUPT  0x1c
+
+extern int goldfish_timer_init(int irq, void __iomem *base);
+
+#endif /* _CLOCKSOURCE_TIMER_GOLDFISH_H */
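
A minimal sketch of the latched 64-bit read that the register comments above
describe: reading TIMER_TIME_LOW samples the counter and latches the high half
into TIMER_TIME_HIGH (readl and the iomem base are assumed available to the
caller):

	static u64 goldfish_timer_read_time(void __iomem *base)
	{
		u32 lo = readl(base + TIMER_TIME_LOW);	/* latches HIGH */
		u32 hi = readl(base + TIMER_TIME_HIGH);	/* value at the LOW read */

		return ((u64)hi << 32) | lo;
	}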
index 2419a735420fb8dfb596ff6217d3193f192f21cc..91e080efb918905d71132d2c986ef2b700639466 100644 (file)
 
 #include <linux/key.h>
 
+enum blacklist_hash_type {
+       /* TBSCertificate hash */
+       BLACKLIST_HASH_X509_TBS = 1,
+       /* Raw data hash */
+       BLACKLIST_HASH_BINARY = 2,
+};
+
 #ifdef CONFIG_SYSTEM_TRUSTED_KEYRING
 
 extern int restrict_link_by_builtin_trusted(struct key *keyring,
@@ -54,13 +61,14 @@ static inline void __init set_machine_trusted_keys(struct key *keyring)
 
 extern struct pkcs7_message *pkcs7;
 #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING
-extern int mark_hash_blacklisted(const char *hash);
+extern int mark_hash_blacklisted(const u8 *hash, size_t hash_len,
+                              enum blacklist_hash_type hash_type);
 extern int is_hash_blacklisted(const u8 *hash, size_t hash_len,
-                              const char *type);
+                              enum blacklist_hash_type hash_type);
 extern int is_binary_blacklisted(const u8 *hash, size_t hash_len);
 #else
 static inline int is_hash_blacklisted(const u8 *hash, size_t hash_len,
-                                     const char *type)
+                                     enum blacklist_hash_type hash_type)
 {
        return 0;
 }
index d89fa2579ac0569864508ef4d175669f22314d35..4eb64548a74f1ad0b636a8caa35cb8b4544f573e 100644 (file)
@@ -64,7 +64,7 @@ struct trusted_key_ops {
        /* Unseal a key. */
        int (*unseal)(struct trusted_key_payload *p, char *datablob);
 
-       /* Get a randomized key. */
+       /* Optional: Get a randomized key. */
        int (*get_random)(unsigned char *key, size_t key_len);
 
        /* Exit key interface. */
diff --git a/include/keys/trusted_caam.h b/include/keys/trusted_caam.h
new file mode 100644 (file)
index 0000000..73fe2f3
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Pengutronix, Ahmad Fatoum <kernel@pengutronix.de>
+ */
+
+#ifndef __CAAM_TRUSTED_KEY_H
+#define __CAAM_TRUSTED_KEY_H
+
+extern struct trusted_key_ops trusted_key_caam_ops;
+
+#endif
index d7136d13aa442cc6880801af4db19e2d5181d2b7..4781c2a07f71987f5cc0e15a8c27316c14d03fa6 100644 (file)
@@ -520,9 +520,6 @@ int acpi_check_resource_conflict(const struct resource *res);
 int acpi_check_region(resource_size_t start, resource_size_t n,
                      const char *name);
 
-acpi_status acpi_release_memory(acpi_handle handle, struct resource *res,
-                               u32 level);
-
 int acpi_resources_are_enforced(void);
 
 #ifdef CONFIG_HIBERNATION
@@ -574,6 +571,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_SB_OSLPI_SUPPORT                   0x00000100
 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT                0x00001000
 #define OSC_SB_GENERIC_INITIATOR_SUPPORT       0x00002000
+#define OSC_SB_CPC_FLEXIBLE_ADR_SPACE          0x00004000
 #define OSC_SB_NATIVE_USB4_SUPPORT             0x00040000
 #define OSC_SB_PRM_SUPPORT                     0x00200000
 
@@ -581,6 +579,7 @@ extern bool osc_sb_apei_support_acked;
 extern bool osc_pc_lpi_support_confirmed;
 extern bool osc_sb_native_usb4_support_confirmed;
 extern bool osc_sb_cppc_not_supported;
+extern bool osc_cpc_flexible_adr_space_confirmed;
 
 /* USB4 Capabilities */
 #define OSC_USB_USB3_TUNNELING                 0x00000001
index c92ebc39fc1fdbc5aeba2dbcb099ed6da4714081..6f96dc2209c000f4b1578173caf38e437c594274 100644 (file)
  * @ocr_mask: available voltages on the 4 pins from the block, this
  * is ignored if a regulator is used, see the MMC_VDD_* masks in
  * mmc/host.h
- * @ios_handler: a callback function to act on specfic ios changes,
- * used for example to control a levelshifter
- * mask into a value to be binary (or set some other custom bits
- * in MMCIPWR) or:ed and written into the MMCIPWR register of the
- * block.  May also control external power based on the power_mode.
  * @status: if no GPIO line was given to the block, this function will
  * be called to determine whether a card is present in the MMC slot or not
  */
 struct mmci_platform_data {
        unsigned int ocr_mask;
-       int (*ios_handler)(struct device *, struct mmc_ios *);
        unsigned int (*status)(struct device *);
 };
 
index 6db58d1808665879b04a021f27ae005a78306cc6..77bc5522e61c68a040fc8707458188671867a58c 100644 (file)
 
 #endif /* arch_try_cmpxchg_relaxed */
 
+#ifndef arch_try_cmpxchg64_relaxed
+#ifdef arch_try_cmpxchg64
+#define arch_try_cmpxchg64_acquire arch_try_cmpxchg64
+#define arch_try_cmpxchg64_release arch_try_cmpxchg64
+#define arch_try_cmpxchg64_relaxed arch_try_cmpxchg64
+#endif /* arch_try_cmpxchg64 */
+
+#ifndef arch_try_cmpxchg64
+#define arch_try_cmpxchg64(_ptr, _oldp, _new) \
+({ \
+       typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+       ___r = arch_cmpxchg64((_ptr), ___o, (_new)); \
+       if (unlikely(___r != ___o)) \
+               *___op = ___r; \
+       likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg64 */
+
+#ifndef arch_try_cmpxchg64_acquire
+#define arch_try_cmpxchg64_acquire(_ptr, _oldp, _new) \
+({ \
+       typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+       ___r = arch_cmpxchg64_acquire((_ptr), ___o, (_new)); \
+       if (unlikely(___r != ___o)) \
+               *___op = ___r; \
+       likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg64_acquire */
+
+#ifndef arch_try_cmpxchg64_release
+#define arch_try_cmpxchg64_release(_ptr, _oldp, _new) \
+({ \
+       typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+       ___r = arch_cmpxchg64_release((_ptr), ___o, (_new)); \
+       if (unlikely(___r != ___o)) \
+               *___op = ___r; \
+       likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg64_release */
+
+#ifndef arch_try_cmpxchg64_relaxed
+#define arch_try_cmpxchg64_relaxed(_ptr, _oldp, _new) \
+({ \
+       typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+       ___r = arch_cmpxchg64_relaxed((_ptr), ___o, (_new)); \
+       if (unlikely(___r != ___o)) \
+               *___op = ___r; \
+       likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg64_relaxed */
+
+#else /* arch_try_cmpxchg64_relaxed */
+
+#ifndef arch_try_cmpxchg64_acquire
+#define arch_try_cmpxchg64_acquire(...) \
+       __atomic_op_acquire(arch_try_cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef arch_try_cmpxchg64_release
+#define arch_try_cmpxchg64_release(...) \
+       __atomic_op_release(arch_try_cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef arch_try_cmpxchg64
+#define arch_try_cmpxchg64(...) \
+       __atomic_op_fence(arch_try_cmpxchg64, __VA_ARGS__)
+#endif
+
+#endif /* arch_try_cmpxchg64_relaxed */
+
 #ifndef arch_atomic_read_acquire
 static __always_inline int
 arch_atomic_read_acquire(const atomic_t *v)
@@ -2386,4 +2456,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
 #endif
 
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae
+// b5e87bdd5ede61470c29f7a7e4de781af3770f09
index 5d69b143c28e65faf61e6834d0758e2d262f9ed2..7a139ec030b0c32bcdfc11df93ffb741c48d0282 100644 (file)
@@ -2006,6 +2006,44 @@ atomic_long_dec_if_positive(atomic_long_t *v)
        arch_try_cmpxchg_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \
 })
 
+#define try_cmpxchg64(ptr, oldp, ...) \
+({ \
+       typeof(ptr) __ai_ptr = (ptr); \
+       typeof(oldp) __ai_oldp = (oldp); \
+       kcsan_mb(); \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+       instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+       arch_try_cmpxchg64(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_acquire(ptr, oldp, ...) \
+({ \
+       typeof(ptr) __ai_ptr = (ptr); \
+       typeof(oldp) __ai_oldp = (oldp); \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+       instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+       arch_try_cmpxchg64_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_release(ptr, oldp, ...) \
+({ \
+       typeof(ptr) __ai_ptr = (ptr); \
+       typeof(oldp) __ai_oldp = (oldp); \
+       kcsan_release(); \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+       instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+       arch_try_cmpxchg64_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_relaxed(ptr, oldp, ...) \
+({ \
+       typeof(ptr) __ai_ptr = (ptr); \
+       typeof(oldp) __ai_oldp = (oldp); \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+       instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+       arch_try_cmpxchg64_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
 #define cmpxchg_local(ptr, ...) \
 ({ \
        typeof(ptr) __ai_ptr = (ptr); \
@@ -2045,4 +2083,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 })
 
 #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 87c974b93032afd42143613434d1a7788fa598f9
+// 764f741eb77a7ad565dc8d99ce2837d5542e8aee
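
The try_cmpxchg64() wrappers added here follow the try_cmpxchg() contract:
they return true when the exchange succeeded and, on failure, write the
value actually found back through the old-value pointer, so a retry loop
does not need to re-read the location. A minimal sketch of that idiom
(stats_add64() is a hypothetical helper, not part of this series):

    #include <linux/atomic.h>

    /* Lock-free 64-bit accumulator built on the new try_cmpxchg64(). */
    static void stats_add64(u64 *counter, u64 delta)
    {
            u64 old = READ_ONCE(*counter);

            /* On failure 'old' is refreshed, so the loop body is empty. */
            do {
            } while (!try_cmpxchg64(counter, &old, old + delta));
    }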
index d06134ac6245f5fbb055b490873e5162b7f7280b..cece70231138855972c5f471b58f3e8632cc1d1a 100644 (file)
@@ -339,7 +339,7 @@ static inline void audit_uring_entry(u8 op)
 }
 static inline void audit_uring_exit(int success, long code)
 {
-       if (unlikely(!audit_dummy_context()))
+       if (unlikely(audit_context()))
                __audit_uring_exit(success, code);
 }
 static inline void audit_syscall_entry(int major, unsigned long a0,
index 87ce24d238f3459e3174805282b7cf37e38624df..2bd073fa6bb53ebd0efad023e3ba8c836c56b691 100644 (file)
@@ -17,8 +17,6 @@
 #include <linux/backing-dev-defs.h>
 #include <linux/slab.h>
 
-struct blkcg;
-
 static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
 {
        kref_get(&bdi->refcnt);
@@ -154,7 +152,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css,
                                    gfp_t gfp);
 void wb_memcg_offline(struct mem_cgroup *memcg);
-void wb_blkcg_offline(struct blkcg *blkcg);
+void wb_blkcg_offline(struct cgroup_subsys_state *css);
 
 /**
  * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
@@ -378,7 +376,7 @@ static inline void wb_memcg_offline(struct mem_cgroup *memcg)
 {
 }
 
-static inline void wb_blkcg_offline(struct blkcg *blkcg)
+static inline void wb_blkcg_offline(struct cgroup_subsys_state *css)
 {
 }
 
index 00450fd86bb4373665681b02a8ac1d9cf539ddc5..1cf3738ef1ea6d794271bc619a3072c1474ce46a 100644 (file)
@@ -408,9 +408,7 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
                             unsigned int opf, gfp_t gfp_mask,
                             struct bio_set *bs);
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
-               unsigned short nr_vecs, unsigned int opf, struct bio_set *bs);
-struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
+struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
 extern void bio_put(struct bio *);
 
 struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
@@ -785,6 +783,12 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
                bio->bi_opf |= REQ_NOWAIT;
 }
 
+static inline void bio_clear_polled(struct bio *bio)
+{
+       /* can't support alloc cache if we turn off polling */
+       bio->bi_opf &= ~(REQ_POLLED | REQ_ALLOC_CACHE);
+}
+
 struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
                unsigned int nr_pages, unsigned int opf, gfp_t gfp);
 
index 652cd05b0924c3d687716a0f4bb42e5c7f8d331e..9f40dbc65f82c0e8ad44eaefb7b25a27accc5bf3 100644 (file)
  *                   Nauman Rafique <nauman@google.com>
  */
 
-#include <linux/cgroup.h>
-#include <linux/percpu.h>
-#include <linux/percpu_counter.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/seq_file.h>
-#include <linux/radix-tree.h>
-#include <linux/blkdev.h>
-#include <linux/atomic.h>
-#include <linux/kthread.h>
-#include <linux/fs.h>
+#include <linux/types.h>
+
+struct bio;
+struct cgroup_subsys_state;
+struct request_queue;
 
 #define FC_APPID_LEN              129
 
 #ifdef CONFIG_BLK_CGROUP
-
-enum blkg_iostat_type {
-       BLKG_IOSTAT_READ,
-       BLKG_IOSTAT_WRITE,
-       BLKG_IOSTAT_DISCARD,
-
-       BLKG_IOSTAT_NR,
-};
-
-struct blkcg_gq;
-struct blkg_policy_data;
-
-struct blkcg {
-       struct cgroup_subsys_state      css;
-       spinlock_t                      lock;
-       refcount_t                      online_pin;
-
-       struct radix_tree_root          blkg_tree;
-       struct blkcg_gq __rcu           *blkg_hint;
-       struct hlist_head               blkg_list;
-
-       struct blkcg_policy_data        *cpd[BLKCG_MAX_POLS];
-
-       struct list_head                all_blkcgs_node;
-#ifdef CONFIG_BLK_CGROUP_FC_APPID
-       char                            fc_app_id[FC_APPID_LEN];
-#endif
-#ifdef CONFIG_CGROUP_WRITEBACK
-       struct list_head                cgwb_list;
-#endif
-};
-
-struct blkg_iostat {
-       u64                             bytes[BLKG_IOSTAT_NR];
-       u64                             ios[BLKG_IOSTAT_NR];
-};
-
-struct blkg_iostat_set {
-       struct u64_stats_sync           sync;
-       struct blkg_iostat              cur;
-       struct blkg_iostat              last;
-};
-
-/* association between a blk cgroup and a request queue */
-struct blkcg_gq {
-       /* Pointer to the associated request_queue */
-       struct request_queue            *q;
-       struct list_head                q_node;
-       struct hlist_node               blkcg_node;
-       struct blkcg                    *blkcg;
-
-       /* all non-root blkcg_gq's are guaranteed to have access to parent */
-       struct blkcg_gq                 *parent;
-
-       /* reference count */
-       struct percpu_ref               refcnt;
-
-       /* is this blkg online? protected by both blkcg and q locks */
-       bool                            online;
-
-       struct blkg_iostat_set __percpu *iostat_cpu;
-       struct blkg_iostat_set          iostat;
-
-       struct blkg_policy_data         *pd[BLKCG_MAX_POLS];
-
-       spinlock_t                      async_bio_lock;
-       struct bio_list                 async_bios;
-       union {
-               struct work_struct      async_bio_work;
-               struct work_struct      free_work;
-       };
-
-       atomic_t                        use_delay;
-       atomic64_t                      delay_nsec;
-       atomic64_t                      delay_start;
-       u64                             last_delay;
-       int                             last_use;
-
-       struct rcu_head                 rcu_head;
-};
-
 extern struct cgroup_subsys_state * const blkcg_root_css;
 
-void blkcg_destroy_blkgs(struct blkcg *blkcg);
 void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
 void blkcg_maybe_throttle_current(void);
-
-static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
-{
-       return css ? container_of(css, struct blkcg, css) : NULL;
-}
-
-/**
- * bio_blkcg - grab the blkcg associated with a bio
- * @bio: target bio
- *
- * This returns the blkcg associated with a bio, %NULL if not associated.
- * Callers are expected to either handle %NULL or know association has been
- * done prior to calling this.
- */
-static inline struct blkcg *bio_blkcg(struct bio *bio)
-{
-       if (bio && bio->bi_blkg)
-               return bio->bi_blkg->blkcg;
-       return NULL;
-}
-
-static inline bool blk_cgroup_congested(void)
-{
-       struct cgroup_subsys_state *css;
-       bool ret = false;
-
-       rcu_read_lock();
-       css = kthread_blkcg();
-       if (!css)
-               css = task_css(current, io_cgrp_id);
-       while (css) {
-               if (atomic_read(&css->cgroup->congestion_count)) {
-                       ret = true;
-                       break;
-               }
-               css = css->parent;
-       }
-       rcu_read_unlock();
-       return ret;
-}
-
-/**
- * blkcg_parent - get the parent of a blkcg
- * @blkcg: blkcg of interest
- *
- * Return the parent blkcg of @blkcg.  Can be called anytime.
- */
-static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
-{
-       return css_to_blkcg(blkcg->css.parent);
-}
-
-/**
- * blkcg_pin_online - pin online state
- * @blkcg: blkcg of interest
- *
- * While pinned, a blkcg is kept online.  This is primarily used to
- * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
- * while an associated cgwb is still active.
- */
-static inline void blkcg_pin_online(struct blkcg *blkcg)
-{
-       refcount_inc(&blkcg->online_pin);
-}
-
-/**
- * blkcg_unpin_online - unpin online state
- * @blkcg: blkcg of interest
- *
- * This is primarily used to impedance-match blkg and cgwb lifetimes so
- * that blkg doesn't go offline while an associated cgwb is still active.
- * When this count goes to zero, all active cgwbs have finished so the
- * blkcg can continue destruction by calling blkcg_destroy_blkgs().
- */
-static inline void blkcg_unpin_online(struct blkcg *blkcg)
-{
-       do {
-               if (!refcount_dec_and_test(&blkcg->online_pin))
-                       break;
-               blkcg_destroy_blkgs(blkcg);
-               blkcg = blkcg_parent(blkcg);
-       } while (blkcg);
-}
+bool blk_cgroup_congested(void);
+void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css);
+void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css);
+struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio);
 
 #else  /* CONFIG_BLK_CGROUP */
 
-struct blkcg {
-};
-
-struct blkcg_gq {
-};
-
 #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
 
 static inline void blkcg_maybe_throttle_current(void) { }
 static inline bool blk_cgroup_congested(void) { return false; }
-
-#ifdef CONFIG_BLOCK
 static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
-static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
-#endif /* CONFIG_BLOCK */
-
-#endif /* CONFIG_BLK_CGROUP */
-
-#ifdef CONFIG_BLK_CGROUP_FC_APPID
-/*
- * Sets the fc_app_id field associted to blkcg
- * @app_id: application identifier
- * @cgrp_id: cgroup id
- * @app_id_len: size of application identifier
- */
-static inline int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
-{
-       struct cgroup *cgrp;
-       struct cgroup_subsys_state *css;
-       struct blkcg *blkcg;
-       int ret  = 0;
-
-       if (app_id_len > FC_APPID_LEN)
-               return -EINVAL;
-
-       cgrp = cgroup_get_from_id(cgrp_id);
-       if (!cgrp)
-               return -ENOENT;
-       css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
-       if (!css) {
-               ret = -ENOENT;
-               goto out_cgrp_put;
-       }
-       blkcg = css_to_blkcg(css);
-       /*
-        * There is a slight race condition on setting the appid.
-        * Worst case an I/O may not find the right id.
-        * This is no different from the I/O we let pass while obtaining
-        * the vmid from the fabric.
-        * Adding the overhead of a lock is not necessary.
-        */
-       strlcpy(blkcg->fc_app_id, app_id, app_id_len);
-       css_put(css);
-out_cgrp_put:
-       cgroup_put(cgrp);
-       return ret;
-}
-
-/**
- * blkcg_get_fc_appid - get the fc app identifier associated with a bio
- * @bio: target bio
- *
- * On success return the fc_app_id, on failure return NULL
- */
-static inline char *blkcg_get_fc_appid(struct bio *bio)
+static inline struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
 {
-       if (bio && bio->bi_blkg &&
-               (bio->bi_blkg->blkcg->fc_app_id[0] != '\0'))
-               return bio->bi_blkg->blkcg->fc_app_id;
        return NULL;
 }
-#else
-static inline int blkcg_set_fc_appid(char *buf, u64 id, size_t len) { return -EINVAL; }
-static inline char *blkcg_get_fc_appid(struct bio *bio) { return NULL; }
-#endif /*CONFIG_BLK_CGROUP_FC_APPID*/
+#endif /* CONFIG_BLK_CGROUP */
+
+int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len);
+char *blkcg_get_fc_appid(struct bio *bio);
+
 #endif /* _BLK_CGROUP_H */
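
With struct blkcg now private to block/, outside users reach the cgroup
through the css-based helpers declared above. An illustrative consumer
(the function and its pr_debug() message are hypothetical; bio_blkcg_css()
may return NULL for an unassociated bio):

    #include <linux/blk-cgroup.h>
    #include <linux/cgroup.h>

    static void my_trace_bio_cgroup(struct bio *bio)
    {
            struct cgroup_subsys_state *css = bio_blkcg_css(bio);

            if (css)
                    pr_debug("bio charged to cgroup id %llu\n",
                             cgroup_id(css->cgroup));
    }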
index 1973ef9bd40fcfbdff0bc7817272e8963f59c80a..c007d58d2703b29415037b94c7bc480635e1f35c 100644 (file)
@@ -44,7 +44,7 @@ struct block_device {
        unsigned long           bd_stamp;
        bool                    bd_read_only;   /* read-only policy */
        dev_t                   bd_dev;
-       int                     bd_openers;
+       atomic_t                bd_openers;
        struct inode *          bd_inode;       /* will die */
        struct super_block *    bd_super;
        void *                  bd_claiming;
@@ -246,9 +246,8 @@ typedef unsigned int blk_qc_t;
 struct bio {
        struct bio              *bi_next;       /* request queue link */
        struct block_device     *bi_bdev;
-       unsigned int            bi_opf;         /* bottom bits req flags,
-                                                * top bits REQ_OP. Use
-                                                * accessors.
+       unsigned int            bi_opf;         /* bottom bits REQ_OP, top bits
+                                                * req_flags.
                                                 */
        unsigned short          bi_flags;       /* BIO_* below */
        unsigned short          bi_ioprio;
@@ -329,7 +328,6 @@ enum {
        BIO_QOS_MERGED,         /* but went through rq_qos merge path */
        BIO_REMAPPED,
        BIO_ZONE_WRITE_LOCKED,  /* Owns a zoned device zone write lock */
-       BIO_PERCPU_CACHE,       /* can participate in per-cpu alloc cache */
        BIO_FLAG_LAST
 };
 
@@ -409,15 +407,17 @@ enum req_flag_bits {
         * work item to avoid such priority inversions.
         */
        __REQ_CGROUP_PUNT,
+       __REQ_POLLED,           /* caller polls for completion using bio_poll */
+       __REQ_ALLOC_CACHE,      /* allocate IO from cache if available */
+       __REQ_SWAP,             /* swap I/O */
+       __REQ_DRV,              /* for driver use */
 
-       /* command specific flags for REQ_OP_WRITE_ZEROES: */
+       /*
+        * Command specific flags, keep last:
+        */
+       /* for REQ_OP_WRITE_ZEROES: */
        __REQ_NOUNMAP,          /* do not free blocks when zeroing */
 
-       __REQ_POLLED,           /* caller polls for completion using bio_poll */
-
-       /* for driver use */
-       __REQ_DRV,
-       __REQ_SWAP,             /* swapping request. */
        __REQ_NR_BITS,          /* stops here */
 };
 
@@ -439,6 +439,7 @@ enum req_flag_bits {
 
 #define REQ_NOUNMAP            (1ULL << __REQ_NOUNMAP)
 #define REQ_POLLED             (1ULL << __REQ_POLLED)
+#define REQ_ALLOC_CACHE                (1ULL << __REQ_ALLOC_CACHE)
 
 #define REQ_DRV                        (1ULL << __REQ_DRV)
 #define REQ_SWAP               (1ULL << __REQ_SWAP)
index 60d01613899711c3fedca8001fbc0571bd32f096..5bdf2ac9142c95a8d66d4fbd75eba47acd77142b 100644 (file)
@@ -176,6 +176,21 @@ static inline bool disk_live(struct gendisk *disk)
        return !inode_unhashed(disk->part0->bd_inode);
 }
 
+/**
+ * disk_openers - returns how many openers are there for a disk
+ * @disk: disk to check
+ *
+ * This returns the number of openers for a disk.  Note that this value is only
+ * stable if disk->open_mutex is held.
+ *
+ * Note: Due to a quirk in the block layer open code, each open partition is
+ * only counted once even if there are multiple openers.
+ */
+static inline unsigned int disk_openers(struct gendisk *disk)
+{
+       return atomic_read(&disk->part0->bd_openers);
+}
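
With bd_openers now an atomic_t, disk_openers() can be read locklessly,
but as the comment above notes the value is only stable while
disk->open_mutex is held. A hypothetical driver-side check:

    #include <linux/blkdev.h>

    /* Decide whether we are currently the disk's only opener. */
    static bool my_drv_is_sole_opener(struct gendisk *disk)
    {
            bool sole;

            mutex_lock(&disk->open_mutex);  /* keeps the count stable */
            sole = disk_openers(disk) == 1;
            mutex_unlock(&disk->open_mutex);

            return sole;
    }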
+
 /*
  * The gendisk is refcounted by the part0 block_device, and the bd_device
  * therein is also used for device model presentation in sysfs.
@@ -248,6 +263,7 @@ struct queue_limits {
        unsigned int            io_opt;
        unsigned int            max_discard_sectors;
        unsigned int            max_hw_discard_sectors;
+       unsigned int            max_secure_erase_sectors;
        unsigned int            max_write_zeroes_sectors;
        unsigned int            max_zone_append_sectors;
        unsigned int            discard_granularity;
@@ -540,10 +556,8 @@ struct request_queue {
 #define QUEUE_FLAG_NONROT      6       /* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT                QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     7       /* do disk/partitions IO accounting */
-#define QUEUE_FLAG_DISCARD     8       /* supports DISCARD */
 #define QUEUE_FLAG_NOXMERGES   9       /* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM  10      /* Contributes to random pool */
-#define QUEUE_FLAG_SECERASE    11      /* supports secure erase */
 #define QUEUE_FLAG_SAME_FORCE  12      /* force complete on same CPU */
 #define QUEUE_FLAG_DEAD                13      /* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE   14      /* queue is initialized */
@@ -582,11 +596,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
        test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
 #define blk_queue_io_stat(q)   test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_add_random(q)        test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
-#define blk_queue_discard(q)   test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 #define blk_queue_zone_resetall(q)     \
        test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
-#define blk_queue_secure_erase(q) \
-       (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
 #define blk_queue_dax(q)       test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
 #define blk_queue_pci_p2pdma(q)        \
        test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
@@ -602,7 +613,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
                             REQ_FAILFAST_DRIVER))
 #define blk_queue_quiesced(q)  test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
 #define blk_queue_pm_only(q)   atomic_read(&(q)->pm_only)
-#define blk_queue_fua(q)       test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 #define blk_queue_registered(q)        test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
 #define blk_queue_nowait(q)    test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
 
@@ -950,6 +960,8 @@ extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_discard_segments(struct request_queue *,
                unsigned short);
+void blk_queue_max_secure_erase_sectors(struct request_queue *q,
+               unsigned int max_sectors);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
                unsigned int max_discard_sectors);
@@ -1090,13 +1102,12 @@ static inline long nr_blockdev_pages(void)
 
 extern void blk_io_schedule(void);
 
-#define BLKDEV_DISCARD_SECURE  (1 << 0)        /* issue a secure erase */
-
-extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
-extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, int flags,
-               struct bio **biop);
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp_mask);
+int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp_mask, struct bio **biop);
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp);
 
 #define BLKDEV_ZERO_NOUNMAP    (1 << 0)  /* do not free blocks */
 #define BLKDEV_ZERO_NOFALLBACK (1 << 1)  /* don't write explicit zeroes */
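
With BLKDEV_DISCARD_SECURE gone, secure erase gets its own entry point and
the flags argument drops out of the discard helpers. A sketch against the
new prototypes (my_trim_range() is hypothetical):

    #include <linux/blkdev.h>

    static int my_trim_range(struct block_device *bdev, sector_t sector,
                             sector_t nr_sects, bool secure)
    {
            if (secure) {
                    if (!bdev_max_secure_erase_sectors(bdev))
                            return -EOPNOTSUPP;
                    return blkdev_issue_secure_erase(bdev, sector, nr_sects,
                                                     GFP_KERNEL);
            }

            return blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL);
    }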
@@ -1115,7 +1126,7 @@ static inline int sb_issue_discard(struct super_block *sb, sector_t block,
                                              SECTOR_SHIFT),
                                    nr_blocks << (sb->s_blocksize_bits -
                                                  SECTOR_SHIFT),
-                                   gfp_mask, flags);
+                                   gfp_mask);
 }
 static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
                sector_t nr_blocks, gfp_t gfp_mask)
@@ -1189,6 +1200,12 @@ static inline unsigned int queue_max_zone_append_sectors(const struct request_qu
        return min(l->max_zone_append_sectors, l->max_sectors);
 }
 
+static inline unsigned int
+bdev_max_zone_append_sectors(struct block_device *bdev)
+{
+       return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+}
+
 static inline unsigned queue_logical_block_size(const struct request_queue *q)
 {
        int retval = 512;
@@ -1246,84 +1263,54 @@ bdev_zone_write_granularity(struct block_device *bdev)
        return queue_zone_write_granularity(bdev_get_queue(bdev));
 }
 
-static inline int queue_alignment_offset(const struct request_queue *q)
-{
-       if (q->limits.misaligned)
-               return -1;
+int bdev_alignment_offset(struct block_device *bdev);
+unsigned int bdev_discard_alignment(struct block_device *bdev);
 
-       return q->limits.alignment_offset;
+static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev)
+{
+       return bdev_get_queue(bdev)->limits.max_discard_sectors;
 }
 
-static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
+static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
 {
-       unsigned int granularity = max(lim->physical_block_size, lim->io_min);
-       unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
-               << SECTOR_SHIFT;
+       return bdev_get_queue(bdev)->limits.discard_granularity;
+}
 
-       return (granularity + lim->alignment_offset - alignment) % granularity;
+static inline unsigned int
+bdev_max_secure_erase_sectors(struct block_device *bdev)
+{
+       return bdev_get_queue(bdev)->limits.max_secure_erase_sectors;
 }
 
-static inline int bdev_alignment_offset(struct block_device *bdev)
+static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
 {
        struct request_queue *q = bdev_get_queue(bdev);
 
-       if (q->limits.misaligned)
-               return -1;
-       if (bdev_is_partition(bdev))
-               return queue_limit_alignment_offset(&q->limits,
-                               bdev->bd_start_sect);
-       return q->limits.alignment_offset;
+       if (q)
+               return q->limits.max_write_zeroes_sectors;
+
+       return 0;
 }
 
-static inline int queue_discard_alignment(const struct request_queue *q)
+static inline bool bdev_nonrot(struct block_device *bdev)
 {
-       if (q->limits.discard_misaligned)
-               return -1;
-
-       return q->limits.discard_alignment;
+       return blk_queue_nonrot(bdev_get_queue(bdev));
 }
 
-static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
+static inline bool bdev_stable_writes(struct block_device *bdev)
 {
-       unsigned int alignment, granularity, offset;
-
-       if (!lim->max_discard_sectors)
-               return 0;
-
-       /* Why are these in bytes, not sectors? */
-       alignment = lim->discard_alignment >> SECTOR_SHIFT;
-       granularity = lim->discard_granularity >> SECTOR_SHIFT;
-       if (!granularity)
-               return 0;
-
-       /* Offset of the partition start in 'granularity' sectors */
-       offset = sector_div(sector, granularity);
-
-       /* And why do we do this modulus *again* in blkdev_issue_discard()? */
-       offset = (granularity + alignment - offset) % granularity;
-
-       /* Turn it back into bytes, gaah */
-       return offset << SECTOR_SHIFT;
+       return test_bit(QUEUE_FLAG_STABLE_WRITES,
+                       &bdev_get_queue(bdev)->queue_flags);
 }
 
-static inline int bdev_discard_alignment(struct block_device *bdev)
+static inline bool bdev_write_cache(struct block_device *bdev)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
-
-       if (bdev_is_partition(bdev))
-               return queue_limit_discard_alignment(&q->limits,
-                               bdev->bd_start_sect);
-       return q->limits.discard_alignment;
+       return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags);
 }
 
-static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
+static inline bool bdev_fua(struct block_device *bdev)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
-
-       if (q)
-               return q->limits.max_write_zeroes_sectors;
-
-       return 0;
+       return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags);
 }
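
The queue-flag tests removed above are replaced by bdev-level predicates,
sparing callers the bdev_get_queue() dance. For example, a filesystem
might derive its flush policy like this (an illustrative simplification,
not code from this series):

    #include <linux/blkdev.h>

    /*
     * An explicit flush is only worthwhile when the device has a volatile
     * write cache and its writes cannot be forced out with FUA.
     */
    static bool my_fs_needs_flush(struct block_device *bdev)
    {
            return bdev_write_cache(bdev) && !bdev_fua(bdev);
    }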
 
 static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
@@ -1491,9 +1478,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
                wake_up_process(waiter);
 }
 
-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
-               unsigned int op);
-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+unsigned long bdev_start_io_acct(struct block_device *bdev,
+                                unsigned int sectors, unsigned int op,
+                                unsigned long start_time);
+void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
                unsigned long start_time);
 
 void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
index 22501a293fa545b4d744ed6dc054f133fc8e799b..623e22492afa50de750593485e0e264519c44779 100644 (file)
@@ -27,12 +27,10 @@ struct blk_trace {
        atomic_t dropped;
 };
 
-struct blkcg;
-
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
-extern __printf(3, 4)
-void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *fmt, ...);
+__printf(3, 4) void __blk_trace_note_message(struct blk_trace *bt,
+               struct cgroup_subsys_state *css, const char *fmt, ...);
 
 /**
  * blk_add_trace_msg - Add a (simple) message to the blktrace stream
@@ -47,14 +45,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
  *     NOTE: Can not use 'static inline' due to presence of var args...
  *
  **/
-#define blk_add_cgroup_trace_msg(q, cg, fmt, ...)                      \
+#define blk_add_cgroup_trace_msg(q, css, fmt, ...)                     \
        do {                                                            \
                struct blk_trace *bt;                                   \
                                                                        \
                rcu_read_lock();                                        \
                bt = rcu_dereference((q)->blk_trace);                   \
                if (unlikely(bt))                                       \
-                       __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
+                       __blk_trace_note_message(bt, css, fmt, ##__VA_ARGS__);\
                rcu_read_unlock();                                      \
        } while (0)
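
After the rename, callers hand the macro a cgroup_subsys_state rather than
a blkcg; the expansion is otherwise unchanged. Hypothetical usage, where
q, blkcg_css and nr_queued are caller-provided:

    blk_add_cgroup_trace_msg(q, blkcg_css,
                             "throttled, %u bios queued", nr_queued);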
 #define blk_add_trace_msg(q, fmt, ...)                                 \
index bdb5298735ce93d164ab59bdddf1a41cc0761e3b..ecc3d3ec41cf3ea25f5a6cba3544810981fcbcfa 100644 (file)
@@ -2085,6 +2085,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
                                       struct net_device *netdev);
 bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
 
+void unpriv_ebpf_notify(int new_state);
+
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
 
index efd8205282da74e12c4cb3e1fcb99d899f8fed64..cb0d6cd1c12f24e1dd8681b5f9f0302675bec7d5 100644 (file)
@@ -72,6 +72,24 @@ enum cc_attr {
         * Examples include TDX guest & SEV.
         */
        CC_ATTR_GUEST_UNROLL_STRING_IO,
+
+       /**
+        * @CC_ATTR_GUEST_SEV_SNP: Guest SNP is active.
+        *
+        * The platform/OS is running as a guest/virtual machine and actively
+        * using AMD SEV-SNP features.
+        */
+       CC_ATTR_GUEST_SEV_SNP,
+
+       /**
+        * @CC_ATTR_HOTPLUG_DISABLED: Hotplug is not supported or disabled.
+        *
+        * The platform/OS is running as a guest/virtual machine that does
+        * not support the CPU hotplug feature.
+        *
+        * Examples include TDX Guest.
+        */
+       CC_ATTR_HOTPLUG_DISABLED,
 };
 
 #ifdef CONFIG_ARCH_HAS_CC_PLATFORM
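
Like the existing attributes, the new ones are queried through
cc_platform_has(). A hedged sketch (my_snp_setup() is hypothetical):

    #include <linux/cc_platform.h>
    #include <linux/errno.h>

    static int my_snp_setup(void)
    {
            /* Bail out unless we are running as an SEV-SNP guest. */
            if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
                    return -ENODEV;

            return 0;
    }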
index 0a89f111e00e1c730c283785a5155f4ceaf02d2d..67caa909e3e615670ec415376bc59d57f64ad15d 100644 (file)
@@ -77,7 +77,6 @@ struct cdrom_device_ops {
        int (*tray_move) (struct cdrom_device_info *, int);
        int (*lock_door) (struct cdrom_device_info *, int);
        int (*select_speed) (struct cdrom_device_info *, int);
-       int (*select_disc) (struct cdrom_device_info *, int);
        int (*get_last_session) (struct cdrom_device_info *,
                                 struct cdrom_multisession *);
        int (*get_mcn) (struct cdrom_device_info *,
index 3431011f364dd4cec482bb58b93c345fb815b276..cba8a6ffc3290d068d51f5d734c3547e4de23c4f 100644 (file)
@@ -287,6 +287,9 @@ struct ceph_osd_linger_request {
        rados_watcherrcb_t errcb;
        void *data;
 
+       struct ceph_pagelist *request_pl;
+       struct page **notify_id_pages;
+
        struct page ***preply_pages;
        size_t *preply_len;
 };
index 1c758b0e03598f672267110fa99bc447446896a1..01fddf72a81f081d4392b56aa3734123f8fa7a29 100644 (file)
@@ -235,6 +235,7 @@ typedef struct compat_siginfo {
                                struct {
                                        compat_ulong_t _data;
                                        u32 _type;
+                                       u32 _flags;
                                } _perf;
                        };
                } _sigfault;
index babb1347148c5d6327601aac1d569b63825d370b..c84fec767445d69f5fadf1a2687d0c99da2b8ce4 100644 (file)
 #define __nocfi                __attribute__((__no_sanitize__("cfi")))
 #define __cficanonical __attribute__((__cfi_canonical_jump_table__))
 
+#if defined(CONFIG_CFI_CLANG)
+/*
+ * With CONFIG_CFI_CLANG, the compiler replaces function address
+ * references with the address of the function's CFI jump table
+ * entry. The function_nocfi macro always returns the address of the
+ * actual function instead.
+ */
+#define function_nocfi(x)      __builtin_function_start(x)
+#endif
+
 /*
  * Turn individual warnings and errors on and off locally, depending
  * on version.
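
Code that must hand a function's real entry point to hardware or to other
non-CFI-aware consumers wraps the symbol as below (a sketch; my_handler()
is hypothetical):

    static void my_handler(void)
    {
    }

    static unsigned long my_handler_entry(void)
    {
            /* Under CONFIG_CFI_CLANG this skips the CFI jump-table entry. */
            return (unsigned long)function_nocfi(my_handler);
    }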
index 52299c957c98e6fd626ffc995c1e3e1787d2dc77..a0c55eeaeaf163460866f9726c267d14c329857d 100644 (file)
                __builtin_unreachable();        \
        } while (0)
 
-#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
-#define __randomize_layout __attribute__((randomize_layout))
-#define __no_randomize_layout __attribute__((no_randomize_layout))
-/* This anon struct can add padding, so only enable it under randstruct. */
-#define randomized_struct_fields_start struct {
-#define randomized_struct_fields_end   } __randomize_layout;
-#endif
-
 /*
  * GCC 'asm goto' miscompiles certain code sequences:
  *
index 219aa5ddbc73d2914bae4b8e2164b00e916f039f..01ce94b58b423e6021bf007edb0fba6bce3cf141 100644 (file)
@@ -109,7 +109,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 #endif
 
 /* Unreachable code */
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_OBJTOOL
 /*
  * These macros help objtool understand GCC code flow for unreachable code.
  * The __COUNTER__ based labels are a hack to make each instance of the macros
@@ -128,10 +128,10 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 /* Annotate a C jump table to allow objtool to follow the code flow */
 #define __annotate_jump_table __section(".rodata..c_jump_table")
 
-#else
+#else /* !CONFIG_OBJTOOL */
 #define annotate_unreachable()
 #define __annotate_jump_table
-#endif
+#endif /* CONFIG_OBJTOOL */
 
 #ifndef unreachable
 # define unreachable() do {            \
index 1c2c33ae1b37da8416b2b0a8613fe892490ae286..d08dfcb0ac6876b1124b17d3cab35de008696d75 100644 (file)
@@ -242,15 +242,15 @@ struct ftrace_likely_data {
 # define __latent_entropy
 #endif
 
-#ifndef __randomize_layout
+#if defined(RANDSTRUCT) && !defined(__CHECKER__)
+# define __randomize_layout __designated_init __attribute__((randomize_layout))
+# define __no_randomize_layout __attribute__((no_randomize_layout))
+/* This anon struct can add padding, so only enable it under randstruct. */
+# define randomized_struct_fields_start        struct {
+# define randomized_struct_fields_end  } __randomize_layout;
+#else
 # define __randomize_layout __designated_init
-#endif
-
-#ifndef __no_randomize_layout
 # define __no_randomize_layout
-#endif
-
-#ifndef randomized_struct_fields_start
 # define randomized_struct_fields_start
 # define randomized_struct_fields_end
 #endif
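
Structs opt in exactly as before; only the gating symbol moved from
RANDSTRUCT_PLUGIN to RANDSTRUCT. An illustrative declaration (struct
my_token is hypothetical):

    /* Field order is shuffled at build time when RANDSTRUCT is enabled. */
    struct my_token {
            unsigned long   magic;
            void            *owner;
    } __randomize_layout;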
index 6a511a1078ca069c4fa0e120b781c4203571afc8..eacb7dd7b3af34a6ea82c17eda3752b4cfb072b2 100644 (file)
@@ -558,6 +558,7 @@ extern const char *const cper_proc_error_type_strs[4];
 u64 cper_next_record_id(void);
 const char *cper_severity_str(unsigned int);
 const char *cper_mem_err_type_str(unsigned int);
+const char *cper_mem_err_status_str(u64 status);
 void cper_print_bits(const char *prefix, unsigned int bits,
                     const char * const strs[], unsigned int strs_size);
 void cper_mem_err_pack(const struct cper_sec_mem_err *,
@@ -568,5 +569,7 @@ void cper_print_proc_arm(const char *pfx,
                         const struct cper_sec_proc_arm *proc);
 void cper_print_proc_ia(const char *pfx,
                        const struct cper_sec_proc_ia *proc);
+int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg);
+int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg);
 
 #endif
index 35c7d6db4139e46df2d8da6b8a0e147c012fc71d..d5595d57f4e53680a7e8b2a6e1bf6435a7c14501 100644 (file)
@@ -1199,7 +1199,6 @@ static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
                        struct cpufreq_governor *old_gov) { }
 #endif
 
-extern void arch_freq_prepare_all(void);
 extern unsigned int arch_freq_get_on_cpu(int cpu);
 
 #ifndef arch_set_freq_scale
index 82e33137f917fb70f055bfd8b823b700d4e2d5be..b66c5f38915951f5437c7decf5f1e7f7364ce374 100644 (file)
@@ -222,6 +222,7 @@ enum cpuhp_state {
        CPUHP_AP_PERF_S390_SF_ONLINE,
        CPUHP_AP_PERF_ARM_CCI_ONLINE,
        CPUHP_AP_PERF_ARM_CCN_ONLINE,
+       CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
        CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
        CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
        CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
index 142474b4af9637e75323da69f08dd05ee6e605d1..dc10bee75a729246e5dd176941a3aa78b4e3ef74 100644 (file)
@@ -38,6 +38,7 @@ enum devfreq_timer {
 
 struct devfreq;
 struct devfreq_governor;
+struct devfreq_cpu_data;
 struct thermal_cooling_device;
 
 /**
@@ -288,6 +289,11 @@ struct devfreq_simple_ondemand_data {
 #endif
 
 #if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE)
+enum devfreq_parent_dev_type {
+       DEVFREQ_PARENT_DEV,
+       CPUFREQ_PARENT_DEV,
+};
+
 /**
  * struct devfreq_passive_data - ``void *data`` fed to struct devfreq
  *     and devfreq_add_device
@@ -299,8 +305,11 @@ struct devfreq_simple_ondemand_data {
  *                     using governors except for passive governor.
  *                     If the devfreq device has the specific method to decide
  *                     the next frequency, should use this callback.
- * @this:      the devfreq instance of own device.
- * @nb:                the notifier block for DEVFREQ_TRANSITION_NOTIFIER list
+ * @parent_type:       the parent type of the device.
+ * @this:              the devfreq instance of the device itself.
+ * @nb:                        the notifier block for DEVFREQ_TRANSITION_NOTIFIER or
+ *                     CPUFREQ_TRANSITION_NOTIFIER list.
+ * @cpu_data_list:     the list of cpu frequency data for all cpufreq_policy.
  *
 * The devfreq_passive_data has to set the devfreq instance of the parent
  * device with governors except for the passive governor. But, don't need to
@@ -314,9 +323,13 @@ struct devfreq_passive_data {
        /* Optional callback to decide the next frequency of the passive device */
        int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
 
+       /* Should set the type of parent device */
+       enum devfreq_parent_dev_type parent_type;
+
        /* For passive governor's internal use. Don't need to set them */
        struct devfreq *this;
        struct notifier_block nb;
+       struct list_head cpu_data_list;
 };
 #endif
 
index ccd4d3f91c98c247cb772df2a94e104b231a6bf7..db424f3dc3f2fc3550ffceceef226439a0cd2c8c 100644 (file)
@@ -213,6 +213,8 @@ struct capsule_info {
        size_t                  page_bytes_remain;
 };
 
+int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+                           size_t hdr_bytes);
 int __efi_capsule_setup_info(struct capsule_info *cap_info);
 
 /*
@@ -383,6 +385,7 @@ void efi_native_runtime_setup(void);
 #define EFI_LOAD_FILE_PROTOCOL_GUID            EFI_GUID(0x56ec3091, 0x954c, 0x11d2,  0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
 #define EFI_LOAD_FILE2_PROTOCOL_GUID           EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e,  0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d)
 #define EFI_RT_PROPERTIES_TABLE_GUID           EFI_GUID(0xeb66918a, 0x7eef, 0x402a,  0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9)
+#define EFI_DXE_SERVICES_TABLE_GUID            EFI_GUID(0x05ad34ba, 0x6f02, 0x4214,  0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9)
 
 #define EFI_IMAGE_SECURITY_DATABASE_GUID       EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596,  0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
 #define EFI_SHIM_LOCK_GUID                     EFI_GUID(0x605dab50, 0xe046, 0x4300,  0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
@@ -390,6 +393,7 @@ void efi_native_runtime_setup(void);
 #define EFI_CERT_SHA256_GUID                   EFI_GUID(0xc1c41626, 0x504c, 0x4092, 0xac, 0xa9, 0x41, 0xf9, 0x36, 0x93, 0x43, 0x28)
 #define EFI_CERT_X509_GUID                     EFI_GUID(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5, 0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72)
 #define EFI_CERT_X509_SHA256_GUID              EFI_GUID(0x3bd2a492, 0x96c0, 0x4079, 0xb4, 0x20, 0xfc, 0xf9, 0x8e, 0xf1, 0x03, 0xed)
+#define EFI_CC_BLOB_GUID                       EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
 
 /*
  * This GUID is used to pass to the kernel proper the struct screen_info
@@ -405,6 +409,20 @@ void efi_native_runtime_setup(void);
 #define LINUX_EFI_MEMRESERVE_TABLE_GUID                EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5,  0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2)
 #define LINUX_EFI_INITRD_MEDIA_GUID            EFI_GUID(0x5568e427, 0x68fc, 0x4f3d,  0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68)
 #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID      EFI_GUID(0xc451ed2b, 0x9694, 0x45d3,  0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89)
+#define LINUX_EFI_COCO_SECRET_AREA_GUID                EFI_GUID(0xadf956ad, 0xe98c, 0x484c,  0xae, 0x11, 0xb5, 0x1c, 0x7d, 0x33, 0x64, 0x47)
+
+#define RISCV_EFI_BOOT_PROTOCOL_GUID           EFI_GUID(0xccd15fec, 0x6f73, 0x4eec,  0x83, 0x95, 0x3e, 0x69, 0xe4, 0xb9, 0x40, 0xbf)
+
+/*
+ * This GUID may be installed onto the kernel image's handle as a NULL protocol
+ * to signal to the stub that the placement of the image should be respected,
+ * and moving the image in physical memory is undesirable. To ensure
+ * compatibility with 64k pages kernels with virtually mapped stacks, and to
+ * avoid defeating physical randomization, this protocol should only be
+ * installed if the image was placed at a randomized 128k aligned address in
+ * memory.
+ */
+#define LINUX_EFI_LOADED_IMAGE_FIXED_GUID      EFI_GUID(0xf5a37b6d, 0x3344, 0x42a5,  0xb6, 0xbb, 0x97, 0x86, 0x48, 0xc1, 0x89, 0x0a)
 
 /* OEM GUIDs */
 #define DELLEMC_EFI_RCI2_TABLE_GUID            EFI_GUID(0x2d9f28a2, 0xa886, 0x456a,  0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
@@ -435,6 +453,7 @@ typedef struct {
 } efi_config_table_type_t;
 
 #define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL)
+#define EFI_DXE_SERVICES_TABLE_SIGNATURE ((u64)0x565245535f455844ULL)
 
 #define EFI_2_30_SYSTEM_TABLE_REVISION  ((2 << 16) | (30))
 #define EFI_2_20_SYSTEM_TABLE_REVISION  ((2 << 16) | (20))
@@ -596,6 +615,7 @@ extern struct efi {
        unsigned long                   tpm_log;                /* TPM2 Event Log table */
        unsigned long                   tpm_final_log;          /* TPM2 Final Events Log table */
        unsigned long                   mokvar_table;           /* MOK variable config table */
+       unsigned long                   coco_secret;            /* Confidential computing secret table */
 
        efi_get_time_t                  *get_time;
        efi_set_time_t                  *set_time;
@@ -1335,4 +1355,12 @@ extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
 static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { }
 #endif
 
+struct linux_efi_coco_secret_area {
+       u64     base_pa;
+       u64     size;
+};
+
+/* Header of a populated EFI secret area */
+#define EFI_SECRET_TABLE_HEADER_GUID   EFI_GUID(0x1e74f542, 0x71dd, 0x4d66,  0x96, 0x3e, 0xef, 0x42, 0x87, 0xff, 0x17, 0x3b)
+
 #endif /* _LINUX_EFI_H */
index f8e206e82476c30f067ac96f0ca4036dc693638c..346a8b56cdc831f798b0c59eab56160446d6de74 100644 (file)
@@ -84,15 +84,6 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re
 #endif
 }
 
-static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
-{
-#ifdef ELF_CORE_COPY_KERNEL_REGS
-       ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs);
-#else
-       elf_core_copy_regs(elfregs, regs);
-#endif
-}
-
 static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
 {
 #if defined (ELF_CORE_COPY_TASK_REGS)
index 9f3c400bc52d4346d368d80072a8c205dadc7590..8419bffb4398f8ce569bacf255fd343b3e02c8fc 100644 (file)
@@ -67,11 +67,16 @@ struct em_perf_domain {
  *
  *  EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
  *  energy consumption.
+ *
+ *  EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might
+ *  have been created by a platform that lacks real power information.
  */
 #define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
 #define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
+#define EM_PERF_DOMAIN_ARTIFICIAL BIT(2)
 
 #define em_span_cpus(em) (to_cpumask((em)->cpus))
+#define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL)
 
 #ifdef CONFIG_ENERGY_MODEL
 #define EM_MAX_POWER 0xFFFF
@@ -96,11 +101,11 @@ struct em_data_callback {
        /**
         * active_power() - Provide power at the next performance state of
         *              a device
+        * @dev         : Device for which we do this operation (can be a CPU)
         * @power       : Active power at the performance state
         *              (modified)
         * @freq        : Frequency at the performance state in kHz
         *              (modified)
-        * @dev         : Device for which we do this operation (can be a CPU)
         *
         * active_power() must find the lowest performance state of 'dev' above
         * 'freq' and update 'power' and 'freq' to the matching active power
@@ -112,11 +117,32 @@ struct em_data_callback {
         *
         * Return 0 on success.
         */
-       int (*active_power)(unsigned long *power, unsigned long *freq,
-                           struct device *dev);
+       int (*active_power)(struct device *dev, unsigned long *power,
+                           unsigned long *freq);
+
+       /**
+        * get_cost() - Provide the cost at the given performance state of
+        *              a device
+        * @dev         : Device for which we do this operation (can be a CPU)
+        * @freq        : Frequency at the performance state in kHz
+        * @cost        : The cost value for the performance state
+        *              (modified)
+        *
+        * In case of CPUs, the cost is the one of a single CPU in the domain.
+        * It is expected to fit in the [0, EM_MAX_POWER] range due to internal
+        * usage in EAS calculation.
+        *
+        * Return 0 on success, or appropriate error value in case of failure.
+        */
+       int (*get_cost)(struct device *dev, unsigned long freq,
+                       unsigned long *cost);
 };
-#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb }
 #define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb)
+#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb)     \
+       { .active_power = _active_power_cb,             \
+         .get_cost = _cost_cb }
+#define EM_DATA_CB(_active_power_cb)                   \
+               EM_ADV_DATA_CB(_active_power_cb, NULL)
 
 struct em_perf_domain *em_cpu_get(int cpu);
 struct em_perf_domain *em_pd_get(struct device *dev);
@@ -264,6 +290,7 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
 
 #else
 struct em_data_callback {};
+#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { }
 #define EM_DATA_CB(_active_power_cb) { }
 #define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0)
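
Drivers that can supply an abstract cost directly now register a second
callback through EM_ADV_DATA_CB(); plain EM_DATA_CB() keeps working. A
sketch in which every my_*() lookup is hypothetical:

    #include <linux/energy_model.h>

    static int my_active_power(struct device *dev, unsigned long *power,
                               unsigned long *freq)
    {
            *freq = my_next_freq_khz(*freq);        /* hypothetical table */
            *power = my_power_at_khz(*freq);        /* hypothetical table */
            return 0;
    }

    static int my_get_cost(struct device *dev, unsigned long freq,
                           unsigned long *cost)
    {
            *cost = my_cost_at_khz(freq);           /* hypothetical table */
            return 0;
    }

    static struct em_data_callback my_em_cb =
            EM_ADV_DATA_CB(my_active_power, my_get_cost);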
 
index ab78bd4c2eb0fa4ced448083d09dfb096d005bf3..c92ac75d6556dd6ea901460fda5a7bb2076c2533 100644 (file)
@@ -63,7 +63,7 @@
         ARCH_EXIT_TO_USER_MODE_WORK)
 
 /**
- * arch_check_user_regs - Architecture specific sanity check for user mode regs
+ * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
 * @regs:      Pointer to current's pt_regs
  *
  * Defaults to an empty implementation. Can be replaced by architecture
  * section. Use __always_inline so the compiler cannot push it out of line
  * and make it instrumentable.
  */
-static __always_inline void arch_check_user_regs(struct pt_regs *regs);
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs);
 
-#ifndef arch_check_user_regs
-static __always_inline void arch_check_user_regs(struct pt_regs *regs) {}
+#ifndef arch_enter_from_user_mode
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
 #endif
 
 /**
index bbde95387a23af8daf91f0926069a2090c35bd0f..87b5af1d9fbe037dbcbe404547fac061544c3abb 100644 (file)
@@ -1953,6 +1953,7 @@ struct dir_context {
 #define REMAP_FILE_ADVISORY            (REMAP_FILE_CAN_SHORTEN)
 
 struct iov_iter;
+struct io_uring_cmd;
 
 struct file_operations {
        struct module *owner;
@@ -1995,6 +1996,7 @@ struct file_operations {
                                   struct file *file_out, loff_t pos_out,
                                   loff_t len, unsigned int remap_flags);
        int (*fadvise)(struct file *, loff_t, loff_t, int);
+       int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 } __randomize_layout;
 
 struct inode_operations {
index 50d92d805bd8c996528f257c87ecec87694ddfd0..e60d57c99cb6f20dcbcc38ad10ee1a0320b89de1 100644 (file)
 #include <linux/slab.h>
 #include <uapi/linux/fscrypt.h>
 
-#define FS_CRYPTO_BLOCK_SIZE           16
+/*
+ * The lengths of all file contents blocks must be divisible by this value.
+ * This is needed to ensure that all contents encryption modes will work, as
+ * some of the supported modes don't support arbitrarily byte-aligned messages.
+ *
+ * Since the needed alignment is 16 bytes, most filesystems will meet this
+ * requirement naturally, as typical block sizes are powers of 2.  However, if a
+ * filesystem can generate arbitrarily byte-aligned block lengths (e.g., via
+ * compression), then it will need to pad to this alignment before encryption.
+ */
+#define FSCRYPT_CONTENTS_ALIGNMENT 16
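
A filesystem that produces oddly sized blocks (compression being the usual
case) would pad to the constant just defined before encrypting. A minimal
sketch (my_padded_block_len() is hypothetical):

    #include <linux/fscrypt.h>
    #include <linux/math.h>

    /* Length actually handed to contents encryption. */
    static inline unsigned int my_padded_block_len(unsigned int len)
    {
            return round_up(len, FSCRYPT_CONTENTS_ALIGNMENT);
    }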
 
 union fscrypt_policy;
 struct fscrypt_info;
+struct fs_parameter;
 struct seq_file;
 
 struct fscrypt_str {
@@ -279,10 +290,19 @@ struct fscrypt_dummy_policy {
        const union fscrypt_policy *policy;
 };
 
+int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param,
+                                   struct fscrypt_dummy_policy *dummy_policy);
+bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1,
+                                 const struct fscrypt_dummy_policy *p2);
 int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg,
                                struct fscrypt_dummy_policy *dummy_policy);
 void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep,
                                        struct super_block *sb);
+static inline bool
+fscrypt_is_dummy_policy_set(const struct fscrypt_dummy_policy *dummy_policy)
+{
+       return dummy_policy->policy != NULL;
+}
 static inline void
 fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
 {
@@ -293,6 +313,8 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
 /* keyring.c */
 void fscrypt_sb_free(struct super_block *sb);
 int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
+int fscrypt_add_test_dummy_key(struct super_block *sb,
+                              const struct fscrypt_dummy_policy *dummy_policy);
 int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg);
 int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg);
 int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg);
@@ -467,12 +489,32 @@ static inline int fscrypt_set_context(struct inode *inode, void *fs_data)
 struct fscrypt_dummy_policy {
 };
 
+static inline int
+fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param,
+                                   struct fscrypt_dummy_policy *dummy_policy)
+{
+       return -EINVAL;
+}
+
+static inline bool
+fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1,
+                            const struct fscrypt_dummy_policy *p2)
+{
+       return true;
+}
+
 static inline void fscrypt_show_test_dummy_encryption(struct seq_file *seq,
                                                      char sep,
                                                      struct super_block *sb)
 {
 }
 
+static inline bool
+fscrypt_is_dummy_policy_set(const struct fscrypt_dummy_policy *dummy_policy)
+{
+       return false;
+}
+
 static inline void
 fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
 {
@@ -488,6 +530,13 @@ static inline int fscrypt_ioctl_add_key(struct file *filp, void __user *arg)
        return -EOPNOTSUPP;
 }
 
+static inline int
+fscrypt_add_test_dummy_key(struct super_block *sb,
+                          const struct fscrypt_dummy_policy *dummy_policy)
+{
+       return 0;
+}
+
 static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg)
 {
        return -EOPNOTSUPP;
index a7afc800bd8d27a927cce6c1eb08680d828fde38..7af030fa3c366cb18cd1ca794d65d68e95268c9a 100644 (file)
 #define _LINUX_FSVERITY_H
 
 #include <linux/fs.h>
+#include <crypto/hash_info.h>
+#include <crypto/sha2.h>
 #include <uapi/linux/fsverity.h>
 
+/*
+ * Largest digest size among all hash algorithms supported by fs-verity.
+ * Currently assumed to be <= size of fsverity_descriptor::root_hash.
+ */
+#define FS_VERITY_MAX_DIGEST_SIZE      SHA512_DIGEST_SIZE
+
 /* Verity operations for filesystems */
 struct fsverity_operations {
 
@@ -131,6 +139,9 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
 /* measure.c */
 
 int fsverity_ioctl_measure(struct file *filp, void __user *arg);
+int fsverity_get_digest(struct inode *inode,
+                       u8 digest[FS_VERITY_MAX_DIGEST_SIZE],
+                       enum hash_algo *alg);
 
 /* open.c */
 
@@ -170,6 +181,13 @@ static inline int fsverity_ioctl_measure(struct file *filp, void __user *arg)
        return -EOPNOTSUPP;
 }
 
+static inline int fsverity_get_digest(struct inode *inode,
+                                     u8 digest[FS_VERITY_MAX_DIGEST_SIZE],
+                                     enum hash_algo *alg)
+{
+       return -EOPNOTSUPP;
+}
+
 /* open.c */
 
 static inline int fsverity_file_open(struct inode *inode, struct file *filp)
index 12be1601fd845c484eb381a4ac11410469410158..bcc17f95b90667d863103b98ab68049dab53de32 100644 (file)
@@ -8,14 +8,21 @@
 
 /* Helpers for Goldfish virtual platform */
 
+#ifndef gf_ioread32
+#define gf_ioread32 ioread32
+#endif
+#ifndef gf_iowrite32
+#define gf_iowrite32 iowrite32
+#endif
+
 static inline void gf_write_ptr(const void *ptr, void __iomem *portl,
                                void __iomem *porth)
 {
        const unsigned long addr = (unsigned long)ptr;
 
-       __raw_writel(lower_32_bits(addr), portl);
+       gf_iowrite32(lower_32_bits(addr), portl);
 #ifdef CONFIG_64BIT
-       __raw_writel(upper_32_bits(addr), porth);
+       gf_iowrite32(upper_32_bits(addr), porth);
 #endif
 }
 
@@ -23,9 +30,9 @@ static inline void gf_write_dma_addr(const dma_addr_t addr,
                                     void __iomem *portl,
                                     void __iomem *porth)
 {
-       __raw_writel(lower_32_bits(addr), portl);
+       gf_iowrite32(lower_32_bits(addr), portl);
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-       __raw_writel(upper_32_bits(addr), porth);
+       gf_iowrite32(upper_32_bits(addr), porth);
 #endif
 }
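
The gf_ioread32()/gf_iowrite32() hooks default to the little-endian
accessors; the #ifndef guards exist so an architecture can override them
before this header is parsed. A hypothetical arch header might do:

    /* In a hypothetical asm/goldfish.h, seen before the fallbacks above: */
    #define gf_ioread32     ioread32be      /* big-endian Goldfish bus */
    #define gf_iowrite32    iowrite32be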
 
index 874aabd270c9bf7a7a649a2de9d37f05b42f77a8..cb689264f3e9170599667174243e670462812b79 100644 (file)
@@ -588,6 +588,22 @@ void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset);
 void gpiochip_disable_irq(struct gpio_chip *gc, unsigned int offset);
 void gpiochip_enable_irq(struct gpio_chip *gc, unsigned int offset);
 
+/* irq_data versions of the above */
+int gpiochip_irq_reqres(struct irq_data *data);
+void gpiochip_irq_relres(struct irq_data *data);
+
+/* Paste this in your irq_chip structure */
+#define        GPIOCHIP_IRQ_RESOURCE_HELPERS                                   \
+               .irq_request_resources  = gpiochip_irq_reqres,          \
+               .irq_release_resources  = gpiochip_irq_relres
+
+static inline void gpio_irq_chip_set_chip(struct gpio_irq_chip *girq,
+                                         const struct irq_chip *chip)
+{
+       /* Yes, dropping const is ugly, but it isn't like we have a choice */
+       girq->chip = (struct irq_chip *)chip;
+}
+
 /* Line status inquiry for drivers */
 bool gpiochip_line_is_open_drain(struct gpio_chip *gc, unsigned int offset);
 bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset);
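
Together with the IRQCHIP_IMMUTABLE flag added to <linux/irq.h> further down, these helpers support the new pattern of a const irq_chip that the gpiolib core may not modify. A sketch of the expected usage; the chip name and callback bodies are placeholders:

#include <linux/gpio/driver.h>
#include <linux/irq.h>

static void example_gpio_irq_mask(struct irq_data *d)
{
	/* driver-specific mask, omitted */
}

static void example_gpio_irq_unmask(struct irq_data *d)
{
	/* driver-specific unmask, omitted */
}

static const struct irq_chip example_gpio_irq_chip = {
	.name		= "example-gpio",
	.irq_mask	= example_gpio_irq_mask,
	.irq_unmask	= example_gpio_irq_unmask,
	.flags		= IRQCHIP_IMMUTABLE,
	GPIOCHIP_IRQ_RESOURCE_HELPERS,
};

static void example_setup_girq(struct gpio_irq_chip *girq)
{
	/* replaces the old "girq->chip = &mutable_chip" assignment */
	gpio_irq_chip_set_chip(girq, &example_gpio_irq_chip);
}
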
index a77be563020940c2449f1be0222ceef60254a4c1..337bd9f3292182d4aabc52ee855ed35c484ca136 100644 (file)
@@ -149,6 +149,11 @@ static inline void totalhigh_pages_add(long count)
        atomic_long_add(count, &_totalhigh_pages);
 }
 
+static inline bool is_kmap_addr(const void *x)
+{
+       unsigned long addr = (unsigned long)x;
+       return addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP);
+}
 #else /* CONFIG_HIGHMEM */
 
 static inline struct page *kmap_to_page(void *addr)
@@ -234,6 +239,11 @@ static inline void __kunmap_atomic(void *addr)
 static inline unsigned int nr_free_highpages(void) { return 0; }
 static inline unsigned long totalhigh_pages(void) { return 0UL; }
 
+static inline bool is_kmap_addr(const void *x)
+{
+       return false;
+}
+
 #endif /* CONFIG_HIGHMEM */
 
 /*
index eba380b76d157671bd2ea75e27e99e896ea3ac44..14325f93c6b2bbfbe4648d18f057ac23094850b5 100644 (file)
@@ -450,6 +450,9 @@ hwmon_device_register_with_info(struct device *dev,
                                const struct hwmon_chip_info *info,
                                const struct attribute_group **extra_groups);
 struct device *
+hwmon_device_register_for_thermal(struct device *dev, const char *name,
+                                 void *drvdata);
+struct device *
 devm_hwmon_device_register_with_info(struct device *dev,
                                const char *name, void *drvdata,
                                const struct hwmon_chip_info *info,
@@ -461,6 +464,9 @@ void devm_hwmon_device_unregister(struct device *dev);
 int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type,
                       u32 attr, int channel);
 
+char *hwmon_sanitize_name(const char *name);
+char *devm_hwmon_sanitize_name(struct device *dev, const char *name);
+
 /**
  * hwmon_is_bad_char - Is the char invalid in a hwmon name
  * @ch: the char to be considered
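
hwmon device names must not contain the characters hwmon_is_bad_char() rejects; the new helpers return a duplicate of the input with offending characters replaced. A sketch of the devm_ variant in a probe path (the chip name is a placeholder):

#include <linux/hwmon.h>
#include <linux/err.h>

static int example_register(struct device *dev, void *drvdata,
			    const struct hwmon_chip_info *info)
{
	struct device *hwmon;
	char *name;

	/* "ex-sensor 1" contains '-' and ' ', both invalid in hwmon names */
	name = devm_hwmon_sanitize_name(dev, "ex-sensor 1");
	if (IS_ERR(name))
		return PTR_ERR(name);

	hwmon = devm_hwmon_device_register_with_info(dev, name, drvdata,
						     info, NULL);
	return PTR_ERR_OR_ZERO(hwmon);
}
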
index ce78d4804994baba70cf39f67d1f3bf0f927f4c4..aa21b032e861e2e82c0e223392c36c0782a06651 100644 (file)
@@ -152,6 +152,8 @@ int qcom_adc5_hw_scale(enum vadc_scale_fn_type scaletype,
 u16 qcom_adc_tm5_temp_volt_scale(unsigned int prescale_ratio,
                                 u32 full_scale_code_volt, int temp);
 
+u16 qcom_adc_tm5_gen2_temp_res_scale(int temp);
+
 int qcom_adc5_prescaling_from_dt(u32 num, u32 den);
 
 int qcom_adc5_hw_settle_time_from_dt(u32 value, const unsigned int *hw_settle);
index 24359b4a960537805898618a28e99b6ee1db44a5..bc7babe91b2e6492073b82d5c013c687e6ccca0b 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef __LINUX_INSTRUMENTATION_H
 #define __LINUX_INSTRUMENTATION_H
 
-#if defined(CONFIG_DEBUG_ENTRY) && defined(CONFIG_STACK_VALIDATION)
+#ifdef CONFIG_NOINSTR_VALIDATION
 
 #include <linux/stringify.h>
 
@@ -53,9 +53,9 @@
                     ".popsection\n\t" : : "i" (c));                    \
 })
 #define instrumentation_end() __instrumentation_end(__COUNTER__)
-#else
+#else /* !CONFIG_NOINSTR_VALIDATION */
 # define instrumentation_begin()       do { } while(0)
 # define instrumentation_end()         do { } while(0)
-#endif
+#endif /* CONFIG_NOINSTR_VALIDATION */
 
 #endif /* __LINUX_INSTRUMENTATION_H */
index f40754caaefa43710c7b215a6dc2463733537ba2..a92bce40b04b3a0352534f76334faec42d85fbb1 100644 (file)
@@ -222,24 +222,6 @@ devm_request_any_context_irq(struct device *dev, unsigned int irq,
 
 extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
 
-/*
- * On lockdep we dont want to enable hardirqs in hardirq
- * context. Use local_irq_enable_in_hardirq() to annotate
- * kernel code that has to do this nevertheless (pretty much
- * the only valid case is for old/broken hardware that is
- * insanely slow).
- *
- * NOTE: in theory this might break fragile code that relies
- * on hardirq delivery - in practice we dont seem to have such
- * places left. So the only effect should be slightly increased
- * irqs-off latencies.
- */
-#ifdef CONFIG_LOCKDEP
-# define local_irq_enable_in_hardirq() do { } while (0)
-#else
-# define local_irq_enable_in_hardirq() local_irq_enable()
-#endif
-
 bool irq_has_action(unsigned int irq);
 extern void disable_irq_nosync(unsigned int irq);
 extern bool disable_hardirq(unsigned int irq);
@@ -607,6 +589,15 @@ struct softirq_action
 asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
 
+#ifdef CONFIG_PREEMPT_RT
+extern void do_softirq_post_smp_call_flush(unsigned int was_pending);
+#else
+static inline void do_softirq_post_smp_call_flush(unsigned int unused)
+{
+       do_softirq();
+}
+#endif
+
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
 extern void __raise_softirq_irqoff(unsigned int nr);
index 1814e698d86115910cdc4b766ffc7f6fd09a9c3f..4a2f6cc5a4927fdc0280b2b5b86d47938b8bd768 100644 (file)
@@ -5,11 +5,37 @@
 #include <linux/sched.h>
 #include <linux/xarray.h>
 
+enum io_uring_cmd_flags {
+       IO_URING_F_COMPLETE_DEFER       = 1,
+       IO_URING_F_UNLOCKED             = 2,
+       /* int's last bit; sign checks are usually faster than a bit test */
+       IO_URING_F_NONBLOCK             = INT_MIN,
+
+       /* ctx state flags, for URING_CMD */
+       IO_URING_F_SQE128               = 4,
+       IO_URING_F_CQE32                = 8,
+       IO_URING_F_IOPOLL               = 16,
+};
+
+struct io_uring_cmd {
+       struct file     *file;
+       const void      *cmd;
+       /* callback to defer completions to task context */
+       void (*task_work_cb)(struct io_uring_cmd *cmd);
+       u32             cmd_op;
+       u32             pad;
+       u8              pdu[32]; /* available inline for free use */
+};
+
 #if defined(CONFIG_IO_URING)
+void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *));
 struct sock *io_uring_get_socket(struct file *file);
 void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);
 void io_uring_unreg_ringfd(void);
+const char *io_uring_get_opcode(u8 opcode);
 
 static inline void io_uring_files_cancel(void)
 {
@@ -29,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
                __io_uring_free(tsk);
 }
 #else
+static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+               ssize_t ret2)
+{
+}
+static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *))
+{
+}
 static inline struct sock *io_uring_get_socket(struct file *file)
 {
        return NULL;
@@ -42,6 +76,10 @@ static inline void io_uring_files_cancel(void)
 static inline void io_uring_free(struct task_struct *tsk)
 {
 }
+static inline const char *io_uring_get_opcode(u8 opcode)
+{
+       return "";
+}
 #endif
 
 #endif
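
struct io_uring_cmd and its helpers back the passthrough command support added this cycle. A hedged sketch of a driver-side handler; the ->uring_cmd() hook signature and the return-value conventions shown here are assumptions based on how such handlers are typically written, not part of this hunk:

#include <linux/io_uring.h>
#include <linux/errno.h>

static void example_cmd_task_work(struct io_uring_cmd *ioucmd)
{
	/* runs in task context, so blocking work is allowed here */
	io_uring_cmd_done(ioucmd, 0, 0);
}

static int example_uring_cmd(struct io_uring_cmd *ioucmd,
			     unsigned int issue_flags)
{
	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;	/* retry from a context that may block */

	io_uring_cmd_complete_in_task(ioucmd, example_cmd_task_work);
	return -EIOCBQUEUED;	/* completion arrives asynchronously */
}
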
index 163831a087ef1c8a1c411dc037e10a532217cdb9..a1c9c0d48ebf65248f161a5e39ee065d4e929910 100644 (file)
@@ -72,6 +72,11 @@ struct ipmi_recv_msg {
        unsigned char   msg_data[IPMI_MAX_MSG_LENGTH];
 };
 
+#define INIT_IPMI_RECV_MSG(done_handler) \
+{                                      \
+       .done = done_handler            \
+}
+
 /* Allocate and free the receive message. */
 void ipmi_free_recv_msg(struct ipmi_recv_msg *msg);
 
index 9277d21c2690cc6daff1f576060d7a4667d49d1d..5d69820d8b027e1076cf8705d282b43b0c2a9733 100644 (file)
@@ -125,6 +125,12 @@ struct ipmi_smi_msg {
        void (*done)(struct ipmi_smi_msg *msg);
 };
 
+#define INIT_IPMI_SMI_MSG(done_handler) \
+{                                              \
+       .done = done_handler,                   \
+       .type = IPMI_SMI_MSG_TYPE_NORMAL        \
+}
+
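
The two initializer macros (INIT_IPMI_RECV_MSG in <linux/ipmi.h> above and INIT_IPMI_SMI_MSG here) let statically allocated messages carry a valid ->done handler from the start, e.g. in a power-off path. A sketch; the no-op handlers are illustrative:

#include <linux/ipmi.h>
#include <linux/ipmi_smi.h>

static void example_smi_done(struct ipmi_smi_msg *msg)
{
	/* statically allocated, nothing to free */
}

static void example_recv_done(struct ipmi_recv_msg *msg)
{
}

static struct ipmi_smi_msg example_smi_msg =
	INIT_IPMI_SMI_MSG(example_smi_done);
static struct ipmi_recv_msg example_recv_msg =
	INIT_IPMI_RECV_MSG(example_recv_done);
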
 struct ipmi_smi_handlers {
        struct module *owner;
 
index f92788ccdba270e4a665df40c4e83026a9698792..505308253d23ce49fed4b8f947437170ffa5571c 100644 (file)
@@ -569,6 +569,7 @@ struct irq_chip {
  * IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND:  Invokes __enable_irq()/__disable_irq() for wake irqs
  *                                    in the suspend path if they are in disabled state
  * IRQCHIP_AFFINITY_PRE_STARTUP:      Default affinity update before startup
+ * IRQCHIP_IMMUTABLE:                Don't ever change anything in this chip
  */
 enum {
        IRQCHIP_SET_TYPE_MASKED                 = (1 <<  0),
@@ -582,6 +583,7 @@ enum {
        IRQCHIP_SUPPORTS_NMI                    = (1 <<  8),
        IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND        = (1 <<  9),
        IRQCHIP_AFFINITY_PRE_STARTUP            = (1 << 10),
+       IRQCHIP_IMMUTABLE                       = (1 << 11),
 };
 
 #include <linux/irqdesc.h>
index 12d91f0dedf90caa46f6b8ceef81e876c97fb926..728691365464c1585b23338184c95ba90f1b65c1 100644 (file)
 #define GICR_PIDR2                     GICD_PIDR2
 
 #define GICR_CTLR_ENABLE_LPIS          (1UL << 0)
+#define GICR_CTLR_CES                  (1UL << 1)
+#define GICR_CTLR_IR                   (1UL << 2)
 #define GICR_CTLR_RWP                  (1UL << 3)
 
 #define GICR_TYPER_CPU_NUMBER(r)       (((r) >> 8) & 0xffff)
index 4b140938b03e205aa25be6c2c56129cb5ae59a6d..5ec0fa71399e47e526265fe2c11ef52ad6088e80 100644 (file)
 #ifdef CONFIG_PROVE_LOCKING
   extern void lockdep_softirqs_on(unsigned long ip);
   extern void lockdep_softirqs_off(unsigned long ip);
-  extern void lockdep_hardirqs_on_prepare(unsigned long ip);
+  extern void lockdep_hardirqs_on_prepare(void);
   extern void lockdep_hardirqs_on(unsigned long ip);
   extern void lockdep_hardirqs_off(unsigned long ip);
 #else
   static inline void lockdep_softirqs_on(unsigned long ip) { }
   static inline void lockdep_softirqs_off(unsigned long ip) { }
-  static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { }
+  static inline void lockdep_hardirqs_on_prepare(void) { }
   static inline void lockdep_hardirqs_on(unsigned long ip) { }
   static inline void lockdep_hardirqs_off(unsigned long ip) { }
 #endif
index de5d75bafd6651240d86bed578eef16d77f290a9..30e5bec81d2b6246c4cfdb6ef0aecfb46f046e2b 100644 (file)
@@ -222,9 +222,5 @@ void kthread_associate_blkcg(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *kthread_blkcg(void);
 #else
 static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { }
-static inline struct cgroup_subsys_state *kthread_blkcg(void)
-{
-       return NULL;
-}
 #endif
 #endif /* _LINUX_KTHREAD_H */
index 34eed5f85ed607432bd40559d1f9c6356cb6f083..4640393f20ab708a6444ff1fec25f4bfde88c994 100644 (file)
@@ -453,7 +453,7 @@ static __always_inline void guest_state_enter_irqoff(void)
 {
        instrumentation_begin();
        trace_hardirqs_on_prepare();
-       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       lockdep_hardirqs_on_prepare();
        instrumentation_end();
 
        guest_context_enter_irqoff();
index 9b1d3d8b12520165836d4e51b6938a4a15913364..732de90146261feadadb7a8eca01341fb9d66f33 100644 (file)
@@ -820,7 +820,6 @@ struct ata_port {
        unsigned int            cbl;    /* cable type; ATA_CBL_xxx */
 
        struct ata_queued_cmd   qcmd[ATA_MAX_QUEUE + 1];
-       unsigned long           sas_tag_allocated; /* for sas tag allocation only */
        u64                     qc_active;
        int                     nr_active_links; /* #links with active qcs */
        unsigned int            sas_last_tag;   /* track next tag hw expects */
@@ -1111,7 +1110,7 @@ extern void ata_unpack_xfermask(unsigned long xfer_mask,
                        unsigned long *udma_mask);
 extern u8 ata_xfer_mask2mode(unsigned long xfer_mask);
 extern unsigned long ata_xfer_mode2mask(u8 xfer_mode);
-extern int ata_xfer_mode2shift(unsigned long xfer_mode);
+extern int ata_xfer_mode2shift(u8 xfer_mode);
 extern const char *ata_mode_string(unsigned long xfer_mask);
 extern unsigned long ata_id_xfermask(const u16 *id);
 extern int ata_std_qc_defer(struct ata_queued_cmd *qc);
index acb1ad2356f1b29c6fe4dcb1e8dc1d371d8c2684..1feab6136b5b583397cc6267f563037812d6cb7d 100644 (file)
 
 /* SYM_ALIAS -- use only if you have to */
 #ifndef SYM_ALIAS
-#define SYM_ALIAS(alias, name, sym_type, linkage)                      \
-       linkage(alias) ASM_NL                                           \
-       .set alias, name ASM_NL                                         \
-       .type alias sym_type ASM_NL                                     \
-       .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL           \
-       .size alias, .L__sym_size_##alias
+#define SYM_ALIAS(alias, name, linkage)                        \
+       linkage(alias) ASM_NL                           \
+       .set alias, name ASM_NL
 #endif
 
 /* === code annotations === */
  */
 #ifndef SYM_FUNC_ALIAS
 #define SYM_FUNC_ALIAS(alias, name)                                    \
-       SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+       SYM_ALIAS(alias, name, SYM_L_GLOBAL)
 #endif
 
 /*
  */
 #ifndef SYM_FUNC_ALIAS_LOCAL
 #define SYM_FUNC_ALIAS_LOCAL(alias, name)                              \
-       SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+       SYM_ALIAS(alias, name, SYM_L_LOCAL)
 #endif
 
 /*
  */
 #ifndef SYM_FUNC_ALIAS_WEAK
 #define SYM_FUNC_ALIAS_WEAK(alias, name)                               \
-       SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+       SYM_ALIAS(alias, name, SYM_L_WEAK)
 #endif
 
 /* SYM_CODE_START -- use for non-C (special) functions */
index 17d02eda953898958ee184653e0fa2e56e532772..97a8b21eb03339166c96e94743148e9e96fb2bc4 100644 (file)
@@ -76,6 +76,7 @@ struct common_audit_data {
 #define LSM_AUDIT_DATA_IBENDPORT 14
 #define LSM_AUDIT_DATA_LOCKDOWN 15
 #define LSM_AUDIT_DATA_NOTIFICATION 16
+#define LSM_AUDIT_DATA_ANONINODE       17
        union   {
                struct path path;
                struct dentry *dentry;
@@ -96,6 +97,7 @@ struct common_audit_data {
                struct lsm_ibpkey_audit *ibpkey;
                struct lsm_ibendport_audit *ibendport;
                int reason;
+               const char *anonclass;
        } u;
        /* this union contains LSM specific data */
        union {
index db924fe379c9ca1b39fc453b19a6c617ef0f8ef9..eafa1d2489fdac3d7e48b1e24f7795b89b859887 100644 (file)
@@ -100,7 +100,7 @@ LSM_HOOK(int, 0, path_link, struct dentry *old_dentry,
         const struct path *new_dir, struct dentry *new_dentry)
 LSM_HOOK(int, 0, path_rename, const struct path *old_dir,
         struct dentry *old_dentry, const struct path *new_dir,
-        struct dentry *new_dentry)
+        struct dentry *new_dentry, unsigned int flags)
 LSM_HOOK(int, 0, path_chmod, const struct path *path, umode_t mode)
 LSM_HOOK(int, 0, path_chown, const struct path *path, kuid_t uid, kgid_t gid)
 LSM_HOOK(int, 0, path_chroot, const struct path *path)
index 419b5febc3ca5ebad865bafda56df43e0f6eebcc..91c8146649f59086400efdbf5ee4033c8db0dd06 100644 (file)
  *     @old_dentry contains the dentry structure of the old link.
  *     @new_dir contains the path structure for parent of the new link.
  *     @new_dentry contains the dentry structure of the new link.
+ *     @flags may contain rename options such as RENAME_EXCHANGE.
  *     Return 0 if permission is granted.
  * @path_chmod:
  *     Check for permission to change a mode of the file @path. The new
@@ -1595,7 +1596,7 @@ struct security_hook_list {
        struct hlist_node               list;
        struct hlist_head               *head;
        union security_list_options     hook;
-       char                            *lsm;
+       const char                      *lsm;
 } __randomize_layout;
 
 /*
@@ -1630,7 +1631,7 @@ extern struct security_hook_heads security_hook_heads;
 extern char *lsm_names;
 
 extern void security_add_hooks(struct security_hook_list *hooks, int count,
-                               char *lsm);
+                               const char *lsm);
 
 #define LSM_FLAG_LEGACY_MAJOR  BIT(0)
 #define LSM_FLAG_EXCLUSIVE     BIT(1)
index 9f44254af8ce9e6d229adfe027e3ba5955ce50e8..b0183450e484b24340fdd4fac6ee7a573bbaec4b 100644 (file)
@@ -2677,6 +2677,7 @@ extern int install_special_mapping(struct mm_struct *mm,
                                   unsigned long flags, struct page **pages);
 
 unsigned long randomize_stack_top(unsigned long stack_top);
+unsigned long randomize_page(unsigned long start, unsigned long range);
 
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
index de5c64bbdb725818faf590107f0ecde475152852..6efec0b9820c180181a57df0d92f5a2700d38de2 100644 (file)
@@ -176,7 +176,7 @@ int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd,
                int retries);
 
 int mmc_hw_reset(struct mmc_card *card);
-int mmc_sw_reset(struct mmc_host *host);
+int mmc_sw_reset(struct mmc_card *card);
 void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card);
 
 #endif /* LINUX_MMC_CORE_H */
index 7afb57cab00b791e492428539dfd82fd2da65969..c193c50ccd785e4cf064311352728f8a60664b42 100644 (file)
@@ -181,7 +181,7 @@ struct mmc_host_ops {
                                         unsigned int max_dtr, int host_drv,
                                         int card_drv, int *drv_type);
        /* Reset the eMMC card via RST_n */
-       void    (*hw_reset)(struct mmc_host *host);
+       void    (*card_hw_reset)(struct mmc_host *host);
        void    (*card_event)(struct mmc_host *host);
 
        /*
index fd1ecb8211060accb59065d07217d1ded9bdd4f6..d88bb56c18e2e9d78308f85e4e843fee790422a3 100644 (file)
@@ -286,6 +286,7 @@ struct cfi_private {
        map_word sector_erase_cmd;
        unsigned long chipshift; /* Because they're of the same type */
        const char *im_name;     /* inter_module name for cmdset_setup */
+       unsigned long quirks;
        struct flchip chips[];  /* per-chip data structure for each chip */
 };
 
index 5e25a7b75ae2fad388f613ec59b48f2cf5568291..1ede4c89805a1d88cfaab2a8624e9808be34de1f 100644 (file)
@@ -47,8 +47,6 @@
 #define SPINOR_OP_RDID         0x9f    /* Read JEDEC ID */
 #define SPINOR_OP_RDSFDP       0x5a    /* Read SFDP */
 #define SPINOR_OP_RDCR         0x35    /* Read configuration register */
-#define SPINOR_OP_RDEAR                0xc8    /* Read Extended Address Register */
-#define SPINOR_OP_WREAR                0xc5    /* Write Extended Address Register */
 #define SPINOR_OP_SRSTEN       0x66    /* Software Reset Enable */
 #define SPINOR_OP_SRST         0x99    /* Software Reset */
 #define SPINOR_OP_GBULK                0x98    /* Global Block Unlock */
@@ -365,6 +363,7 @@ struct spi_nor_flash_parameter;
  * @write_proto:       the SPI protocol for write operations
  * @reg_proto:         the SPI protocol for read_reg/write_reg/erase operations
  * @sfdp:              the SFDP data of the flash
+ * @debugfs_root:      pointer to the debugfs directory
  * @controller_ops:    SPI NOR controller driver specific operations.
  * @params:            [FLASH-SPECIFIC] SPI NOR flash parameters and settings.
  *                      The structure includes legacy flash parameters and
@@ -394,6 +393,7 @@ struct spi_nor {
        u32                     flags;
        enum spi_nor_cmd_ext    cmd_ext_type;
        struct sfdp             *sfdp;
+       struct dentry           *debugfs_root;
 
        const struct spi_nor_controller_ops *controller_ops;
 
index 3aa28240a77fe7cdb278d0203f5a0a2d57720934..5584d3bb6556baf9ce57837e9ed138cfeac63802 100644 (file)
@@ -266,6 +266,7 @@ extern const struct spinand_manufacturer micron_spinand_manufacturer;
 extern const struct spinand_manufacturer paragon_spinand_manufacturer;
 extern const struct spinand_manufacturer toshiba_spinand_manufacturer;
 extern const struct spinand_manufacturer winbond_spinand_manufacturer;
+extern const struct spinand_manufacturer xtx_spinand_manufacturer;
 
 /**
  * struct spinand_op_variants - SPI NAND operation variants
index 2c6b9e4162254f7116ed95ee88eb03afcc0fe64f..7c2d77d75a888cdb2b317cc89296681643b0423e 100644 (file)
@@ -169,7 +169,7 @@ enum {
 #define NETIF_F_HW_HSR_FWD     __NETIF_F(HW_HSR_FWD)
 #define NETIF_F_HW_HSR_DUP     __NETIF_F(HW_HSR_DUP)
 
-/* Finds the next feature with the highest number of the range of start till 0.
+/* Finds the next feature with the highest number in the range from start-1 down to 0.
  */
 static inline int find_next_netdev_feature(u64 feature, unsigned long start)
 {
@@ -188,7 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
        for ((bit) = find_next_netdev_feature((mask_addr),              \
                                              NETDEV_FEATURE_COUNT);    \
             (bit) >= 0;                                                \
-            (bit) = find_next_netdev_feature((mask_addr), (bit) - 1))
+            (bit) = find_next_netdev_feature((mask_addr), (bit)))
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
index b1fbe21650bb5ed1d8593d895ba3106869311074..f736c020cde27761b913be89deb210100d599ea0 100644 (file)
@@ -900,7 +900,7 @@ struct net_device_path_stack {
 
 struct net_device_path_ctx {
        const struct net_device *dev;
-       const u8                *daddr;
+       u8                      daddr[ETH_ALEN];
 
        int                     num_vlans;
        struct {
index c7bf1eaf51d5aaa71c60a52006de277b94177a1f..0c33b715cbfd269fff1808905811a939213c2105 100644 (file)
@@ -295,7 +295,7 @@ extern void netfs_stats_show(struct seq_file *);
  */
 static inline struct netfs_i_context *netfs_i_context(struct inode *inode)
 {
-       return (struct netfs_i_context *)(inode + 1);
+       return (void *)inode + sizeof(*inode);
 }
 
 /**
@@ -307,7 +307,7 @@ static inline struct netfs_i_context *netfs_i_context(struct inode *inode)
  */
 static inline struct inode *netfs_inode(struct netfs_i_context *ctx)
 {
-       return ((struct inode *)ctx) - 1;
+       return (void *)ctx - sizeof(struct inode);
 }
 
 /**
index f626a445d1a872647c27e6c09139dca1d99af676..29ec3e3481ff61901899e6b2c836bf4a1fbe6e65 100644 (file)
@@ -137,6 +137,7 @@ enum {
        NVME_REG_CMBMSC = 0x0050,       /* Controller Memory Buffer Memory
                                         * Space Control
                                         */
+       NVME_REG_CRTO   = 0x0068,       /* Controller Ready Timeouts */
        NVME_REG_PMRCAP = 0x0e00,       /* Persistent Memory Capabilities */
        NVME_REG_PMRCTL = 0x0e04,       /* Persistent Memory Region Control */
        NVME_REG_PMRSTS = 0x0e08,       /* Persistent Memory Region Status */
@@ -161,6 +162,9 @@ enum {
 #define NVME_CMB_BIR(cmbloc)   ((cmbloc) & 0x7)
 #define NVME_CMB_OFST(cmbloc)  (((cmbloc) >> 12) & 0xfffff)
 
+#define NVME_CRTO_CRIMT(crto)  ((crto) >> 16)
+#define NVME_CRTO_CRWMT(crto)  ((crto) & 0xffff)
+
 enum {
        NVME_CMBSZ_SQS          = 1 << 0,
        NVME_CMBSZ_CQS          = 1 << 1,
@@ -204,8 +208,10 @@ enum {
        NVME_CC_SHN_MASK        = 3 << NVME_CC_SHN_SHIFT,
        NVME_CC_IOSQES          = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
        NVME_CC_IOCQES          = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
-       NVME_CAP_CSS_NVM        = 1 << 0,
-       NVME_CAP_CSS_CSI        = 1 << 6,
+       NVME_CC_CRIME           = 1 << 24,
+};
+
+enum {
        NVME_CSTS_RDY           = 1 << 0,
        NVME_CSTS_CFS           = 1 << 1,
        NVME_CSTS_NSSRO         = 1 << 4,
@@ -214,10 +220,23 @@ enum {
        NVME_CSTS_SHST_OCCUR    = 1 << 2,
        NVME_CSTS_SHST_CMPLT    = 2 << 2,
        NVME_CSTS_SHST_MASK     = 3 << 2,
+};
+
+enum {
        NVME_CMBMSC_CRE         = 1 << 0,
        NVME_CMBMSC_CMSE        = 1 << 1,
 };
 
+enum {
+       NVME_CAP_CSS_NVM        = 1 << 0,
+       NVME_CAP_CSS_CSI        = 1 << 6,
+};
+
+enum {
+       NVME_CAP_CRMS_CRIMS     = 1ULL << 59,
+       NVME_CAP_CRMS_CRWMS     = 1ULL << 60,
+};
+
 struct nvme_id_power_state {
        __le16                  max_power;      /* centiwatts */
        __u8                    rsvd2;
@@ -405,6 +424,21 @@ struct nvme_id_ns {
        __u8                    vs[3712];
 };
 
+/* I/O Command Set Independent Identify Namespace Data Structure */
+struct nvme_id_ns_cs_indep {
+       __u8                    nsfeat;
+       __u8                    nmic;
+       __u8                    rescap;
+       __u8                    fpi;
+       __le32                  anagrpid;
+       __u8                    nsattr;
+       __u8                    rsvd9;
+       __le16                  nvmsetid;
+       __le16                  endgid;
+       __u8                    nstat;
+       __u8                    rsvd15[4081];
+};
+
 struct nvme_zns_lbafe {
        __le64                  zsze;
        __u8                    zdes;
@@ -469,6 +503,7 @@ enum {
        NVME_ID_CNS_NS_DESC_LIST        = 0x03,
        NVME_ID_CNS_CS_NS               = 0x05,
        NVME_ID_CNS_CS_CTRL             = 0x06,
+       NVME_ID_CNS_NS_CS_INDEP         = 0x08,
        NVME_ID_CNS_NS_PRESENT_LIST     = 0x10,
        NVME_ID_CNS_NS_PRESENT          = 0x11,
        NVME_ID_CNS_CTRL_NS_LIST        = 0x12,
@@ -522,6 +557,10 @@ enum {
        NVME_NS_DPS_PI_TYPE3    = 3,
 };
 
+enum {
+       NVME_NSTAT_NRDY         = 1 << 0,
+};
+
 enum {
        NVME_NVM_NS_16B_GUARD   = 0,
        NVME_NVM_NS_32B_GUARD   = 1,
@@ -1583,6 +1622,7 @@ enum {
        NVME_SC_NS_WRITE_PROTECTED      = 0x20,
        NVME_SC_CMD_INTERRUPTED         = 0x21,
        NVME_SC_TRANSIENT_TR_ERR        = 0x22,
+       NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24,
        NVME_SC_INVALID_IO_CMD_SET      = 0x2C,
 
        NVME_SC_LBA_RANGE               = 0x80,
@@ -1679,9 +1719,11 @@ enum {
        /*
         * Path-related Errors:
         */
+       NVME_SC_INTERNAL_PATH_ERROR     = 0x300,
        NVME_SC_ANA_PERSISTENT_LOSS     = 0x301,
        NVME_SC_ANA_INACCESSIBLE        = 0x302,
        NVME_SC_ANA_TRANSITION          = 0x303,
+       NVME_SC_CTRL_PATH_ERROR         = 0x360,
        NVME_SC_HOST_PATH_ERROR         = 0x370,
        NVME_SC_HOST_ABORTED_CMD        = 0x371,
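
CRTO gives controllers a pair of ready timeouts: CRWMT for the normal path and CRIMT for when CC.CRIME is enabled. A hedged sketch of how a driver could select one; the helper name and surrounding logic are illustrative, and the returned value is in the register's encoded units:

#include <linux/nvme.h>
#include <linux/io.h>

static u16 example_ready_timeout(void __iomem *bar, u64 cap,
				 bool crime_enabled)
{
	u32 crto = readl(bar + NVME_REG_CRTO);

	if (crime_enabled && (cap & NVME_CAP_CRMS_CRIMS))
		return NVME_CRTO_CRIMT(crto);	/* immediate-ready timeout */
	return NVME_CRTO_CRWMT(crto);		/* wait-media-ready timeout */
}
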
 
index 586d35720f135f62b9d7434beccacb2e22a5bd2d..6491fa8fba6d5028eb55cc7c827d162ee7578ef3 100644 (file)
@@ -38,7 +38,9 @@ struct unwind_hint {
 #define UNWIND_HINT_TYPE_REGS_PARTIAL  2
 #define UNWIND_HINT_TYPE_FUNC          3
 
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_OBJTOOL
+
+#include <asm/asm.h>
 
 #ifndef __ASSEMBLY__
 
@@ -137,7 +139,7 @@ struct unwind_hint {
 
 .macro STACK_FRAME_NON_STANDARD func:req
        .pushsection .discard.func_stack_frame_non_standard, "aw"
-               .long \func - .
+       _ASM_PTR \func
        .popsection
 .endm
 
@@ -157,7 +159,7 @@ struct unwind_hint {
 
 #endif /* __ASSEMBLY__ */
 
-#else /* !CONFIG_STACK_VALIDATION */
+#else /* !CONFIG_OBJTOOL */
 
 #ifndef __ASSEMBLY__
 
@@ -179,6 +181,6 @@ struct unwind_hint {
 .endm
 #endif
 
-#endif /* CONFIG_STACK_VALIDATION */
+#endif /* CONFIG_OBJTOOL */
 
 #endif /* _LINUX_OBJTOOL_H */
index 993994cd943a0db8a84b374cbfd791e5a61c0dd8..6165283bdb6f6b52efc46f3008e71ec5d67df39d 100644 (file)
@@ -1046,6 +1046,7 @@ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);
  * Fault in userspace address range.
  */
 size_t fault_in_writeable(char __user *uaddr, size_t size);
+size_t fault_in_subpage_writeable(char __user *uaddr, size_t size);
 size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
 size_t fault_in_readable(const char __user *uaddr, size_t size);
 
index af97dd427501cbc9d80a1c1480e942576d1a229c..da759560eec5646f51ec8af94b2d561b786c3a2b 100644 (file)
@@ -1063,6 +1063,22 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
        data->txn = 0;
 }
 
+/*
+ * Clear all bitfields in the perf_branch_entry.
+ * The to and from fields are not cleared because they are
+ * systematically modified by the caller.
+ */
+static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
+{
+       br->mispred = 0;
+       br->predicted = 0;
+       br->in_tx = 0;
+       br->abort = 0;
+       br->cycles = 0;
+       br->type = 0;
+       br->reserved = 0;
+}
+
 extern void perf_output_sample(struct perf_output_handle *handle,
                               struct perf_event_header *header,
                               struct perf_sample_data *data,
@@ -1660,4 +1676,10 @@ typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
                                           unsigned int cnt);
 DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
 
+#ifndef PERF_NEEDS_LOPWR_CB
+static inline void perf_lopwr_cb(bool mode)
+{
+}
+#endif
+
 #endif /* _LINUX_PERF_EVENT_H */
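
perf_clear_branch_entry_bitfields() exists so PMU drivers stop open-coding partial clears of the flag bits. A sketch of the intended call pattern when filling one branch-stack entry (the helper name is illustrative):

#include <linux/perf_event.h>

static void example_store_branch(struct perf_branch_entry *br,
				 u64 from, u64 to, bool mispred)
{
	perf_clear_branch_entry_bitfields(br);	/* zero every flag bitfield */
	br->from = from;	/* from/to are always set by the caller */
	br->to = to;
	br->mispred = mispred;
	br->predicted = !mispred;
}
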
index 40185f9d7c14b6ec1750a749954af508a2c6cb83..a6bd74e29b6ba624be177b16eaa74bc1a5a499a9 100644 (file)
@@ -216,6 +216,8 @@ struct mlxreg_core_platform_data {
  * @mask_low: low aggregation interrupt common mask;
 * @deferred_nr: I2C adapter number that must exist prior to probing execution;
  * @shift_nr: I2C adapter numbers must be incremented by this value;
+ * @handle: handle to be passed to the completion_notify callback;
+ * @completion_notify: callback to notify when platform driver probing is done;
  */
 struct mlxreg_core_hotplug_platform_data {
        struct mlxreg_core_item *items;
@@ -228,6 +230,8 @@ struct mlxreg_core_hotplug_platform_data {
        u32 mask_low;
        int deferred_nr;
        int shift_nr;
+       void *handle;
+       int (*completion_notify)(void *handle, int id);
 };
 
 #endif /* __LINUX_PLATFORM_DATA_MLXREG_H */
similarity index 99%
rename from include/linux/mmc/sh_mmcif.h
rename to include/linux/platform_data/sh_mmcif.h
index e25533b95d9f27af088cba6628307a8446cbcaef..6eb914f958f94306b5d3e7f598f92884339ce0da 100644 (file)
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * include/linux/mmc/sh_mmcif.h
- *
  * platform data for eMMC driver
  *
  * Copyright (C) 2010 Renesas Solutions Corp.
index 022bcea9edec5ba8b1c333331746e1532d393917..6807839c718bd1da7dcde110a1af58cba1caac3d 100644 (file)
 #define        SLEEP_ENABLE            0x2000
 
 extern int pmc_atom_read(int offset, u32 *value);
-extern int pmc_atom_write(int offset, u32 value);
 
 #endif /* PMC_ATOM_H */
index e65b3ab28377bf7ee89096fa88d6fe0b1cdb087e..ffe9419585012b1b490808a4f7de3c0111e0c72a 100644 (file)
@@ -368,13 +368,13 @@ const struct dev_pm_ops name = { \
 
 #ifdef CONFIG_PM
 #define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
-                          runtime_resume_fn, idle_fn, sec) \
+                          runtime_resume_fn, idle_fn, sec, ns)         \
        _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
                           runtime_resume_fn, idle_fn); \
-       _EXPORT_SYMBOL(name, sec)
+       __EXPORT_SYMBOL(name, sec, ns)
 #else
 #define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
-                          runtime_resume_fn, idle_fn, sec) \
+                          runtime_resume_fn, idle_fn, sec, ns) \
 static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \
                                         resume_fn, runtime_suspend_fn, \
                                         runtime_resume_fn, idle_fn)
@@ -391,9 +391,13 @@ static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \
        _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL)
 
 #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-       _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "")
+       _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", "")
 #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
-       _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl")
+       _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", "")
+#define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns)   \
+       _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", #ns)
+#define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns)       \
+       _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", #ns)
 
 /* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
 #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
index 67017c9390c878cc9e09f20e4bdaa5499e3d2646..ebc3516980907e1bc65bdf75362fbd58ee02ec29 100644 (file)
@@ -91,6 +91,14 @@ struct gpd_dev_ops {
        int (*stop)(struct device *dev);
 };
 
+struct genpd_governor_data {
+       s64 max_off_time_ns;
+       bool max_off_time_changed;
+       ktime_t next_wakeup;
+       bool cached_power_down_ok;
+       bool cached_power_down_state_idx;
+};
+
 struct genpd_power_state {
        s64 power_off_latency_ns;
        s64 power_on_latency_ns;
@@ -98,7 +106,7 @@ struct genpd_power_state {
        u64 usage;
        u64 rejected;
        struct fwnode_handle *fwnode;
-       ktime_t idle_time;
+       u64 idle_time;
        void *data;
 };
 
@@ -114,6 +122,7 @@ struct generic_pm_domain {
        struct list_head child_links;   /* Links with PM domain as a child */
        struct list_head dev_list;      /* List of devices */
        struct dev_power_governor *gov;
+       struct genpd_governor_data *gd; /* Data used by a genpd governor. */
        struct work_struct power_off_work;
        struct fwnode_handle *provider; /* Identity of the domain provider */
        bool has_provider;
@@ -134,11 +143,6 @@ struct generic_pm_domain {
        int (*set_performance_state)(struct generic_pm_domain *genpd,
                                     unsigned int state);
        struct gpd_dev_ops dev_ops;
-       s64 max_off_time_ns;    /* Maximum allowed "suspended" time. */
-       ktime_t next_wakeup;    /* Maintained by the domain governor */
-       bool max_off_time_changed;
-       bool cached_power_down_ok;
-       bool cached_power_down_state_idx;
        int (*attach_dev)(struct generic_pm_domain *domain,
                          struct device *dev);
        void (*detach_dev)(struct generic_pm_domain *domain,
@@ -149,8 +153,8 @@ struct generic_pm_domain {
                            unsigned int state_count);
        unsigned int state_count; /* number of states */
        unsigned int state_idx; /* state that genpd will go to when off */
-       ktime_t on_time;
-       ktime_t accounting_time;
+       u64 on_time;
+       u64 accounting_time;
        const struct genpd_lock_ops *lock_ops;
        union {
                struct mutex mlock;
@@ -182,6 +186,7 @@ struct gpd_timing_data {
        s64 suspend_latency_ns;
        s64 resume_latency_ns;
        s64 effective_constraint_ns;
+       ktime_t next_wakeup;
        bool constraint_changed;
        bool cached_suspend_ok;
 };
@@ -193,14 +198,13 @@ struct pm_domain_data {
 
 struct generic_pm_domain_data {
        struct pm_domain_data base;
-       struct gpd_timing_data td;
+       struct gpd_timing_data *td;
        struct notifier_block nb;
        struct notifier_block *power_nb;
        int cpu;
        unsigned int performance_state;
        unsigned int default_pstate;
        unsigned int rpm_pstate;
-       ktime_t next_wakeup;
        void *data;
 };
 
index 2bff6a10095d16a3771c926f44e303395de78854..9e4d056967c6618aad1885f21fbaef388a580e25 100644 (file)
 
 #define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
        _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
-                          suspend_fn, resume_fn, idle_fn, "")
+                          suspend_fn, resume_fn, idle_fn, "", "")
 #define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
        _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
-                          suspend_fn, resume_fn, idle_fn, "_gpl")
+                          suspend_fn, resume_fn, idle_fn, "_gpl", "")
+#define EXPORT_NS_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \
+       _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
+                          suspend_fn, resume_fn, idle_fn, "", #ns)
+#define EXPORT_NS_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \
+       _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
+                          suspend_fn, resume_fn, idle_fn, "_gpl", #ns)
 
 #ifdef CONFIG_PM
 extern struct workqueue_struct *pm_wq;
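
The EXPORT_NS_* variants mirror the existing macros but place the exported dev_pm_ops in a symbol namespace. A sketch of the pairing between exporter and consumer; EXAMPLE_NS and the trivial callbacks are placeholders:

#include <linux/pm.h>
#include <linux/module.h>

static int example_suspend(struct device *dev) { return 0; }
static int example_resume(struct device *dev) { return 0; }

/* exporter: the dev_pm_ops symbol lands in the EXAMPLE_NS namespace */
EXPORT_NS_SIMPLE_DEV_PM_OPS(example_pm_ops, example_suspend,
			    example_resume, EXAMPLE_NS);

/* a consuming module must import the namespace to link against it */
MODULE_IMPORT_NS(EXAMPLE_NS);
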
diff --git a/include/linux/polynomial.h b/include/linux/polynomial.h
new file mode 100644 (file)
index 0000000..9e074a0
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+ */
+
+#ifndef _POLYNOMIAL_H
+#define _POLYNOMIAL_H
+
+/*
+ * struct polynomial_term - one term descriptor of a polynomial
+ * @deg: degree of the term.
+ * @coef: multiplication factor of the term.
+ * @divider: distributed divider per each degree.
+ * @divider_leftover: divider leftover, which couldn't be redistributed.
+ */
+struct polynomial_term {
+       unsigned int deg;
+       long coef;
+       long divider;
+       long divider_leftover;
+};
+
+/*
+ * struct polynomial - a polynomial descriptor
+ * @total_divider: total data divider.
+ * @terms: polynomial terms, last term must have degree of 0
+ */
+struct polynomial {
+       long total_divider;
+       struct polynomial_term terms[];
+};
+
+long polynomial_calc(const struct polynomial *poly, long data);
+
+#endif
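
Assuming the semantics documented above (each term contributes coef * data^deg scaled by its dividers, and the sum is divided by total_divider), a sketch of a typical sensor-code conversion; the coefficients are invented for illustration:

#include <linux/polynomial.h>

/* Illustrative linear conversion: temp = (3500 * raw - 400000) / 10000 */
static const struct polynomial example_code_to_temp = {
	.total_divider = 10000,
	.terms = {
		{ .deg = 1, .coef = 3500, .divider = 1, .divider_leftover = 1 },
		{ .deg = 0, .coef = -400000, .divider = 1, .divider_leftover = 1 },
	},
};

static long example_to_millicelsius(long raw)
{
	return polynomial_calc(&example_code_to_temp, raw);
}
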
index 056d31317e4990a6f3f25041be076a9db289d846..deace5fb4e628beccac22d6df991761d35646584 100644 (file)
 
 #include <linux/types.h>
 #include <linux/percpu.h>
+#include <linux/random.h>
 
-u32 prandom_u32(void);
-void prandom_bytes(void *buf, size_t nbytes);
-void prandom_seed(u32 seed);
-void prandom_reseed_late(void);
-
-DECLARE_PER_CPU(unsigned long, net_rand_noise);
-
-#define PRANDOM_ADD_NOISE(a, b, c, d) \
-       prandom_u32_add_noise((unsigned long)(a), (unsigned long)(b), \
-                             (unsigned long)(c), (unsigned long)(d))
-
-#if BITS_PER_LONG == 64
-/*
- * The core SipHash round function.  Each line can be executed in
- * parallel given enough CPU resources.
- */
-#define PRND_SIPROUND(v0, v1, v2, v3) ( \
-       v0 += v1, v1 = rol64(v1, 13),  v2 += v3, v3 = rol64(v3, 16), \
-       v1 ^= v0, v0 = rol64(v0, 32),  v3 ^= v2,                     \
-       v0 += v3, v3 = rol64(v3, 21),  v2 += v1, v1 = rol64(v1, 17), \
-       v3 ^= v0,                      v1 ^= v2, v2 = rol64(v2, 32)  \
-)
-
-#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261)
-#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573)
-
-#elif BITS_PER_LONG == 32
-/*
- * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash.
- * This is weaker, but 32-bit machines are not used for high-traffic
- * applications, so there is less output for an attacker to analyze.
- */
-#define PRND_SIPROUND(v0, v1, v2, v3) ( \
-       v0 += v1, v1 = rol32(v1,  5),  v2 += v3, v3 = rol32(v3,  8), \
-       v1 ^= v0, v0 = rol32(v0, 16),  v3 ^= v2,                     \
-       v0 += v3, v3 = rol32(v3,  7),  v2 += v1, v1 = rol32(v1, 13), \
-       v3 ^= v0,                      v1 ^= v2, v2 = rol32(v2, 16)  \
-)
-#define PRND_K0 0x6c796765
-#define PRND_K1 0x74656462
-
-#else
-#error Unsupported BITS_PER_LONG
-#endif
+static inline u32 prandom_u32(void)
+{
+       return get_random_u32();
+}
 
-static inline void prandom_u32_add_noise(unsigned long a, unsigned long b,
-                                        unsigned long c, unsigned long d)
+static inline void prandom_bytes(void *buf, size_t nbytes)
 {
-       /*
-        * This is not used cryptographically; it's just
-        * a convenient 4-word hash function. (3 xor, 2 add, 2 rol)
-        */
-       a ^= raw_cpu_read(net_rand_noise);
-       PRND_SIPROUND(a, b, c, d);
-       raw_cpu_write(net_rand_noise, d);
+       return get_random_bytes(buf, nbytes);
 }
 
 struct rnd_state {
@@ -117,7 +71,6 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
        state->s2 = __seed(i,   8U);
        state->s3 = __seed(i,  16U);
        state->s4 = __seed(i, 128U);
-       PRANDOM_ADD_NOISE(state, i, 0, 0);
 }
 
 /* Pseudo random number generator from numerical recipes. */
index f673fbb838b35f80194e049784851ba7c94ae719..fae0c84027fdcc4384f227aa75873d36608b36b5 100644 (file)
 
 struct notifier_block;
 
-extern void add_device_randomness(const void *, size_t);
-extern void add_bootloader_randomness(const void *, size_t);
+void add_device_randomness(const void *buf, size_t len);
+void add_bootloader_randomness(const void *buf, size_t len);
+void add_input_randomness(unsigned int type, unsigned int code,
+                         unsigned int value) __latent_entropy;
+void add_interrupt_randomness(int irq) __latent_entropy;
+void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy);
 
 #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 static inline void add_latent_entropy(void)
 {
-       add_device_randomness((const void *)&latent_entropy,
-                             sizeof(latent_entropy));
+       add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy));
 }
 #else
-static inline void add_latent_entropy(void) {}
+static inline void add_latent_entropy(void) { }
 #endif
 
-extern void add_input_randomness(unsigned int type, unsigned int code,
-                                unsigned int value) __latent_entropy;
-extern void add_interrupt_randomness(int irq) __latent_entropy;
-extern void add_hwgenerator_randomness(const void *buffer, size_t count,
-                                      size_t entropy);
 #if IS_ENABLED(CONFIG_VMGENID)
-extern void add_vmfork_randomness(const void *unique_vm_id, size_t size);
-extern int register_random_vmfork_notifier(struct notifier_block *nb);
-extern int unregister_random_vmfork_notifier(struct notifier_block *nb);
+void add_vmfork_randomness(const void *unique_vm_id, size_t len);
+int register_random_vmfork_notifier(struct notifier_block *nb);
+int unregister_random_vmfork_notifier(struct notifier_block *nb);
 #else
 static inline int register_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
 static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
 #endif
 
-extern void get_random_bytes(void *buf, size_t nbytes);
-extern int wait_for_random_bytes(void);
-extern int __init rand_initialize(void);
-extern bool rng_is_initialized(void);
-extern int register_random_ready_notifier(struct notifier_block *nb);
-extern int unregister_random_ready_notifier(struct notifier_block *nb);
-extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes);
-
-#ifndef MODULE
-extern const struct file_operations random_fops, urandom_fops;
-#endif
-
+void get_random_bytes(void *buf, size_t len);
 u32 get_random_u32(void);
 u64 get_random_u64(void);
 static inline unsigned int get_random_int(void)
@@ -82,11 +69,14 @@ static inline unsigned long get_random_long(void)
 
 static inline unsigned long get_random_canary(void)
 {
-       unsigned long val = get_random_long();
-
-       return val & CANARY_MASK;
+       return get_random_long() & CANARY_MASK;
 }
 
+int __init random_init(const char *command_line);
+bool rng_is_initialized(void);
+bool rng_has_arch_random(void);
+int wait_for_random_bytes(void);
+
 /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
  * Returns the result of the call to wait_for_random_bytes. */
 static inline int get_random_bytes_wait(void *buf, size_t nbytes)
@@ -96,22 +86,20 @@ static inline int get_random_bytes_wait(void *buf, size_t nbytes)
        return ret;
 }
 
-#define declare_get_random_var_wait(var) \
-       static inline int get_random_ ## var ## _wait(var *out) { \
+#define declare_get_random_var_wait(name, ret_type) \
+       static inline int get_random_ ## name ## _wait(ret_type *out) { \
                int ret = wait_for_random_bytes(); \
                if (unlikely(ret)) \
                        return ret; \
-               *out = get_random_ ## var(); \
+               *out = get_random_ ## name(); \
                return 0; \
        }
-declare_get_random_var_wait(u32)
-declare_get_random_var_wait(u64)
-declare_get_random_var_wait(int)
-declare_get_random_var_wait(long)
+declare_get_random_var_wait(u32, u32)
+declare_get_random_var_wait(u64, u64)
+declare_get_random_var_wait(int, unsigned int)
+declare_get_random_var_wait(long, unsigned long)
 #undef declare_get_random_var_wait
 
-unsigned long randomize_page(unsigned long start, unsigned long range);
-
 /*
  * This is designed to be standalone for just prandom
  * users, but for now we include it from <linux/random.h>
@@ -122,22 +110,10 @@ unsigned long randomize_page(unsigned long start, unsigned long range);
 #ifdef CONFIG_ARCH_RANDOM
 # include <asm/archrandom.h>
 #else
-static inline bool __must_check arch_get_random_long(unsigned long *v)
-{
-       return false;
-}
-static inline bool __must_check arch_get_random_int(unsigned int *v)
-{
-       return false;
-}
-static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
-{
-       return false;
-}
-static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
-{
-       return false;
-}
+static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; }
+static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; }
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; }
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; }
 #endif
 
 /*
@@ -161,8 +137,12 @@ static inline bool __init arch_get_random_long_early(unsigned long *v)
 #endif
 
 #ifdef CONFIG_SMP
-extern int random_prepare_cpu(unsigned int cpu);
-extern int random_online_cpu(unsigned int cpu);
+int random_prepare_cpu(unsigned int cpu);
+int random_online_cpu(unsigned int cpu);
+#endif
+
+#ifndef MODULE
+extern const struct file_operations random_fops, urandom_fops;
 #endif
 
 #endif /* _LINUX_RANDOM_H */
index 1468caf001c05e8f9a3be48f2d49dbf2d411cf2b..5d868505a94e43fe4f6124915e90dc814c269278 100644 (file)
@@ -40,10 +40,14 @@ DECLARE_PER_CPU(u32, kstack_offset);
  */
 #define KSTACK_OFFSET_MAX(x)   ((x) & 0x3FF)
 
-/*
- * These macros must be used during syscall entry when interrupts and
+/**
+ * add_random_kstack_offset - Increase stack utilization by the previously
+ *                           chosen random offset
+ *
+ * This should be used in the syscall entry path when interrupts and
  * preempt are disabled, and after user registers have been stored to
- * the stack.
+ * the stack. For testing the resulting entropy, please see:
+ * tools/testing/selftests/lkdtm/stack-entropy.sh
  */
 #define add_random_kstack_offset() do {                                        \
        if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
@@ -55,6 +59,23 @@ DECLARE_PER_CPU(u32, kstack_offset);
        }                                                               \
 } while (0)
 
+/**
+ * choose_random_kstack_offset - Choose the random offset for the next
+ *                              add_random_kstack_offset()
+ *
+ * This should only be used during syscall exit when interrupts and
+ * preempt are disabled. This point in the syscall flow was chosen to
+ * frustrate attacks from userspace attempting to learn the next offset:
+ * - Maximize the timing uncertainty visible from userspace: if the
+ *   offset is chosen at syscall entry, userspace has much more control
+ *   over the timing between choosing offsets. "How long will we be in
+ *   kernel mode?" tends to be more difficult to predict than "how long
+ *   will we be in user mode?"
+ * - Reduce the lifetime of the new offset sitting in memory during
+ *   kernel mode execution. Exposure of "thread-local" memory content
+ *   (e.g. current, percpu, etc) tends to be easier than arbitrary
+ *   location memory exposure.
+ */
 #define choose_random_kstack_offset(rand) do {                         \
        if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
                                &randomize_kstack_offset)) {            \
index e7c39c200e2b0cc6c353c9d5205713964318d584..1a32036c918cd1e22e99bdbdd305769cfc74f429 100644 (file)
@@ -196,6 +196,7 @@ void synchronize_rcu_tasks_rude(void);
 void exit_tasks_rcu_start(void);
 void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
+#define rcu_tasks_classic_qs(t, preempt) do { } while (0)
 #define rcu_tasks_qs(t, preempt) do { } while (0)
 #define rcu_note_voluntary_context_switch(t) do { } while (0)
 #define call_rcu_tasks call_rcu
index de81a94d7b307aa9e6a2f532185fddff99b0c33a..8952fa3d0d59332d494f95e5a94bc97aee838d29 100644 (file)
@@ -299,6 +299,12 @@ typedef void (*regmap_unlock)(void *);
  *                  if the function require special handling with lock and reg
  *                  handling and the operation cannot be represented as a simple
  *                  update_bits operation on a bus such as SPI, I2C, etc.
+ * @read: Optional callback that, if provided, is used to perform all the
+ *        bulk reads from the registers. Data is returned in the buffer used
+ *        to transmit data.
+ * @write: Same as above for writing.
+ * @max_raw_read: Max raw read size that can be used on the device.
+ * @max_raw_write: Max raw write size that can be used on the device.
  * @fast_io:     Register IO is fast. Use a spinlock instead of a mutex
  *               to perform locking. This field is ignored if custom lock/unlock
  *               functions are used (see fields lock/unlock of struct regmap_config).
@@ -385,6 +391,12 @@ struct regmap_config {
        int (*reg_write)(void *context, unsigned int reg, unsigned int val);
        int (*reg_update_bits)(void *context, unsigned int reg,
                               unsigned int mask, unsigned int val);
+       /* Bulk read/write */
+       int (*read)(void *context, const void *reg_buf, size_t reg_size,
+                   void *val_buf, size_t val_size);
+       int (*write)(void *context, const void *data, size_t count);
+       size_t max_raw_read;
+       size_t max_raw_write;
 
        bool fast_io;
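
With the new ->read/->write callbacks, a device that speaks neither plain I2C/SPI nor another supported bus can still get full regmap bulk I/O. A sketch of a config wiring them up; the transport stubs and size limits are illustrative:

#include <linux/regmap.h>

static int example_bus_read(void *context, const void *reg_buf,
			    size_t reg_size, void *val_buf, size_t val_size)
{
	/* send reg_buf (reg_size bytes), then read val_size bytes back */
	return 0;
}

static int example_bus_write(void *context, const void *data, size_t count)
{
	/* data holds the register followed by the values, count bytes */
	return 0;
}

static const struct regmap_config example_regmap_config = {
	.reg_bits	= 8,
	.val_bits	= 8,
	.read		= example_bus_read,
	.write		= example_bus_write,
	.max_raw_read	= 256,
	.max_raw_write	= 256,
};
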
 
index 1cc304946d09e35d74ea037b77c2f6922ed62f9f..bdcf83cd719ef522babe97261f3578bb76e6fe1e 100644 (file)
@@ -48,9 +48,54 @@ enum {
        MT6358_ID_VLDO28,
        MT6358_ID_VAUD28,
        MT6358_ID_VSIM2,
+       MT6358_ID_VCORE_SSHUB,
+       MT6358_ID_VSRAM_OTHERS_SSHUB,
        MT6358_ID_RG_MAX,
 };
 
+enum {
+       MT6366_ID_VDRAM1 = 0,
+       MT6366_ID_VCORE,
+       MT6366_ID_VPA,
+       MT6366_ID_VPROC11,
+       MT6366_ID_VPROC12,
+       MT6366_ID_VGPU,
+       MT6366_ID_VS2,
+       MT6366_ID_VMODEM,
+       MT6366_ID_VS1,
+       MT6366_ID_VDRAM2,
+       MT6366_ID_VSIM1,
+       MT6366_ID_VIBR,
+       MT6366_ID_VRF12,
+       MT6366_ID_VIO18,
+       MT6366_ID_VUSB,
+       MT6366_ID_VCN18,
+       MT6366_ID_VFE28,
+       MT6366_ID_VSRAM_PROC11,
+       MT6366_ID_VCN28,
+       MT6366_ID_VSRAM_OTHERS,
+       MT6366_ID_VSRAM_GPU,
+       MT6366_ID_VXO22,
+       MT6366_ID_VEFUSE,
+       MT6366_ID_VAUX18,
+       MT6366_ID_VMCH,
+       MT6366_ID_VBIF28,
+       MT6366_ID_VSRAM_PROC12,
+       MT6366_ID_VEMC,
+       MT6366_ID_VIO28,
+       MT6366_ID_VA12,
+       MT6366_ID_VRF18,
+       MT6366_ID_VCN33_BT,
+       MT6366_ID_VCN33_WIFI,
+       MT6366_ID_VMC,
+       MT6366_ID_VAUD28,
+       MT6366_ID_VSIM2,
+       MT6366_ID_VCORE_SSHUB,
+       MT6366_ID_VSRAM_OTHERS_SSHUB,
+       MT6366_ID_RG_MAX,
+};
+
 #define MT6358_MAX_REGULATOR   MT6358_ID_RG_MAX
+#define MT6366_MAX_REGULATOR   MT6366_ID_RG_MAX
 
 #endif /* __LINUX_REGULATOR_MT6358_H */
index 71902f41c91999d21cc037e9c9d1731a71844f0f..3c01c2bf84f53ba6b1d0fa90fec837313a2f6d03 100644 (file)
@@ -226,4 +226,11 @@ enum {
 #define WDOG_B_CFG_COLD_LDO12          0x80
 #define WDOG_B_CFG_COLD                        0xC0
 
+/* PCA9450_REG_CONFIG2 bits */
+#define I2C_LT_MASK                    0x03
+#define I2C_LT_FORCE_DISABLE           0x00
+#define I2C_LT_ON_STANDBY_RUN          0x01
+#define I2C_LT_ON_RUN                  0x02
+#define I2C_LT_FORCE_ENABLE            0x03
+
 #endif /* __LINUX_REG_PCA9450_H__ */
index a8911b1f35aad335ddcf580cfa39a9edffb29b5e..b89c8571187b7cd42cbca98197fac11bfb41a2c5 100644 (file)
@@ -941,6 +941,9 @@ struct task_struct {
 #ifdef CONFIG_IOMMU_SVA
        unsigned                        pasid_activated:1;
 #endif
+#ifdef CONFIG_CPU_SUP_INTEL
+       unsigned                        reported_split_lock:1;
+#endif
 
        unsigned long                   atomic_flags; /* Flags requiring atomic access. */
 
@@ -2118,6 +2121,47 @@ static inline void cond_resched_rcu(void)
 #endif
 }
 
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+extern bool preempt_model_none(void);
+extern bool preempt_model_voluntary(void);
+extern bool preempt_model_full(void);
+
+#else
+
+static inline bool preempt_model_none(void)
+{
+       return IS_ENABLED(CONFIG_PREEMPT_NONE);
+}
+static inline bool preempt_model_voluntary(void)
+{
+       return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
+}
+static inline bool preempt_model_full(void)
+{
+       return IS_ENABLED(CONFIG_PREEMPT);
+}
+
+#endif
+
+static inline bool preempt_model_rt(void)
+{
+       return IS_ENABLED(CONFIG_PREEMPT_RT);
+}
+
+/*
+ * Does the preemption model allow non-cooperative preemption?
+ *
+ * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
+ * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
+ * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
+ * PREEMPT_NONE model.
+ */
+static inline bool preempt_model_preemptible(void)
+{
+       return preempt_model_full() || preempt_model_rt();
+}
+
 /*
  * Does a critical section need to be broken due to another
  * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
@@ -2338,20 +2382,6 @@ static inline void rseq_syscall(struct pt_regs *regs)
 
 #endif
 
-const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
-char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
-int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
-
-const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq);
-const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
-const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
-
-int sched_trace_rq_cpu(struct rq *rq);
-int sched_trace_rq_cpu_capacity(struct rq *rq);
-int sched_trace_rq_nr_running(struct rq *rq);
-
-const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
-
 #ifdef CONFIG_SCHED_CORE
 extern void sched_core_free(struct task_struct *tsk);
 extern void sched_core_fork(struct task_struct *p);
@@ -2362,4 +2392,6 @@ static inline void sched_core_free(struct task_struct *tsk) { }
 static inline void sched_core_fork(struct task_struct *p) { }
 #endif
 
+extern void sched_set_stop_task(int cpu, struct task_struct *stop);
+
 #endif
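
Under PREEMPT_DYNAMIC the preemption model is picked at boot, so compile-time checks such as IS_ENABLED(CONFIG_PREEMPT) no longer tell the whole story; callers are expected to use these runtime predicates instead. A small sketch:

#include <linux/sched.h>
#include <linux/printk.h>

static void example_report_preempt_model(void)
{
	if (preempt_model_rt())
		pr_info("preempt: rt\n");
	else if (preempt_model_full())
		pr_info("preempt: full\n");
	else if (preempt_model_voluntary())
		pr_info("preempt: voluntary\n");
	else
		pr_info("preempt: none\n");
}
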
index 3c8b34876744bda628be4c1084e39fc06dbdb938..20ed5ba2bde4367a299efa0c49e96d339450f1e6 100644 (file)
@@ -320,7 +320,7 @@ int send_sig_mceerr(int code, void __user *, short, struct task_struct *);
 
 int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper);
 int force_sig_pkuerr(void __user *addr, u32 pkey);
-int force_sig_perf(void __user *addr, u32 type, u64 sig_data);
+int send_sig_perf(void __user *addr, u32 type, u64 sig_data);
 
 int force_sig_ptrace_errno_trap(int errno, void __user *addr);
 int force_sig_fault_trapno(int sig, int code, void __user *addr, int trapno);
@@ -355,14 +355,23 @@ static inline void clear_notify_signal(void)
        smp_mb__after_atomic();
 }
 
+/*
+ * Returns 'true' if kick_process() is needed to force a transition from
+ * user -> kernel to guarantee an expedient run of TWA_SIGNAL-based task_work.
+ */
+static inline bool __set_notify_signal(struct task_struct *task)
+{
+       return !test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
+              !wake_up_state(task, TASK_INTERRUPTIBLE);
+}
+
 /*
  * Called to break out of interruptible wait loops, and enter the
  * exit_to_user_mode_loop().
  */
 static inline void set_notify_signal(struct task_struct *task)
 {
-       if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
-           !wake_up_state(task, TASK_INTERRUPTIBLE))
+       if (__set_notify_signal(task))
                kick_process(task);
 }
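/*
 * Editor's sketch, not part of the patch: a caller that can tolerate the
 * task noticing TIF_NOTIFY_SIGNAL at its next natural kernel entry may use
 * __set_notify_signal() directly and skip the kick_process() IPI:
 */
static inline void example_notify_lazy(struct task_struct *task)
{
	/* Sets the flag and wakes a sleeper, but never IPIs a running task. */
	(void)__set_notify_signal(task);
}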
 
index 892562ebbd3aa9522115cbc149ef9b77cbda4b6e..5e799a47431e842b8d60f0b966ab90a36c6fcb77 100644 (file)
@@ -16,7 +16,7 @@
  * try_get_task_stack() instead.  task_stack_page will return a pointer
  * that could get freed out from under you.
  */
-static inline void *task_stack_page(const struct task_struct *task)
+static __always_inline void *task_stack_page(const struct task_struct *task)
 {
        return task->stack;
 }
index 0c564e5d40ff23bf4d02e6b4f971f7d19c9fb7e9..d31d76be4982597af8d6d53f1f087745306cc59a 100644 (file)
@@ -8,7 +8,8 @@
                                         SECCOMP_FILTER_FLAG_LOG | \
                                         SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
                                         SECCOMP_FILTER_FLAG_NEW_LISTENER | \
-                                        SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
+                                        SECCOMP_FILTER_FLAG_TSYNC_ESRCH | \
+                                        SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
 
 /* sizeof() the first published struct seccomp_notif_addfd */
 #define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24
index 25b3ef71f495e0d4cdbf389ddde905d73922a435..7fc4e9f49f542b521afb53514f1229bca0683edc 100644 (file)
@@ -121,10 +121,12 @@ enum lockdown_reason {
        LOCKDOWN_DEBUGFS,
        LOCKDOWN_XMON_WR,
        LOCKDOWN_BPF_WRITE_USER,
+       LOCKDOWN_DBG_WRITE_KERNEL,
        LOCKDOWN_INTEGRITY_MAX,
        LOCKDOWN_KCORE,
        LOCKDOWN_KPROBES,
        LOCKDOWN_BPF_READ_KERNEL,
+       LOCKDOWN_DBG_READ_KERNEL,
        LOCKDOWN_PERF,
        LOCKDOWN_TRACEFS,
        LOCKDOWN_XMON_RW,
index cce8a9acc76cbe423a1e4e7c9c8fcba29cb5c66b..3af1428da5597b8935643d8848689e0900b32da5 100644 (file)
@@ -138,4 +138,32 @@ static inline u32 hsiphash(const void *data, size_t len,
        return ___hsiphash_aligned(data, len, key);
 }
 
+/*
+ * These macros expose the raw SipHash and HalfSipHash permutations.
+ * Do not use them directly! If you think you have a use for them,
+ * be sure to CC the maintainer of this file explaining why.
+ */
+
+#define SIPHASH_PERMUTATION(a, b, c, d) ( \
+       (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \
+       (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \
+       (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \
+       (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32))
+
+#define SIPHASH_CONST_0 0x736f6d6570736575ULL
+#define SIPHASH_CONST_1 0x646f72616e646f6dULL
+#define SIPHASH_CONST_2 0x6c7967656e657261ULL
+#define SIPHASH_CONST_3 0x7465646279746573ULL
+
+#define HSIPHASH_PERMUTATION(a, b, c, d) ( \
+       (a) += (b), (b) = rol32((b), 5), (b) ^= (a), (a) = rol32((a), 16), \
+       (c) += (d), (d) = rol32((d), 8), (d) ^= (c), \
+       (a) += (d), (d) = rol32((d), 7), (d) ^= (a), \
+       (c) += (b), (b) = rol32((b), 13), (b) ^= (c), (c) = rol32((c), 16))
+
+#define HSIPHASH_CONST_0 0U
+#define HSIPHASH_CONST_1 0U
+#define HSIPHASH_CONST_2 0x6c796765U
+#define HSIPHASH_CONST_3 0x74656462U
+
 #endif /* _LINUX_SIPHASH_H */
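/*
 * Editor's sketch, not part of the header: how the exported pieces fit
 * together for the 64-bit variant (the warning above still applies; the
 * intended consumers are special in-kernel users such as the random.c
 * fast pool). With an illustrative two-word key (key0, key1), state setup
 * XORs the key into the four constants:
 *
 *	v0 = SIPHASH_CONST_0 ^ key0;	v1 = SIPHASH_CONST_1 ^ key1;
 *	v2 = SIPHASH_CONST_2 ^ key0;	v3 = SIPHASH_CONST_3 ^ key1;
 *
 * and each 64-bit message word m is then folded in as:
 *
 *	v3 ^= m;
 *	SIPHASH_PERMUTATION(v0, v1, v2, v3);	(one SipRound, repeated
 *						 per the round count)
 *	v0 ^= m;
 */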
index 6f85f5d957efea06c7fc19c78181feda9e57aead..17311ad9f9af247967d0704aef7f84fd2bce1a07 100644 (file)
@@ -50,6 +50,9 @@ struct linger {
 struct msghdr {
        void            *msg_name;      /* ptr to socket address structure */
        int             msg_namelen;    /* size of socket address structure */
+
+       int             msg_inq;        /* output, data left in socket */
+
        struct iov_iter msg_iter;       /* data */
 
        /*
@@ -62,8 +65,9 @@ struct msghdr {
                void __user     *msg_control_user;
        };
        bool            msg_control_is_user : 1;
-       __kernel_size_t msg_controllen; /* ancillary data buffer length */
+       bool            msg_get_inq : 1; /* return INQ after receive */
        unsigned int    msg_flags;      /* flags on received message */
+       __kernel_size_t msg_controllen; /* ancillary data buffer length */
        struct kiocb    *msg_iocb;      /* ptr to iocb for async requests */
 };
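/*
 * Editor's sketch, not part of the patch: an in-kernel receiver (such as
 * the io_uring recv path) opts in before the call; protocols that support
 * it then report the residual receive-queue length in msg_inq:
 */
static inline int example_recv_with_inq(struct socket *sock, struct msghdr *msg)
{
	int ret;

	msg->msg_get_inq = 1;
	ret = sock_recvmsg(sock, msg, 0);
	/* On success, msg->msg_inq holds the bytes still queued. */
	return ret;
}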
 
@@ -434,6 +438,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags,
 extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
                         int __user *upeer_addrlen, int flags);
 extern int __sys_socket(int family, int type, int protocol);
+extern struct file *__sys_socket_file(int family, int type, int protocol);
 extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
 extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
                              int addrlen, int file_flags);
index 5f8c063ddff45c4e722252666227b15a900f27d2..df70eb1a671e984ed08b4b9858107fc0a7c300b0 100644 (file)
@@ -347,6 +347,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  * @max_message_size: function that returns the max message size for
  *     a &spi_device; may be %NULL, so the default %SIZE_MAX will be used.
  * @io_mutex: mutex for physical bus access
+ * @add_lock: mutex to avoid adding devices to the same chipselect
  * @bus_lock_spinlock: spinlock for SPI bus locking
  * @bus_lock_mutex: mutex for exclusion of multiple callers
  * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use
@@ -361,6 +362,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  * @transfer: adds a message to the controller's transfer queue.
  * @cleanup: frees controller-specific state
  * @can_dma: determine whether this controller supports DMA
+ * @dma_map_dev: device which can be used for DMA mapping
  * @queued: whether this controller is providing an internal message queue
  * @kworker: pointer to thread struct for message pump
  * @pump_messages: work struct for scheduling work to the message pump
index cb1f4351e8baa2d6ddf9c45b1d8f93c9013de0c8..e3014319d1ade73060534db1865429416dea581f 100644 (file)
@@ -47,11 +47,9 @@ struct srcu_data {
  */
 struct srcu_node {
        spinlock_t __private lock;
-       unsigned long srcu_have_cbs[4];         /* GP seq for children */
-                                               /*  having CBs, but only */
-                                               /*  is > ->srcu_gq_seq. */
-       unsigned long srcu_data_have_cbs[4];    /* Which srcu_data structs */
-                                               /*  have CBs for given GP? */
+       unsigned long srcu_have_cbs[4];         /* GP seq for children having CBs, but only */
+                                               /*  if greater than ->srcu_gp_seq. */
+       unsigned long srcu_data_have_cbs[4];    /* Which srcu_data structs have CBs for given GP? */
        unsigned long srcu_gp_seq_needed_exp;   /* Furthest future exp GP. */
        struct srcu_node *srcu_parent;          /* Next up in tree. */
        int grplo;                              /* Least CPU for node. */
@@ -62,18 +60,24 @@ struct srcu_node {
  * Per-SRCU-domain structure, similar in function to rcu_state.
  */
 struct srcu_struct {
-       struct srcu_node node[NUM_RCU_NODES];   /* Combining tree. */
+       struct srcu_node *node;                 /* Combining tree. */
        struct srcu_node *level[RCU_NUM_LVLS + 1];
                                                /* First node at each level. */
+       int srcu_size_state;                    /* Small-to-big transition state. */
        struct mutex srcu_cb_mutex;             /* Serialize CB preparation. */
-       spinlock_t __private lock;              /* Protect counters */
+       spinlock_t __private lock;              /* Protect counters and size state. */
        struct mutex srcu_gp_mutex;             /* Serialize GP work. */
        unsigned int srcu_idx;                  /* Current rdr array element. */
        unsigned long srcu_gp_seq;              /* Grace-period seq #. */
        unsigned long srcu_gp_seq_needed;       /* Latest gp_seq needed. */
        unsigned long srcu_gp_seq_needed_exp;   /* Furthest future exp GP. */
+       unsigned long srcu_gp_start;            /* Last GP start timestamp (jiffies) */
        unsigned long srcu_last_gp_end;         /* Last GP end timestamp (ns) */
+       unsigned long srcu_size_jiffies;        /* Current contention-measurement interval. */
+       unsigned long srcu_n_lock_retries;      /* Contention events in current interval. */
+       unsigned long srcu_n_exp_nodelay;       /* # expedited no-delays in current GP phase. */
        struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
+       bool sda_is_static;                     /* May ->sda be passed to free_percpu()? */
        unsigned long srcu_barrier_seq;         /* srcu_barrier seq #. */
        struct mutex srcu_barrier_mutex;        /* Serialize barrier ops. */
        struct completion srcu_barrier_completion;
@@ -81,10 +85,23 @@ struct srcu_struct {
        atomic_t srcu_barrier_cpu_cnt;          /* # CPUs not yet posting a */
                                                /*  callback for the barrier */
                                                /*  operation. */
+       unsigned long reschedule_jiffies;
+       unsigned long reschedule_count;
        struct delayed_work work;
        struct lockdep_map dep_map;
 };
 
+/* Values for size state variable (->srcu_size_state). */
+#define SRCU_SIZE_SMALL                0
+#define SRCU_SIZE_ALLOC                1
+#define SRCU_SIZE_WAIT_BARRIER 2
+#define SRCU_SIZE_WAIT_CALL    3
+#define SRCU_SIZE_WAIT_CBS1    4
+#define SRCU_SIZE_WAIT_CBS2    5
+#define SRCU_SIZE_WAIT_CBS3    6
+#define SRCU_SIZE_WAIT_CBS4    7
+#define SRCU_SIZE_BIG          8
+
 /* Values for state variable (bottom bits of ->srcu_gp_seq). */
 #define SRCU_STATE_IDLE                0
 #define SRCU_STATE_SCAN1       1
@@ -121,6 +138,7 @@ struct srcu_struct {
 #ifdef MODULE
 # define __DEFINE_SRCU(name, is_static)                                        \
        is_static struct srcu_struct name;                              \
+       extern struct srcu_struct * const __srcu_struct_##name;         \
        struct srcu_struct * const __srcu_struct_##name                 \
                __section("___srcu_struct_ptrs") = &name
 #else
index ccaab2043fcd513febda43f1a1dc61d5062791ef..c36e7a3b45e7e73fc24dcf1a836d568b0dcb85d1 100644 (file)
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 #include <asm/stacktrace.h>
 
+/*
+ * The lowest address on tsk's stack which we can plausibly erase.
+ */
+static __always_inline unsigned long
+stackleak_task_low_bound(const struct task_struct *tsk)
+{
+       /*
+        * The lowest unsigned long on the task stack contains STACK_END_MAGIC,
+        * which we must not corrupt.
+        */
+       return (unsigned long)end_of_stack(tsk) + sizeof(unsigned long);
+}
+
+/*
+ * The address immediately after the highest address on tsk's stack which we
+ * can plausibly erase.
+ */
+static __always_inline unsigned long
+stackleak_task_high_bound(const struct task_struct *tsk)
+{
+       /*
+        * The task's pt_regs lives at the top of the task stack and will be
+        * overwritten by exception entry, so there's no need to erase them.
+        */
+       return (unsigned long)task_pt_regs(tsk);
+}
+
+/*
+ * Find the address immediately above the poisoned region of the stack, where
+ * that region falls between 'low' (inclusive) and 'high' (exclusive).
+ */
+static __always_inline unsigned long
+stackleak_find_top_of_poison(const unsigned long low, const unsigned long high)
+{
+       const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+       unsigned int poison_count = 0;
+       unsigned long poison_high = high;
+       unsigned long sp = high;
+
+       while (sp > low && poison_count < depth) {
+               sp -= sizeof(unsigned long);
+
+               if (*(unsigned long *)sp == STACKLEAK_POISON) {
+                       poison_count++;
+               } else {
+                       poison_count = 0;
+                       poison_high = sp;
+               }
+       }
+
+       return poison_high;
+}
+
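/*
 * Editor's sketch, not part of the patch: the erase pass is expected to
 * combine these helpers along the lines of
 *
 *	unsigned long lo  = stackleak_task_low_bound(current);
 *	unsigned long hi  = stackleak_task_high_bound(current);
 *	unsigned long top = stackleak_find_top_of_poison(lo, hi);
 *
 * and then re-poison the used portion of the stack between these bounds,
 * without touching STACK_END_MAGIC below 'lo' or the pt_regs at 'hi' and
 * above. See the erase pass in kernel/stackleak.c for the real logic.
 */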
 static inline void stackleak_task_init(struct task_struct *t)
 {
-       t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long);
+       t->lowest_stack = stackleak_task_low_bound(t);
 # ifdef CONFIG_STACKLEAK_METRICS
        t->prev_lowest_stack = t->lowest_stack;
 # endif
index 46fb3ebdd16e44c61de04ec7349fb193bcb525f0..ea7a74ea7389318627bae6f5410576e00135ca76 100644 (file)
@@ -124,6 +124,22 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
  */
 int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
 
+/**
+ * stop_core_cpuslocked - stop all threads on just one core
+ * @cpu: any cpu in the targeted core
+ * @fn: the function to run
+ * @data: the data ptr for @fn()
+ *
+ * Same as above, but instead of every CPU, only the logical CPUs of a
+ * single core are affected.
+ *
+ * Context: Must be called from within a cpus_read_lock() protected region.
+ *
+ * Return: 0 if all executions of @fn returned 0, or a non-zero value if
+ * any of them returned non-zero.
+ */
+int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data);
+
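/*
 * Editor's sketch, not part of the patch: how a caller (e.g. a per-core
 * self-test driver) might serialise all SMT siblings of one core. The
 * test callback is caller-provided and illustrative only.
 */
static inline int example_run_core_test(unsigned int cpu, cpu_stop_fn_t test_fn,
					void *data)
{
	int ret;

	cpus_read_lock();		/* required calling context */
	ret = stop_core_cpuslocked(cpu, test_fn, data);
	cpus_read_unlock();
	return ret;
}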
 int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
                                   const struct cpumask *cpus);
 #else  /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
index db5149567305e60d4800f6e24611c882dbc53553..90501404fa49fefd578b1c9cd0c9b549032bb408 100644 (file)
@@ -160,7 +160,7 @@ struct rpc_add_xprt_test {
 #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT     (1UL << 9)
 #define RPC_CLNT_CREATE_SOFTERR                (1UL << 10)
 #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
-#define RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL (1UL << 12)
+#define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
index 300273ff40cc1c8f39d6f44472314a6c8e4229d9..70f2921e2e703248944bda50a2e2b5951d743ef7 100644 (file)
@@ -542,22 +542,56 @@ static inline void unlock_system_sleep(void) {}
 #ifdef CONFIG_PM_SLEEP_DEBUG
 extern bool pm_print_times_enabled;
 extern bool pm_debug_messages_on;
-extern __printf(2, 3) void __pm_pr_dbg(bool defer, const char *fmt, ...);
+static inline int pm_dyn_debug_messages_on(void)
+{
+#ifdef CONFIG_DYNAMIC_DEBUG
+       return 1;
+#else
+       return 0;
+#endif
+}
+#ifndef pr_fmt
+#define pr_fmt(fmt) "PM: " fmt
+#endif
+#define __pm_pr_dbg(fmt, ...)                                  \
+       do {                                                    \
+               if (pm_debug_messages_on)                       \
+                       printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__);  \
+               else if (pm_dyn_debug_messages_on())            \
+                       pr_debug(fmt, ##__VA_ARGS__);   \
+       } while (0)
+#define __pm_deferred_pr_dbg(fmt, ...)                         \
+       do {                                                    \
+               if (pm_debug_messages_on)                       \
+                       printk_deferred(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
+       } while (0)
 #else
 #define pm_print_times_enabled (false)
 #define pm_debug_messages_on   (false)
 
 #include <linux/printk.h>
 
-#define __pm_pr_dbg(defer, fmt, ...) \
-       no_printk(KERN_DEBUG fmt, ##__VA_ARGS__)
+#define __pm_pr_dbg(fmt, ...) \
+       no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#define __pm_deferred_pr_dbg(fmt, ...) \
+       no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 #endif
 
+/**
+ * pm_pr_dbg - print pm sleep debug messages
+ *
+ * If pm_debug_messages_on is enabled, print the message.
+ * If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is enabled,
+ *     print the message only from call sites explicitly enabled via the
+ *     dynamic debug control file.
+ * If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is disabled,
+ *     don't print the message.
+ */
 #define pm_pr_dbg(fmt, ...) \
-       __pm_pr_dbg(false, fmt, ##__VA_ARGS__)
+       __pm_pr_dbg(fmt, ##__VA_ARGS__)
 
 #define pm_deferred_pr_dbg(fmt, ...) \
-       __pm_pr_dbg(true, fmt, ##__VA_ARGS__)
+       __pm_deferred_pr_dbg(fmt, ##__VA_ARGS__)
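/*
 * Editor's sketch, not part of the patch: call sites stay unchanged, e.g.
 *
 *	pm_pr_dbg("resume of %s took %u ms\n", dev_name(dev), ms);
 *
 * prints when pm_debug_messages_on is set, and otherwise defers to the
 * dynamic debug machinery when CONFIG_DYNAMIC_DEBUG is built in.
 */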
 
 #ifdef CONFIG_PM_AUTOSLEEP
 
index 897494b597ba4f488784fd847c1a68752f3a49aa..795ef5a6842946fcf22d96e6e8e6100a64c7afd8 100644 (file)
@@ -17,6 +17,7 @@ enum task_work_notify_mode {
        TWA_NONE,
        TWA_RESUME,
        TWA_SIGNAL,
+       TWA_SIGNAL_NO_IPI,
 };
 
 static inline bool task_work_pending(struct task_struct *task)
index c314893970b3511cefc3263b44021b8ed95cfe8b..365733b428d8f222852ee86b11768c0402eb7953 100644 (file)
@@ -299,6 +299,8 @@ struct thermal_zone_params {
  *                temperature.
  * @set_trip_temp: a pointer to a function that sets the trip temperature on
  *                hardware.
+ * @change_mode: a pointer to a function that is called when the thermal
+ *                zone mode changes.
  */
 struct thermal_zone_of_device_ops {
        int (*get_temp)(void *, int *);
@@ -306,6 +308,7 @@ struct thermal_zone_of_device_ops {
        int (*set_trips)(void *, int, int);
        int (*set_emul_temp)(void *, int);
        int (*set_trip_temp)(void *, int, int);
+       int (*change_mode)(void *, enum thermal_device_mode);
 };
 
 /* Function declarations */
index 78a98bdff76d6a7e9f3ef630e90c2a5176ace9e7..fe1e467ba046f86b3222b1ceed41b6f2b7f85f97 100644 (file)
@@ -177,6 +177,7 @@ static inline u64 ktime_get_raw_ns(void)
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
 extern u64 ktime_get_boot_fast_ns(void);
+extern u64 ktime_get_tai_fast_ns(void);
 extern u64 ktime_get_real_fast_ns(void);
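/*
 * Editor's sketch, not part of the patch: like the other *_fast_ns()
 * accessors, the new TAI variant is intended to be usable from any
 * context, including NMI:
 *
 *	u64 now_tai = ktime_get_tai_fast_ns();
 */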
 
 /*
index fda13c9d1256c35f59b45f1f825e13d3ea494adc..648f00105f588dac3e713aab419c21ac8e056e99 100644 (file)
@@ -196,14 +196,6 @@ extern void init_timers(void);
 struct hrtimer;
 extern enum hrtimer_restart it_real_fn(struct hrtimer *);
 
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-struct ctl_table;
-
-extern unsigned int sysctl_timer_migration;
-int timer_migration_handler(struct ctl_table *table, int write,
-                           void *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
 unsigned long __round_jiffies(unsigned long j, int cpu);
 unsigned long __round_jiffies_relative(unsigned long j, int cpu);
 unsigned long round_jiffies(unsigned long j);
index 5745c90c880054ac2a734d43142f8aa4d81431e5..3871b06bd302ceb8e999b364c85eece21eaeac1f 100644 (file)
@@ -62,6 +62,8 @@
 #include <linux/types.h>
 #include <linux/param.h>
 
+unsigned long random_get_entropy_fallback(void);
+
 #include <asm/timex.h>
 
 #ifndef random_get_entropy
  *
  * By default we use get_cycles() for this purpose, but individual
  * architectures may override this in their asm/timex.h header file.
+ * If a given arch does not have get_cycles(), then we fall back to
+ * using random_get_entropy_fallback().
  */
+#ifdef get_cycles
 #define random_get_entropy()   ((unsigned long)get_cycles())
+#else
+#define random_get_entropy()   random_get_entropy_fallback()
+#endif
 #endif
 
 /*
index f19bc3626297ad76c113a5f492032dfb6b7d9856..4564faafd0e127eeee0439b093c5f4207daeac32 100644 (file)
@@ -240,13 +240,6 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
 }
 #endif
 
-#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask)
-static inline const struct cpumask *cpu_cluster_mask(int cpu)
-{
-       return topology_cluster_cpumask(cpu);
-}
-#endif
-
 static inline const struct cpumask *cpu_cpu_mask(int cpu)
 {
        return cpumask_of_node(cpu_to_node(cpu));
index 63fa4196e51cb2522704ec1f542817e8eb29d0f0..7038104463e481486424da66e909bcaf0604be2e 100644 (file)
@@ -118,7 +118,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
        _torture_stop_kthread("Stopping " #n " task", &(tp))
 
 #ifdef CONFIG_PREEMPTION
-#define torture_preempt_schedule() preempt_schedule()
+#define torture_preempt_schedule() __preempt_schedule()
 #else
 #define torture_preempt_schedule()     do { } while (0)
 #endif
index 546179418ffa20b3f24934682623bc9445945f20..5a328cf02b75ecb1afe27c570317c1964742751e 100644 (file)
@@ -231,6 +231,28 @@ static inline bool pagefault_disabled(void)
  */
 #define faulthandler_disabled() (pagefault_disabled() || in_atomic())
 
+#ifndef CONFIG_ARCH_HAS_SUBPAGE_FAULTS
+
+/**
+ * probe_subpage_writeable: probe the user range for write faults at sub-page
+ *                         granularity (e.g. arm64 MTE)
+ * @uaddr: start of address range
+ * @size: size of address range
+ *
+ * Returns 0 on success, the number of bytes not probed on fault.
+ *
+ * It is expected that the caller checked for the write permission of each
+ * page in the range either by put_user() or GUP. The architecture port can
+ * implement a more efficient get_user() probing if the same sub-page faults
+ * are triggered by either a read or a write.
+ */
+static inline size_t probe_subpage_writeable(char __user *uaddr, size_t size)
+{
+       return 0;
+}
+
+#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
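/*
 * Editor's sketch, not part of the patch: a caller that has already
 * pinned the destination pages can probe for sub-page faults before an
 * uninterruptible copy:
 */
static inline int example_check_user_dst(char __user *uaddr, size_t size)
{
	/* A non-zero result is the number of bytes that would fault. */
	if (probe_subpage_writeable(uaddr, size))
		return -EFAULT;
	return 0;
}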
+
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
 static inline __must_check unsigned long
index 329d63babaeb80fd4a7abbbfb17861c0fcf8914b..a54046bf37e55bd5460c3f0d090346e443f96c95 100644 (file)
 #else
 #define MODULE_VERMAGIC_MODVERSIONS ""
 #endif
-#ifdef RANDSTRUCT_PLUGIN
-#include <generated/randomize_layout_hash.h>
-#define MODULE_RANDSTRUCT_PLUGIN "RANDSTRUCT_PLUGIN_" RANDSTRUCT_HASHED_SEED
+#ifdef RANDSTRUCT
+#include <generated/randstruct_hash.h>
+#define MODULE_RANDSTRUCT "RANDSTRUCT_" RANDSTRUCT_HASHED_SEED
 #else
-#define MODULE_RANDSTRUCT_PLUGIN
+#define MODULE_RANDSTRUCT
 #endif
 
 #define VERMAGIC_STRING                                                \
@@ -44,6 +44,6 @@
        MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT                     \
        MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS       \
        MODULE_ARCH_VERMAGIC                                            \
-       MODULE_RANDSTRUCT_PLUGIN
+       MODULE_RANDSTRUCT
 
 #endif /* _LINUX_VERMAGIC_H */
index 8abd0824532634736dc13fb1d26e9449f6c70a7c..62d7b81b1cb74621d70a21f8295f57227b388dfe 100644 (file)
@@ -36,6 +36,9 @@
 /* HCI priority */
 #define HCI_PRIO_MAX   7
 
+/* HCI maximum id value */
+#define HCI_MAX_ID 10000
+
 /* HCI Core structures */
 struct inquiry_data {
        bdaddr_t        bdaddr;
index 463ae5d33eb09c40caeb4d039af268609b5e563b..5b47545f22d39eb2dd9725ac37bd7d7a9016a03c 100644 (file)
@@ -71,7 +71,6 @@ struct inet_timewait_sock {
                                tw_tos          : 8;
        u32                     tw_txhash;
        u32                     tw_priority;
-       u32                     tw_bslot; /* bind bucket slot */
        struct timer_list       tw_timer;
        struct inet_bind_bucket *tw_tb;
 };
@@ -110,6 +109,8 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo
 
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
+
 static inline
 struct net *twsk_net(const struct inet_timewait_sock *twsk)
 {
index 3984f2c39c4ba8b4d2a4e4dab6d743f0c9faf798..0161137914cf9d2111fde635b00604a5cf3d768c 100644 (file)
@@ -56,6 +56,7 @@ struct inet_skb_parm {
 #define IPSKB_DOREDIRECT       BIT(5)
 #define IPSKB_FRAG_PMTU                BIT(6)
 #define IPSKB_L3SLAVE          BIT(7)
+#define IPSKB_NOPOLICY         BIT(8)
 
        u16                     frag_max_size;
 };
index 748cf87a4d7ea5c92b4fd48dd3302b8ad64944fe..3e02709a1df656931942be4851a115dd6bef8b4c 100644 (file)
@@ -14,6 +14,7 @@ struct tcf_pedit {
        struct tc_action        common;
        unsigned char           tcfp_nkeys;
        unsigned char           tcfp_flags;
+       u32                     tcfp_off_max_hint;
        struct tc_pedit_key     *tcfp_keys;
        struct tcf_pedit_key_ex *tcfp_keys_ex;
 };
index 6fb899ff5afce9d73c7f02748bd14ffc4cdebcfe..d2efddce65d46b10f9c0983e4d597eb1685ba47f 100644 (file)
@@ -1093,6 +1093,18 @@ static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb,
        return false;
 }
 
+static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb,
+                                            int dir, unsigned short family)
+{
+       if (dir != XFRM_POLICY_OUT && family == AF_INET) {
+               /* same dst may be used for traffic originating from
+                * devices with different policy settings.
+                */
+               return IPCB(skb)->flags & IPSKB_NOPOLICY;
+       }
+       return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY);
+}
+
 static inline int __xfrm_policy_check2(struct sock *sk, int dir,
                                       struct sk_buff *skb,
                                       unsigned int family, int reverse)
@@ -1104,7 +1116,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
                return __xfrm_policy_check(sk, ndir, skb, family);
 
        return __xfrm_check_nopolicy(net, skb, dir) ||
-              (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
+              __xfrm_check_dev_nopolicy(skb, dir, family) ||
               __xfrm_policy_check(sk, ndir, skb, family);
 }
 
diff --git a/include/soc/fsl/caam-blob.h b/include/soc/fsl/caam-blob.h
new file mode 100644 (file)
index 0000000..937cac5
--- /dev/null
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Pengutronix, Ahmad Fatoum <kernel@pengutronix.de>
+ */
+
+#ifndef __CAAM_BLOB_GEN
+#define __CAAM_BLOB_GEN
+
+#include <linux/types.h>
+#include <linux/errno.h>
+
+#define CAAM_BLOB_KEYMOD_LENGTH                16
+#define CAAM_BLOB_OVERHEAD             (32 + 16)
+#define CAAM_BLOB_MAX_LEN              4096
+
+struct caam_blob_priv;
+
+/**
+ * struct caam_blob_info - information for CAAM blobbing
+ * @input:       pointer to input buffer (must be DMAable)
+ * @input_len:   length of @input buffer in bytes.
+ * @output:      pointer to output buffer (must be DMAable)
+ * @output_len:  length of @output buffer in bytes.
+ * @key_mod:     key modifier
+ * @key_mod_len: length of @key_mod in bytes.
+ *              May not exceed %CAAM_BLOB_KEYMOD_LENGTH
+ */
+struct caam_blob_info {
+       void *input;
+       size_t input_len;
+
+       void *output;
+       size_t output_len;
+
+       const void *key_mod;
+       size_t key_mod_len;
+};
+
+/**
+ * caam_blob_gen_init - initialize blob generation
+ * Return: pointer to new &struct caam_blob_priv instance on success
+ * and ``ERR_PTR(-ENODEV)`` if CAAM has no hardware blobbing support
+ * or no job ring could be allocated.
+ */
+struct caam_blob_priv *caam_blob_gen_init(void);
+
+/**
+ * caam_blob_gen_exit - free blob generation resources
+ * @priv: instance returned by caam_blob_gen_init()
+ */
+void caam_blob_gen_exit(struct caam_blob_priv *priv);
+
+/**
+ * caam_process_blob - encapsulate or decapsulate blob
+ * @priv:   instance returned by caam_blob_gen_init()
+ * @info:   pointer to blobbing info describing key, blob and
+ *          key modifier buffers.
+ * @encap:  true for encapsulation, false for decapsulation
+ *
+ * Return: %0 and sets ``info->output_len`` on success and a negative
+ * error code otherwise.
+ */
+int caam_process_blob(struct caam_blob_priv *priv,
+                     struct caam_blob_info *info, bool encap);
+
+/**
+ * caam_encap_blob - encapsulate blob
+ * @priv:   instance returned by caam_blob_gen_init()
+ * @info:   pointer to blobbing info describing input key,
+ *          output blob and key modifier buffers.
+ *
+ * Return: %0 and sets ``info->output_len`` on success and
+ * a negative error code otherwise.
+ */
+static inline int caam_encap_blob(struct caam_blob_priv *priv,
+                                 struct caam_blob_info *info)
+{
+       if (info->output_len < info->input_len + CAAM_BLOB_OVERHEAD)
+               return -EINVAL;
+
+       return caam_process_blob(priv, info, true);
+}
+
+/**
+ * caam_decap_blob - decapsulate blob
+ * @priv:   instance returned by caam_blob_gen_init()
+ * @info:   pointer to blobbing info describing output key,
+ *          input blob and key modifier buffers.
+ *
+ * Return: %0 and sets ``info->output_len`` on success and
+ * a negative error code otherwise.
+ */
+static inline int caam_decap_blob(struct caam_blob_priv *priv,
+                                 struct caam_blob_info *info)
+{
+       if (info->input_len < CAAM_BLOB_OVERHEAD ||
+           info->output_len < info->input_len - CAAM_BLOB_OVERHEAD)
+               return -EINVAL;
+
+       return caam_process_blob(priv, info, false);
+}
+
+#endif
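/*
 * Editor's sketch, not part of the patch: a typical round trip for
 * sealing a key ('key' and 'blob' are caller-provided DMAable buffers;
 * all names here are illustrative):
 */
static int example_seal_key(void *key, size_t key_len,
			    void *blob, size_t blob_len)
{
	struct caam_blob_priv *priv;
	struct caam_blob_info info = {
		.input       = key,
		.input_len   = key_len,
		.output      = blob,
		.output_len  = blob_len, /* >= key_len + CAAM_BLOB_OVERHEAD */
		.key_mod     = "example-modifier",
		.key_mod_len = 16,	 /* <= CAAM_BLOB_KEYMOD_LENGTH */
	};
	int ret;

	priv = caam_blob_gen_init();
	if (IS_ERR(priv))
		return PTR_ERR(priv);

	ret = caam_encap_blob(priv, &info);
	caam_blob_gen_exit(priv);
	return ret;
}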
index 7b2bf9b1fe697e8d9670e1f25eb07485271598ca..de26c992f82146cd4ae7ca8b5fde03563fbc70a2 100644 (file)
@@ -681,7 +681,6 @@ struct ocelot_vcap_id {
 
 struct ocelot_vcap_filter {
        struct list_head list;
-       struct list_head trap_list;
 
        enum ocelot_vcap_filter_type type;
        int block_id;
@@ -695,6 +694,7 @@ struct ocelot_vcap_filter {
        struct ocelot_vcap_stats stats;
        /* For VCAP IS1 and IS2 */
        bool take_ts;
+       bool is_trap;
        unsigned long ingress_port_mask;
        /* For VCAP ES0 */
        struct ocelot_vcap_port ingress_port;
diff --git a/include/soc/rockchip/pm_domains.h b/include/soc/rockchip/pm_domains.h
new file mode 100644 (file)
index 0000000..7dbd941
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2022, The Chromium OS Authors. All rights reserved.
+ */
+
+#ifndef __SOC_ROCKCHIP_PM_DOMAINS_H__
+#define __SOC_ROCKCHIP_PM_DOMAINS_H__
+
+#ifdef CONFIG_ROCKCHIP_PM_DOMAINS
+
+int rockchip_pmu_block(void);
+void rockchip_pmu_unblock(void);
+
+#else /* CONFIG_ROCKCHIP_PM_DOMAINS */
+
+static inline int rockchip_pmu_block(void)
+{
+       return 0;
+}
+
+static inline void rockchip_pmu_unblock(void) { }
+
+#endif /* CONFIG_ROCKCHIP_PM_DOMAINS */
+
+#endif /* __SOC_ROCKCHIP_PM_DOMAINS_H__ */
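/*
 * Editor's sketch, not part of the patch: a cooperating driver brackets
 * an operation that must not race with power-domain transitions:
 */
static int example_do_critical_op(void)
{
	int ret = rockchip_pmu_block();

	if (ret)
		return ret;
	/* ... touch hardware that the PMU must leave alone ... */
	rockchip_pmu_unblock();
	return 0;
}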
index 675f3a1fe613944464ab5c14fe841d590f91f17e..773963a1e0b5362d4e5b4423f8534c23dc6d7957 100644 (file)
@@ -14,7 +14,7 @@
 #define TRANSPORT_FLAG_PASSTHROUGH_ALUA                0x2
 #define TRANSPORT_FLAG_PASSTHROUGH_PGR          0x4
 
-struct request_queue;
+struct block_device;
 struct scatterlist;
 
 struct target_backend_ops {
@@ -117,7 +117,7 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
 bool target_sense_desc_format(struct se_device *dev);
 sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
-                                      struct request_queue *q);
+                                      struct block_device *bdev);
 
 static inline bool target_dev_configured(struct se_device *se_dev)
 {
diff --git a/include/trace/events/intel_ifs.h b/include/trace/events/intel_ifs.h
new file mode 100644 (file)
index 0000000..d735302
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM intel_ifs
+
+#if !defined(_TRACE_IFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IFS_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(ifs_status,
+
+       TP_PROTO(int cpu, union ifs_scan activate, union ifs_status status),
+
+       TP_ARGS(cpu, activate, status),
+
+       TP_STRUCT__entry(
+               __field(        u64,    status  )
+               __field(        int,    cpu     )
+               __field(        u8,     start   )
+               __field(        u8,     stop    )
+       ),
+
+       TP_fast_assign(
+               __entry->cpu    = cpu;
+               __entry->start  = activate.start;
+               __entry->stop   = activate.stop;
+               __entry->status = status.data;
+       ),
+
+       TP_printk("cpu: %d, start: %.2x, stop: %.2x, status: %llx",
+               __entry->cpu,
+               __entry->start,
+               __entry->stop,
+               __entry->status)
+);
+
+#endif /* _TRACE_IFS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index cddf5b6fbeb452183d1d9ff5d497e31dd9334a9b..66fcc5a1a5b1c0321414418031a7894f543cdef4 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/tracepoint.h>
 #include <uapi/linux/io_uring.h>
+#include <linux/io_uring.h>
 
 struct io_wq_work;
 
@@ -147,7 +148,7 @@ TRACE_EVENT(io_uring_queue_async_work,
        TP_PROTO(void *ctx, void * req, unsigned long long user_data, u8 opcode,
                unsigned int flags, struct io_wq_work *work, int rw),
 
-       TP_ARGS(ctx, req, user_data, flags, opcode, work, rw),
+       TP_ARGS(ctx, req, user_data, opcode, flags, work, rw),
 
        TP_STRUCT__entry (
                __field(  void *,                       ctx             )
@@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work,
                __entry->rw             = rw;
        ),
 
-       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
-               __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
+               __entry->ctx, __entry->req, __entry->user_data,
+               io_uring_get_opcode(__entry->opcode),
                __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
 );
 
@@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer,
                __entry->opcode = opcode;
        ),
 
-       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
-               __entry->ctx, __entry->req, __entry->data, __entry->opcode)
+       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
+               __entry->ctx, __entry->req, __entry->data,
+               io_uring_get_opcode(__entry->opcode))
 );
 
 /**
@@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link,
                __entry->link           = link;
        ),
 
-       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
-               __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
-               __entry->link)
+       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
+               __entry->ctx, __entry->req, __entry->user_data,
+               io_uring_get_opcode(__entry->opcode), __entry->link)
 );
 
 /**
@@ -318,13 +321,16 @@ TRACE_EVENT(io_uring_fail_link,
  * @user_data:         user data associated with the request
  * @res:               result of the request
  * @cflags:            completion flags
+ * @extra1:            extra 64-bit data for CQE32
+ * @extra2:            extra 64-bit data for CQE32
  *
  */
 TRACE_EVENT(io_uring_complete,
 
-       TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags),
+       TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags,
+                u64 extra1, u64 extra2),
 
-       TP_ARGS(ctx, req, user_data, res, cflags),
+       TP_ARGS(ctx, req, user_data, res, cflags, extra1, extra2),
 
        TP_STRUCT__entry (
                __field(  void *,       ctx             )
@@ -332,6 +338,8 @@ TRACE_EVENT(io_uring_complete,
                __field(  u64,          user_data       )
                __field(  int,          res             )
                __field(  unsigned,     cflags          )
+               __field(  u64,          extra1          )
+               __field(  u64,          extra2          )
        ),
 
        TP_fast_assign(
@@ -340,12 +348,17 @@ TRACE_EVENT(io_uring_complete,
                __entry->user_data      = user_data;
                __entry->res            = res;
                __entry->cflags         = cflags;
+               __entry->extra1         = extra1;
+               __entry->extra2         = extra2;
        ),
 
-       TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x",
+       TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x "
+                 "extra1 %llu extra2 %llu ",
                __entry->ctx, __entry->req,
                __entry->user_data,
-               __entry->res, __entry->cflags)
+               __entry->res, __entry->cflags,
+               (unsigned long long) __entry->extra1,
+               (unsigned long long) __entry->extra2)
 );
 
 /**
@@ -389,9 +402,9 @@ TRACE_EVENT(io_uring_submit_sqe,
                __entry->sq_thread      = sq_thread;
        ),
 
-       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
+       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
                  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
-                 __entry->user_data, __entry->opcode,
+                 __entry->user_data, io_uring_get_opcode(__entry->opcode),
                  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
 );
 
@@ -433,8 +446,9 @@ TRACE_EVENT(io_uring_poll_arm,
                __entry->events         = events;
        ),
 
-       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
-                 __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
+                 __entry->ctx, __entry->req, __entry->user_data,
+                 io_uring_get_opcode(__entry->opcode),
                  __entry->mask, __entry->events)
 );
 
@@ -470,8 +484,9 @@ TRACE_EVENT(io_uring_task_add,
                __entry->mask           = mask;
        ),
 
-       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
-               __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
+               __entry->ctx, __entry->req, __entry->user_data,
+               io_uring_get_opcode(__entry->opcode),
                __entry->mask)
 );
 
@@ -506,7 +521,7 @@ TRACE_EVENT(io_uring_req_failed,
                __field( u16,                   personality     )
                __field( u32,                   file_index      )
                __field( u64,                   pad1            )
-               __field( u64,                   pad2            )
+               __field( u64,                   addr3           )
                __field( int,                   error           )
        ),
 
@@ -520,27 +535,69 @@ TRACE_EVENT(io_uring_req_failed,
                __entry->off            = sqe->off;
                __entry->addr           = sqe->addr;
                __entry->len            = sqe->len;
-               __entry->op_flags       = sqe->rw_flags;
+               __entry->op_flags       = sqe->poll32_events;
                __entry->buf_index      = sqe->buf_index;
                __entry->personality    = sqe->personality;
                __entry->file_index     = sqe->file_index;
                __entry->pad1           = sqe->__pad2[0];
-               __entry->pad2           = sqe->__pad2[1];
+               __entry->addr3          = sqe->addr3;
                __entry->error          = error;
        ),
 
        TP_printk("ring %p, req %p, user_data 0x%llx, "
-               "op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
+                 "opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
                  "len=%u, rw_flags=0x%x, buf_index=%d, "
-                 "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+                 "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
+                 "error=%d",
                  __entry->ctx, __entry->req, __entry->user_data,
-                 __entry->opcode, __entry->flags, __entry->ioprio,
+                 io_uring_get_opcode(__entry->opcode),
+                 __entry->flags, __entry->ioprio,
                  (unsigned long long)__entry->off,
                  (unsigned long long) __entry->addr, __entry->len,
                  __entry->op_flags,
                  __entry->buf_index, __entry->personality, __entry->file_index,
                  (unsigned long long) __entry->pad1,
-                 (unsigned long long) __entry->pad2, __entry->error)
+                 (unsigned long long) __entry->addr3, __entry->error)
+);
+
+
+/*
+ * io_uring_cqe_overflow - a CQE overflowed
+ *
+ * @ctx:               pointer to a ring context structure
+ * @user_data:         user data associated with the request
+ * @res:               CQE result
+ * @cflags:            CQE flags
+ * @ocqe:              pointer to the overflow cqe (if available)
+ *
+ */
+TRACE_EVENT(io_uring_cqe_overflow,
+
+       TP_PROTO(void *ctx, unsigned long long user_data, s32 res, u32 cflags,
+                void *ocqe),
+
+       TP_ARGS(ctx, user_data, res, cflags, ocqe),
+
+       TP_STRUCT__entry (
+               __field(  void *,               ctx             )
+               __field(  unsigned long long,   user_data       )
+               __field(  s32,                  res             )
+               __field(  u32,                  cflags          )
+               __field(  void *,               ocqe            )
+       ),
+
+       TP_fast_assign(
+               __entry->ctx            = ctx;
+               __entry->user_data      = user_data;
+               __entry->res            = res;
+               __entry->cflags         = cflags;
+               __entry->ocqe           = ocqe;
+       ),
+
+       TP_printk("ring %p, user_data 0x%llx, res %d, flags %x, "
+                 "overflow_cqe %p",
+                 __entry->ctx, __entry->user_data, __entry->res,
+                 __entry->cflags, __entry->ocqe)
 );
 
 #endif /* _TRACE_IO_URING_H */
index d7512129a324e5de49e1cafb8a7856a63324c989..9ebd081e057e8769a9779e3b00ee69d45fecf57d 100644 (file)
@@ -5,11 +5,22 @@
 #if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_LOCK_H
 
-#include <linux/lockdep.h>
+#include <linux/sched.h>
 #include <linux/tracepoint.h>
 
+/* flags for lock:contention_begin */
+#define LCB_F_SPIN     (1U << 0)
+#define LCB_F_READ     (1U << 1)
+#define LCB_F_WRITE    (1U << 2)
+#define LCB_F_RT       (1U << 3)
+#define LCB_F_PERCPU   (1U << 4)
+#define LCB_F_MUTEX    (1U << 5)
+
 #ifdef CONFIG_LOCKDEP
 
+#include <linux/lockdep.h>
+
 TRACE_EVENT(lock_acquire,
 
        TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
@@ -78,8 +89,54 @@ DEFINE_EVENT(lock, lock_acquired,
        TP_ARGS(lock, ip)
 );
 
-#endif
-#endif
+#endif /* CONFIG_LOCK_STAT */
+#endif /* CONFIG_LOCKDEP */
+
+TRACE_EVENT(contention_begin,
+
+       TP_PROTO(void *lock, unsigned int flags),
+
+       TP_ARGS(lock, flags),
+
+       TP_STRUCT__entry(
+               __field(void *, lock_addr)
+               __field(unsigned int, flags)
+       ),
+
+       TP_fast_assign(
+               __entry->lock_addr = lock;
+               __entry->flags = flags;
+       ),
+
+       TP_printk("%p (flags=%s)", __entry->lock_addr,
+                 __print_flags(__entry->flags, "|",
+                               { LCB_F_SPIN,           "SPIN" },
+                               { LCB_F_READ,           "READ" },
+                               { LCB_F_WRITE,          "WRITE" },
+                               { LCB_F_RT,             "RT" },
+                               { LCB_F_PERCPU,         "PERCPU" },
+                               { LCB_F_MUTEX,          "MUTEX" }
+                         ))
+);
+
+TRACE_EVENT(contention_end,
+
+       TP_PROTO(void *lock, int ret),
+
+       TP_ARGS(lock, ret),
+
+       TP_STRUCT__entry(
+               __field(void *, lock_addr)
+               __field(int, ret)
+       ),
+
+       TP_fast_assign(
+               __entry->lock_addr = lock;
+               __entry->ret = ret;
+       ),
+
+       TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret)
+);
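/*
 * Editor's sketch, not part of the patch: a locking slowpath brackets its
 * wait with the pair, encoding the wait type in the LCB_F_* flags, e.g.
 *
 *	trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
 *	ret = example_slowpath(lock);
 *	trace_contention_end(lock, ret);
 *
 * where example_slowpath() stands in for the real acquisition path.
 */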
 
 #endif /* _TRACE_LOCK_H */
 
index 65e7867563214270ec80bdbad1da5d8a824a4f79..fbb99a61f714cbebb91ba9280ce44f812ece32de 100644 (file)
@@ -222,11 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt,
 TRACE_EVENT(sched_switch,
 
        TP_PROTO(bool preempt,
-                unsigned int prev_state,
                 struct task_struct *prev,
-                struct task_struct *next),
+                struct task_struct *next,
+                unsigned int prev_state),
 
-       TP_ARGS(preempt, prev_state, prev, next),
+       TP_ARGS(preempt, prev, next, prev_state),
 
        TP_STRUCT__entry(
                __array(        char,   prev_comm,      TASK_COMM_LEN   )
index 6ad031c71be748528a4cc28d6324a4c8353de161..2e713a7d9aa3a45c11c551d038d8ea4d087017d6 100644 (file)
@@ -48,6 +48,7 @@ DEFINE_EVENT(timer_class, timer_init,
  * timer_start - called when the timer is started
  * @timer:     pointer to struct timer_list
 * @expires:	the timer's expiry time
+ * @flags:	the timer's flags
  */
 TRACE_EVENT(timer_start,
 
@@ -84,6 +85,7 @@ TRACE_EVENT(timer_start,
 /**
  * timer_expire_entry - called immediately before the timer callback
  * @timer:     pointer to struct timer_list
+ * @baseclk:   value of timer_base::clk when timer expires
  *
 * Allows determining the timer latency.
  */
@@ -190,7 +192,8 @@ TRACE_EVENT(hrtimer_init,
 
 /**
  * hrtimer_start - called when the hrtimer is started
- * @hrtimer: pointer to struct hrtimer
+ * @hrtimer:   pointer to struct hrtimer
+ * @mode:	the hrtimer's mode
  */
 TRACE_EVENT(hrtimer_start,
 
index 3ba180f550d7cfc9cf78e4c7133047070625b11b..ffbe4cec9f32de390af4887de2e0aa42c945f153 100644 (file)
@@ -99,6 +99,7 @@ union __sifields {
                        struct {
                                unsigned long _data;
                                __u32 _type;
+                               __u32 _flags;
                        } _perf;
                };
        } _sigfault;
@@ -164,6 +165,7 @@ typedef struct siginfo {
 #define si_pkey                _sifields._sigfault._addr_pkey._pkey
 #define si_perf_data   _sifields._sigfault._perf._data
 #define si_perf_type   _sifields._sigfault._perf._type
+#define si_perf_flags  _sifields._sigfault._perf._flags
 #define si_band                _sifields._sigpoll._band
 #define si_fd          _sifields._sigpoll._fd
 #define si_call_addr   _sifields._sigsys._call_addr
@@ -270,6 +272,11 @@ typedef struct siginfo {
  * that are of the form: ((PTRACE_EVENT_XXX << 8) | SIGTRAP)
  */
 
+/*
+ * Flags for si_perf_flags if SIGTRAP si_code is TRAP_PERF.
+ */
+#define TRAP_PERF_FLAG_ASYNC (1u << 0)
+
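/*
 * Editor's sketch, not part of the patch: a TRAP_PERF SIGTRAP handler can
 * now distinguish asynchronous delivery, e.g.
 *
 *	if (info->si_code == TRAP_PERF &&
 *	    (info->si_perf_flags & TRAP_PERF_FLAG_ASYNC))
 *		handle_async_sample(info);
 *
 * where handle_async_sample() is illustrative only.
 */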
 /*
  * SIGCHLD si_codes
  */
index 804ff8d98f710c873c4876fcf3d6878cec4728f5..011e594e4a0da66f652dc77c8a468aa815591c65 100644 (file)
 #define CDROMREADALL           0x5318  /* read all 2646 bytes */
 
 /* 
- * These ioctls are (now) only in ide-cd.c for controlling 
+ * These ioctls were only in (now removed) ide-cd.c for controlling
  * drive spindown time.  They should be implemented in the
  * Uniform driver, via generic packet commands, GPCMD_MODE_SELECT_10,
  * GPCMD_MODE_SENSE_10 and the GPMODE_POWER_PAGE...
index 8e4a2ca0bcbf70abfa3735982bc2fd9f87508fb2..b1523cb8ab307d7454160a5d5b3f1d1c0fc1b596 100644 (file)
@@ -92,7 +92,7 @@ struct dma_buf_sync {
  * between them in actual uapi, they're just different numbers.
  */
 #define DMA_BUF_SET_NAME       _IOW(DMA_BUF_BASE, 1, const char *)
-#define DMA_BUF_SET_NAME_A     _IOW(DMA_BUF_BASE, 1, u32)
-#define DMA_BUF_SET_NAME_B     _IOW(DMA_BUF_BASE, 1, u64)
+#define DMA_BUF_SET_NAME_A     _IOW(DMA_BUF_BASE, 1, __u32)
+#define DMA_BUF_SET_NAME_B     _IOW(DMA_BUF_BASE, 1, __u64)
 
 #endif
index 7ce993e6786ccae99474795844a01cc4e6b0ae8d..429bec8dd70aa75ba4141f3d05d25816a6bbdcbd 100644 (file)
@@ -431,6 +431,8 @@ typedef struct elf64_shdr {
 #define NT_ARM_PACG_KEYS       0x408   /* ARM pointer authentication generic key */
 #define NT_ARM_TAGGED_ADDR_CTRL        0x409   /* arm64 tagged address control (prctl()) */
 #define NT_ARM_PAC_ENABLED_KEYS        0x40a   /* arm64 ptr auth enabled keys (prctl()) */
+#define NT_ARM_SSVE    0x40b           /* ARM Streaming SVE registers */
+#define NT_ARM_ZA      0x40c           /* ARM SME ZA registers */
 #define NT_ARC_V2      0x600           /* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD    0x700           /* Vmcore Device Dump Note */
 #define NT_MIPS_DSP    0x800           /* MIPS DSP ASE registers */
index ee3127461ee0093f33888d88e8385a267a42b829..ef4257ab30265c67940b2d7a06fc237d776dee80 100644 (file)
@@ -271,6 +271,7 @@ struct input_mask {
 #define BUS_RMI                        0x1D
 #define BUS_CEC                        0x1E
 #define BUS_INTEL_ISHTP                0x1F
+#define BUS_AMD_SFH            0x20
 
 /*
  * MT_TOOL types
index 1845cf7c80bade8458a78843fa2d0d6e9ad4710c..53e7dae92e42e4d9a373d2ef61ab925f0a8d64d8 100644 (file)
@@ -22,6 +22,7 @@ struct io_uring_sqe {
        union {
                __u64   off;    /* offset into file */
                __u64   addr2;
+               __u32   cmd_op;
        };
        union {
                __u64   addr;   /* pointer to buffer or iovecs */
@@ -45,6 +46,7 @@ struct io_uring_sqe {
                __u32           rename_flags;
                __u32           unlink_flags;
                __u32           hardlink_flags;
+               __u32           xattr_flags;
        };
        __u64   user_data;      /* data to be passed back at completion time */
        /* pack this to avoid bogus arm OABI complaints */
@@ -60,9 +62,28 @@ struct io_uring_sqe {
                __s32   splice_fd_in;
                __u32   file_index;
        };
-       __u64   __pad2[2];
+       union {
+               struct {
+                       __u64   addr3;
+                       __u64   __pad2[1];
+               };
+               /*
+                * If the ring is initialized with IORING_SETUP_SQE128, then
+                * this field is used for 80 bytes of arbitrary command data
+                */
+               __u8    cmd[0];
+       };
 };
 
+/*
+ * If sqe->file_index is set to this for opcodes that instantiate a new
+ * direct descriptor (like openat/openat2/accept), then io_uring will allocate
+ * an available direct descriptor instead of having the application pass one
+ * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
+ * if the space is full.
+ */
+#define IORING_FILE_INDEX_ALLOC                (~0U)
+
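/*
 * Editor's sketch, not part of the patch: raw-SQE userspace usage (no
 * liburing), e.g. accepting into an auto-allocated direct descriptor:
 *
 *	sqe->opcode     = IORING_OP_ACCEPT;
 *	sqe->fd         = listen_fd;
 *	sqe->file_index = IORING_FILE_INDEX_ALLOC;
 *
 * On completion, cqe->res holds the chosen direct-descriptor index, or
 * -ENFILE when the table is full. 'listen_fd' is illustrative only.
 */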
 enum {
        IOSQE_FIXED_FILE_BIT,
        IOSQE_IO_DRAIN_BIT,
@@ -102,8 +123,25 @@ enum {
 #define IORING_SETUP_ATTACH_WQ (1U << 5)       /* attach to existing wq */
 #define IORING_SETUP_R_DISABLED        (1U << 6)       /* start with ring disabled */
 #define IORING_SETUP_SUBMIT_ALL        (1U << 7)       /* continue submit on error */
+/*
+ * Cooperative task running. When requests complete, they often require
+ * forcing the submitter to transition to the kernel to complete. If this
+ * flag is set, work will be done when the task transitions anyway, rather
+ * than force an inter-processor interrupt reschedule. This avoids interrupting
+ * a task running in userspace, and saves an IPI.
+ */
+#define IORING_SETUP_COOP_TASKRUN      (1U << 8)
+/*
+ * If COOP_TASKRUN is set, get notified if task work is available for
+ * running and a kernel transition would be needed to run it. This sets
+ * IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN.
+ */
+#define IORING_SETUP_TASKRUN_FLAG      (1U << 9)
 
-enum {
+#define IORING_SETUP_SQE128            (1U << 10) /* SQEs are 128 byte */
+#define IORING_SETUP_CQE32             (1U << 11) /* CQEs are 32 byte */
+
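/*
 * Editor's sketch, not part of the patch: opting a new ring into
 * cooperative task running at setup time (raw syscall interface;
 * QUEUE_DEPTH is illustrative):
 *
 *	struct io_uring_params p = {
 *		.flags = IORING_SETUP_COOP_TASKRUN |
 *			 IORING_SETUP_TASKRUN_FLAG,
 *	};
 *	ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
 *
 * SQE128/CQE32 are requested the same way but double the SQE/CQE sizes,
 * so the ring mmap size calculations must account for them.
 */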
+enum io_uring_op {
        IORING_OP_NOP,
        IORING_OP_READV,
        IORING_OP_WRITEV,
@@ -145,6 +183,12 @@ enum {
        IORING_OP_SYMLINKAT,
        IORING_OP_LINKAT,
        IORING_OP_MSG_RING,
+       IORING_OP_FSETXATTR,
+       IORING_OP_SETXATTR,
+       IORING_OP_FGETXATTR,
+       IORING_OP_GETXATTR,
+       IORING_OP_SOCKET,
+       IORING_OP_URING_CMD,
 
        /* this goes last, obviously */
        IORING_OP_LAST,
@@ -187,6 +231,33 @@ enum {
 #define IORING_POLL_UPDATE_EVENTS      (1U << 1)
 #define IORING_POLL_UPDATE_USER_DATA   (1U << 2)
 
+/*
+ * ASYNC_CANCEL flags.
+ *
+ * IORING_ASYNC_CANCEL_ALL     Cancel all requests that match the given key
+ * IORING_ASYNC_CANCEL_FD      Key off 'fd' for cancelation rather than the
+ *                             request 'user_data'
+ * IORING_ASYNC_CANCEL_ANY     Match any request
+ */
+#define IORING_ASYNC_CANCEL_ALL        (1U << 0)
+#define IORING_ASYNC_CANCEL_FD (1U << 1)
+#define IORING_ASYNC_CANCEL_ANY        (1U << 2)
+
+/*
+ * send/sendmsg and recv/recvmsg flags (sqe->addr2)
+ *
+ * IORING_RECVSEND_POLL_FIRST  If set, instead of first attempting to send
+ *                             or receive and arm poll if that yields an
+ *                             -EAGAIN result, arm poll upfront and skip
+ *                             the initial transfer attempt.
+ */
+#define IORING_RECVSEND_POLL_FIRST     (1U << 0)
+
+/*
+ * accept flags stored in sqe->ioprio
+ */
+#define IORING_ACCEPT_MULTISHOT        (1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
@@ -194,6 +265,12 @@ struct io_uring_cqe {
        __u64   user_data;      /* sqe->data submission passed back */
        __s32   res;            /* result code for this event */
        __u32   flags;
+
+       /*
+        * If the ring is initialized with IORING_SETUP_CQE32, then this field
+        * contains 16 bytes of padding, doubling the size of the CQE.
+        */
+       __u64 big_cqe[];
 };
 
 /*
@@ -201,9 +278,11 @@ struct io_uring_cqe {
  *
  * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
  * IORING_CQE_F_MORE   If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_SOCK_NONEMPTY  If set, more data to read after socket recv
  */
 #define IORING_CQE_F_BUFFER            (1U << 0)
 #define IORING_CQE_F_MORE              (1U << 1)
+#define IORING_CQE_F_SOCK_NONEMPTY     (1U << 2)
 
 enum {
        IORING_CQE_BUFFER_SHIFT         = 16,
@@ -236,6 +315,7 @@ struct io_sqring_offsets {
  */
 #define IORING_SQ_NEED_WAKEUP  (1U << 0) /* needs io_uring_enter wakeup */
 #define IORING_SQ_CQ_OVERFLOW  (1U << 1) /* CQ ring is overflown */
+#define IORING_SQ_TASKRUN      (1U << 2) /* task should enter the kernel */
 
 struct io_cqring_offsets {
        __u32 head;
@@ -333,6 +413,10 @@ enum {
        IORING_REGISTER_RING_FDS                = 20,
        IORING_UNREGISTER_RING_FDS              = 21,
 
+       /* register ring based provide buffer group */
+       IORING_REGISTER_PBUF_RING               = 22,
+       IORING_UNREGISTER_PBUF_RING             = 23,
+
        /* this goes last */
        IORING_REGISTER_LAST
 };
@@ -350,9 +434,15 @@ struct io_uring_files_update {
        __aligned_u64 /* __s32 * */ fds;
 };
 
+/*
+ * Register a fully sparse file space, rather than pass in an array of all
+ * -1 file descriptors.
+ */
+#define IORING_RSRC_REGISTER_SPARSE    (1U << 0)
+
 struct io_uring_rsrc_register {
        __u32 nr;
-       __u32 resv;
+       __u32 flags;
        __u64 resv2;
        __aligned_u64 data;
        __aligned_u64 tags;
@@ -404,6 +494,38 @@ struct io_uring_restriction {
        __u32 resv2[3];
 };
 
+struct io_uring_buf {
+       __u64   addr;
+       __u32   len;
+       __u16   bid;
+       __u16   resv;
+};
+
+struct io_uring_buf_ring {
+       union {
+               /*
+                * To avoid spilling into more pages than we need to, the
+                * ring tail is overlaid with the io_uring_buf->resv field.
+                */
+               struct {
+                       __u64   resv1;
+                       __u32   resv2;
+                       __u16   resv3;
+                       __u16   tail;
+               };
+               struct io_uring_buf     bufs[0];
+       };
+};
+
+/* argument for IORING_(UN)REGISTER_PBUF_RING */
+struct io_uring_buf_reg {
+       __u64   ring_addr;
+       __u32   ring_entries;
+       __u16   bgid;
+       __u16   pad;
+       __u64   resv[3];
+};
+
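/*
 * Editor's sketch, not part of the patch: userspace registers a provided
 * buffer ring by allocating page-aligned memory for it and describing it
 * here ('ring_mem' is an illustrative pointer):
 *
 *	struct io_uring_buf_reg reg = {
 *		.ring_addr    = (__u64)(uintptr_t)ring_mem,
 *		.ring_entries = 256,	(must be a power of two)
 *		.bgid         = 1,
 *	};
 *	io_uring_register(ring_fd, IORING_REGISTER_PBUF_RING, &reg, 1);
 */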
 /*
  * io_uring_restriction->opcode values
  */
index b3d952067f59c12af7bb3105ab00543ec2d23ec8..23df4e0e8ace78a6b58d9289fd30ee435fb7f2ff 100644 (file)
@@ -21,8 +21,14 @@ struct landlock_ruleset_attr {
        /**
         * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_)
         * that is handled by this ruleset and should then be forbidden if no
-        * rule explicitly allow them.  This is needed for backward
-        * compatibility reasons.
+        * rule explicitly allows them: it is a deny-by-default list that should
+        * contain as many Landlock access rights as possible. Indeed, all
+        * Landlock filesystem access rights that are not part of
+        * handled_access_fs are allowed.  This is needed for backward
+        * compatibility reasons.  One exception is the
+        * LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly
+        * handled, but must still be explicitly handled to add new rules with
+        * this access right.
         */
        __u64 handled_access_fs;
 };
@@ -33,7 +39,9 @@ struct landlock_ruleset_attr {
  * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI
  *   version.
  */
+/* clang-format off */
 #define LANDLOCK_CREATE_RULESET_VERSION                        (1U << 0)
+/* clang-format on */
 
 /**
  * enum landlock_rule_type - Landlock rule type
@@ -60,8 +68,9 @@ struct landlock_path_beneath_attr {
         */
        __u64 allowed_access;
        /**
-        * @parent_fd: File descriptor, open with ``O_PATH``, which identifies
-        * the parent directory of a file hierarchy, or just a file.
+        * @parent_fd: File descriptor, preferably opened with ``O_PATH``,
+        * which identifies the parent directory of a file hierarchy, or just a
+        * file.
         */
        __s32 parent_fd;
        /*
@@ -109,6 +118,22 @@ struct landlock_path_beneath_attr {
  * - %LANDLOCK_ACCESS_FS_MAKE_FIFO: Create (or rename or link) a named pipe.
  * - %LANDLOCK_ACCESS_FS_MAKE_BLOCK: Create (or rename or link) a block device.
  * - %LANDLOCK_ACCESS_FS_MAKE_SYM: Create (or rename or link) a symbolic link.
+ * - %LANDLOCK_ACCESS_FS_REFER: Link or rename a file from or to a different
+ *   directory (i.e. reparent a file hierarchy).  This access right is
+ *   available since the second version of the Landlock ABI.  This is also the
+ *   only access right which is always considered handled by any ruleset in
+ *   such a way that reparenting a file hierarchy is always denied by default.
+ *   To avoid privilege escalation, it is not enough to add a rule with this
+ *   access right.  When linking or renaming a file, the destination directory
+ *   hierarchy must also always have the same or a superset of restrictions of
+ *   the source hierarchy.  If it is not the case, or if the domain doesn't
+ *   handle this access right, such actions are denied by default with errno
+ *   set to EXDEV.  Linking also requires a LANDLOCK_ACCESS_FS_MAKE_* access
+ *   right on the destination directory, and renaming also requires a
+ *   LANDLOCK_ACCESS_FS_REMOVE_* access right on the source's (file or
+ *   directory) parent.  Otherwise, such actions are denied with errno set to
+ *   EACCES.  The EACCES errno prevails over EXDEV to let user space
+ *   efficiently deal with an unrecoverable error.
  *
  * .. warning::
  *
@@ -120,6 +145,7 @@ struct landlock_path_beneath_attr {
  *   :manpage:`access(2)`.
  *   Future Landlock evolutions will enable to restrict them.
  */
+/* clang-format off */
 #define LANDLOCK_ACCESS_FS_EXECUTE                     (1ULL << 0)
 #define LANDLOCK_ACCESS_FS_WRITE_FILE                  (1ULL << 1)
 #define LANDLOCK_ACCESS_FS_READ_FILE                   (1ULL << 2)
@@ -133,5 +159,7 @@ struct landlock_path_beneath_attr {
 #define LANDLOCK_ACCESS_FS_MAKE_FIFO                   (1ULL << 10)
 #define LANDLOCK_ACCESS_FS_MAKE_BLOCK                  (1ULL << 11)
 #define LANDLOCK_ACCESS_FS_MAKE_SYM                    (1ULL << 12)
+#define LANDLOCK_ACCESS_FS_REFER                       (1ULL << 13)
+/* clang-format on */
 
 #endif /* _UAPI_LINUX_LANDLOCK_H */
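As a usage sketch (not code from this patch): a process wanting the deny-by-default behaviour described above, including handling LANDLOCK_ACCESS_FS_REFER, could build and apply a ruleset like this; raw syscall(2) is used because libc does not wrap the Landlock syscalls, and error handling is trimmed:

#include <fcntl.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/landlock.h>

/* Handle (and thus deny by default) reads and reparenting; then allow
 * read access beneath /usr only. */
static int restrict_self_sketch(void)
{
        struct landlock_ruleset_attr ruleset_attr = {
                .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
                                     LANDLOCK_ACCESS_FS_READ_DIR |
                                     LANDLOCK_ACCESS_FS_REFER,
        };
        struct landlock_path_beneath_attr path_beneath = {
                .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
                                  LANDLOCK_ACCESS_FS_READ_DIR,
        };
        int ruleset_fd;

        ruleset_fd = syscall(SYS_landlock_create_ruleset, &ruleset_attr,
                             sizeof(ruleset_attr), 0);
        path_beneath.parent_fd = open("/usr", O_PATH | O_CLOEXEC);
        syscall(SYS_landlock_add_rule, ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
                &path_beneath, 0);
        close(path_beneath.parent_fd);

        prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        return syscall(SYS_landlock_restrict_self, ruleset_fd, 0);
}

Because LANDLOCK_ACCESS_FS_REFER is handled but never granted by a rule here, any cross-directory rename or link inside this domain fails with EXDEV, matching the documentation above.
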
index 98e60801195e29a5e38d7f2b2b3fb40d39d34364..6f63527dd2ed6ad964ee54c4d72e6f5a78e245ed 100644 (file)
@@ -1,11 +1,6 @@
 /* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
 /*
- * include/linux/loop.h
- *
- * Written by Theodore Ts'o, 3/29/93.
- *
- * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
- * permitted under the GNU General Public License.
+ * Copyright 1993 by Theodore Ts'o.
  */
 #ifndef _UAPI_LINUX_LOOP_H
 #define _UAPI_LINUX_LOOP_H
index b2e43185e3b5562cc17015d5d5274aa4552af92b..2f76cba6716637baff53e167a6141b68420d75c3 100644 (file)
@@ -70,6 +70,28 @@ struct nvme_passthru_cmd64 {
        __u64   result;
 };
 
+/* same as struct nvme_passthru_cmd64, minus the 8-byte result field */
+struct nvme_uring_cmd {
+       __u8    opcode;
+       __u8    flags;
+       __u16   rsvd1;
+       __u32   nsid;
+       __u32   cdw2;
+       __u32   cdw3;
+       __u64   metadata;
+       __u64   addr;
+       __u32   metadata_len;
+       __u32   data_len;
+       __u32   cdw10;
+       __u32   cdw11;
+       __u32   cdw12;
+       __u32   cdw13;
+       __u32   cdw14;
+       __u32   cdw15;
+       __u32   timeout_ms;
+       __u32   rsvd2;
+};
+
 #define nvme_admin_cmd nvme_passthru_cmd
 
 #define NVME_IOCTL_ID          _IO('N', 0x40)
@@ -83,4 +105,10 @@ struct nvme_passthru_cmd64 {
 #define NVME_IOCTL_IO64_CMD    _IOWR('N', 0x48, struct nvme_passthru_cmd64)
 #define NVME_IOCTL_IO64_CMD_VEC        _IOWR('N', 0x49, struct nvme_passthru_cmd64)
 
+/* io_uring async commands: */
+#define NVME_URING_CMD_IO      _IOWR('N', 0x80, struct nvme_uring_cmd)
+#define NVME_URING_CMD_IO_VEC  _IOWR('N', 0x81, struct nvme_uring_cmd)
+#define NVME_URING_CMD_ADMIN   _IOWR('N', 0x82, struct nvme_uring_cmd)
+#define NVME_URING_CMD_ADMIN_VEC _IOWR('N', 0x83, struct nvme_uring_cmd)
+
 #endif /* _UAPI_LINUX_NVME_IOCTL_H */
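These opcodes are not classic ioctls: they travel as the cmd_op of an IORING_OP_URING_CMD SQE, with the struct nvme_uring_cmd payload in the extra SQE space. A rough sketch, assuming a ring created with IORING_SETUP_SQE128, liburing's io_uring_prep_rw() helper, namespace 1 and a 512-byte LBA format (all assumptions, not from this patch):

#include <stdint.h>
#include <string.h>
#include <liburing.h>
#include <linux/nvme_ioctl.h>

static void queue_nvme_read(struct io_uring *ring, int ng_fd,
                            void *buf, uint32_t nbytes, uint64_t slba)
{
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
        struct nvme_uring_cmd *cmd;

        io_uring_prep_rw(IORING_OP_URING_CMD, sqe, ng_fd, NULL, 0, 0);
        sqe->cmd_op = NVME_URING_CMD_IO;

        cmd = (struct nvme_uring_cmd *)sqe->cmd;
        memset(cmd, 0, sizeof(*cmd));
        cmd->opcode = 0x02;                     /* NVMe Read */
        cmd->nsid = 1;                          /* assumed namespace */
        cmd->addr = (uint64_t)(uintptr_t)buf;
        cmd->data_len = nbytes;
        cmd->cdw10 = slba & 0xffffffff;         /* SLBA low */
        cmd->cdw11 = slba >> 32;                /* SLBA high */
        cmd->cdw12 = nbytes / 512 - 1;          /* 0-based block count */
}

Note the target fd is the NVMe character device (e.g. /dev/ng0n1), not the block device.
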
index e998764f0262522903f04101f945a73a2f724a36..a5e06dcbba136d618c6dcf61f0cbb6e3bfe9ea2c 100644 (file)
@@ -272,6 +272,15 @@ struct prctl_mm_map {
 # define PR_SCHED_CORE_SCOPE_THREAD_GROUP      1
 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP     2
 
+/* arm64 Scalable Matrix Extension controls */
+/* Flag values must be in sync with SVE versions */
+#define PR_SME_SET_VL                  63      /* set task vector length */
+# define PR_SME_SET_VL_ONEXEC          (1 << 18) /* defer effect until exec */
+#define PR_SME_GET_VL                  64      /* get task vector length */
+/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */
+# define PR_SME_VL_LEN_MASK            0xffff
+# define PR_SME_VL_INHERIT             (1 << 17) /* inherit across exec */
+
 #define PR_SET_VMA             0x53564d41
 # define PR_SET_VMA_ANON_NAME          0
 
index 283c5a7b3f2c813f72c81181010a67af551026cb..db6c8588c1d0c130360123ed156de2414a11dd92 100644 (file)
@@ -184,7 +184,7 @@ struct rfkill_event_ext {
 #define RFKILL_IOC_NOINPUT     1
 #define RFKILL_IOCTL_NOINPUT   _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT)
 #define RFKILL_IOC_MAX_SIZE    2
-#define RFKILL_IOCTL_MAX_SIZE  _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32)
+#define RFKILL_IOCTL_MAX_SIZE  _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32)
 
 /* and that's all userspace gets */
 
index 78074254ab98a11634e3a4c6d2491ebae0032e8f..0fdc6ef02b943980143a2787a376f1db459c2fdf 100644 (file)
@@ -23,6 +23,8 @@
 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW         (1UL << 2)
 #define SECCOMP_FILTER_FLAG_NEW_LISTENER       (1UL << 3)
 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH                (1UL << 4)
+/* Received notifications wait in killable state (only respond to fatal signals) */
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
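For illustration only: a filter installed with this flag makes tasks blocked on a user notification respond only to fatal signals. A minimal sketch, assuming a libc exposing the seccomp(2) syscall number; the catch-all SECCOMP_RET_USER_NOTIF program is purely demonstrative and would trap every syscall:

#include <stddef.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

/* Returns the user-notification fd on success, -1 on failure. */
static int install_killable_notifier(void)
{
        struct sock_filter insns[] = {
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_USER_NOTIF),
        };
        struct sock_fprog prog = {
                .len = sizeof(insns) / sizeof(insns[0]),
                .filter = insns,
        };

        if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
                return -1;
        return syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER,
                       SECCOMP_FILTER_FLAG_NEW_LISTENER |
                       SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV, &prog);
}
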
 
 /*
  * All BPF programs must return a 32-bit value.
diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h
new file mode 100644 (file)
index 0000000..256aaef
--- /dev/null
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Userspace interface for AMD SEV and SNP guest driver.
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * SEV API specification is available at: https://developer.amd.com/sev/
+ */
+
+#ifndef __UAPI_LINUX_SEV_GUEST_H_
+#define __UAPI_LINUX_SEV_GUEST_H_
+
+#include <linux/types.h>
+
+struct snp_report_req {
+       /* user data that should be included in the report */
+       __u8 user_data[64];
+
+       /* The vmpl level to be included in the report */
+       __u32 vmpl;
+
+       /* Must be zero filled */
+       __u8 rsvd[28];
+};
+
+struct snp_report_resp {
+       /* response data, see SEV-SNP spec for the format */
+       __u8 data[4000];
+};
+
+struct snp_derived_key_req {
+       __u32 root_key_select;
+       __u32 rsvd;
+       __u64 guest_field_select;
+       __u32 vmpl;
+       __u32 guest_svn;
+       __u64 tcb_version;
+};
+
+struct snp_derived_key_resp {
+       /* response data, see SEV-SNP spec for the format */
+       __u8 data[64];
+};
+
+struct snp_guest_request_ioctl {
+       /* message version number (must be non-zero) */
+       __u8 msg_version;
+
+       /* Request and response structure address */
+       __u64 req_data;
+       __u64 resp_data;
+
+       /* firmware error code on failure (see psp-sev.h) */
+       __u64 fw_err;
+};
+
+struct snp_ext_report_req {
+       struct snp_report_req data;
+
+       /* where to copy the certificate blob */
+       __u64 certs_address;
+
+       /* length of the certificate blob */
+       __u32 certs_len;
+};
+
+#define SNP_GUEST_REQ_IOC_TYPE 'S'
+
+/* Get SNP attestation report */
+#define SNP_GET_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x0, struct snp_guest_request_ioctl)
+
+/* Get a derived key from the root */
+#define SNP_GET_DERIVED_KEY _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x1, struct snp_guest_request_ioctl)
+
+/* Get SNP extended report as defined in the GHCB specification version 2. */
+#define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl)
+
+#endif /* __UAPI_LINUX_SEV_GUEST_H_ */
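A guest-side usage sketch for the report ioctl; the /dev/sev-guest node name and nonce value here are assumptions, and error handling is trimmed:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/sev-guest.h>

static int get_snp_report(unsigned char report[4000])
{
        struct snp_report_req req;
        struct snp_report_resp resp;
        struct snp_guest_request_ioctl guest_req = {
                .msg_version = 1,
                .req_data = (__u64)(unsigned long)&req,
                .resp_data = (__u64)(unsigned long)&resp,
        };
        int fd = open("/dev/sev-guest", O_RDWR);

        if (fd < 0)
                return -1;
        memset(&req, 0, sizeof(req));
        memset(&resp, 0, sizeof(resp));
        /* user_data (e.g. a nonce) is echoed back inside the signed report */
        memset(req.user_data, 0xaa, sizeof(req.user_data));

        if (ioctl(fd, SNP_GET_REPORT, &guest_req) < 0) {
                close(fd);
                return -1;
        }
        close(fd);
        memcpy(report, resp.data, sizeof(resp.data));
        return 0;
}
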
index 236a85f08ded63ddbff1535917beb9cf5deb8770..9d5f580597039e9cf798bb9e62ad18b248dffb61 100644 (file)
@@ -27,6 +27,7 @@
 #define        SPI_TX_OCTAL            _BITUL(13)      /* transmit with 8 wires */
 #define        SPI_RX_OCTAL            _BITUL(14)      /* receive with 8 wires */
 #define        SPI_3WIRE_HIZ           _BITUL(15)      /* high impedance turnaround */
+#define        SPI_RX_CPHA_FLIP        _BITUL(16)      /* flip CPHA on Rx only xfer */
 
 /*
  * All the bits defined above should be covered by SPI_MODE_USER_MASK.
@@ -36,6 +37,6 @@
  * These bits must not overlap. A static assert check should make sure of that.
  * If adding extra bits, make sure to increase the bit index below as well.
  */
-#define SPI_MODE_USER_MASK     (_BITUL(16) - 1)
+#define SPI_MODE_USER_MASK     (_BITUL(17) - 1)
 
 #endif /* _UAPI_SPI_H */
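From userspace the new bit is reachable through spidev's 32-bit mode ioctls, which is also why the user mask above must grow with each added bit. A sketch, assuming an open /dev/spidevX.Y descriptor and headers that define SPI_RX_CPHA_FLIP:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/spi/spidev.h>

static int enable_rx_cpha_flip(int fd)
{
        uint32_t mode;

        if (ioctl(fd, SPI_IOC_RD_MODE32, &mode) < 0)
                return -1;
        mode |= SPI_RX_CPHA_FLIP;       /* flip CPHA on RX-only transfers */
        return ioctl(fd, SPI_IOC_WR_MODE32, &mode);
}
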
index 80d76b75bccd9e2f96b20db96189319f9546acd5..7aa2eb76620508fdc915533f74973d76308d3ef5 100644 (file)
  * Virtio Transitional IDs
  */
 
-#define VIRTIO_TRANS_ID_NET            1000 /* transitional virtio net */
-#define VIRTIO_TRANS_ID_BLOCK          1001 /* transitional virtio block */
-#define VIRTIO_TRANS_ID_BALLOON                1002 /* transitional virtio balloon */
-#define VIRTIO_TRANS_ID_CONSOLE                1003 /* transitional virtio console */
-#define VIRTIO_TRANS_ID_SCSI           1004 /* transitional virtio SCSI */
-#define VIRTIO_TRANS_ID_RNG            1005 /* transitional virtio rng */
-#define VIRTIO_TRANS_ID_9P             1009 /* transitional virtio 9p console */
+#define VIRTIO_TRANS_ID_NET            0x1000 /* transitional virtio net */
+#define VIRTIO_TRANS_ID_BLOCK          0x1001 /* transitional virtio block */
+#define VIRTIO_TRANS_ID_BALLOON                0x1002 /* transitional virtio balloon */
+#define VIRTIO_TRANS_ID_CONSOLE                0x1003 /* transitional virtio console */
+#define VIRTIO_TRANS_ID_SCSI           0x1004 /* transitional virtio SCSI */
+#define VIRTIO_TRANS_ID_RNG            0x1005 /* transitional virtio rng */
+#define VIRTIO_TRANS_ID_9P             0x1009 /* transitional virtio 9p console */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
index dfd5bf31cfb923fcb8a36d11082fcd478142ae91..7d0f2f0037b8cf0cf3483eb2cb76ad69ff22ef99 100644 (file)
@@ -57,8 +57,6 @@
 #define INVALID_GRANT_REF          ((grant_ref_t)-1)
 #define INVALID_GRANT_HANDLE       ((grant_handle_t)-1)
 
-#define GNTTAB_RESERVED_XENSTORE 1
-
 /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
 #define NR_GRANT_FRAMES 4
 
index 3eeabbc7ee097d98d16f4b364ea07052bb328daf..cebbd99f1f843b4000ea5ef345b79cbb0408bbf1 100644 (file)
@@ -19,7 +19,8 @@
 
 /* Some rough guidelines on accessing and updating grant-table entries
  * in a concurrency-safe manner. For more information, Linux contains a
- * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
+ * reference implementation for guest OSes (drivers/xen/grant-table.c, see
+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD).
  *
  * NB. WMB is a no-op on current-generation x86 processors. However, a
  *     compiler barrier will still be required.
@@ -80,8 +81,9 @@ typedef uint32_t grant_ref_t;
  */
 
 /*
- * Version 1 of the grant table entry structure is maintained purely
- * for backwards compatibility.  New guests should use version 2.
+ * Version 1 of the grant table entry structure is maintained largely for
+ * backwards compatibility.  New guests are recommended to support using
+ * version 2 to overcome version 1 limitations, but to default to version 1.
  */
 struct grant_entry_v1 {
     /* GTF_xxx: various type and flag information.  [XEN,GST] */
@@ -89,12 +91,21 @@ struct grant_entry_v1 {
     /* The domain being granted foreign privileges. [GST] */
     domid_t  domid;
     /*
-     * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
-     * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
+     * GTF_permit_access: GFN that @domid is allowed to map and access. [GST]
+     * GTF_accept_transfer: GFN that @domid is allowed to transfer into. [GST]
+     * GTF_transfer_completed: MFN whose ownership transferred by @domid
+     *                         (non-translated guests only). [XEN]
      */
     uint32_t frame;
 };
 
+/* The first few grant table entries will be preserved across grant table
+ * version changes and may be pre-populated at domain creation by tools.
+ */
+#define GNTTAB_NR_RESERVED_ENTRIES     8
+#define GNTTAB_RESERVED_CONSOLE        0
+#define GNTTAB_RESERVED_XENSTORE       1
+
 /*
  * Type of grant entry.
  *  GTF_invalid: This grant entry grants no privileges.
@@ -111,10 +122,13 @@ struct grant_entry_v1 {
 #define GTF_type_mask       (3U<<0)
 
 /*
- * Subflags for GTF_permit_access.
+ * Subflags for GTF_permit_access and GTF_transitive.
  *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
  *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
  *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ * Further subflags for GTF_permit_access only.
+ *  GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags to be used for
+ *                             mappings of the grant [GST]
  *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
  *                will only be allowed to copy from the grant, and not
  *                map it. [GST]
@@ -125,6 +139,12 @@ struct grant_entry_v1 {
 #define GTF_reading         (1U<<_GTF_reading)
 #define _GTF_writing        (4)
 #define GTF_writing         (1U<<_GTF_writing)
+#define _GTF_PWT            (5)
+#define GTF_PWT             (1U<<_GTF_PWT)
+#define _GTF_PCD            (6)
+#define GTF_PCD             (1U<<_GTF_PCD)
+#define _GTF_PAT            (7)
+#define GTF_PAT             (1U<<_GTF_PAT)
 #define _GTF_sub_page       (8)
 #define GTF_sub_page        (1U<<_GTF_sub_page)
 
@@ -164,8 +184,7 @@ struct grant_entry_header {
 };
 
 /*
- * Version 2 of the grant entry structure, here is a union because three
- * different types are suppotted: full_page, sub_page and transitive.
+ * Version 2 of the grant entry structure.
  */
 union grant_entry_v2 {
     struct grant_entry_header hdr;
@@ -180,9 +199,9 @@ union grant_entry_v2 {
      * field of the same name in the V1 entry structure.
      */
     struct {
-       struct grant_entry_header hdr;
-       uint32_t pad0;
-       uint64_t frame;
+        struct grant_entry_header hdr;
+        uint32_t pad0;
+        uint64_t frame;
     } full_page;
 
     /*
@@ -191,10 +210,10 @@ union grant_entry_v2 {
      * in frame @frame.
      */
     struct {
-       struct grant_entry_header hdr;
-       uint16_t page_off;
-       uint16_t length;
-       uint64_t frame;
+        struct grant_entry_header hdr;
+        uint16_t page_off;
+        uint16_t length;
+        uint64_t frame;
     } sub_page;
 
     /*
@@ -202,12 +221,15 @@ union grant_entry_v2 {
      * grant @gref in domain @trans_domid, as if it was the local
      * domain.  Obviously, the transitive access must be compatible
      * with the original grant.
+     *
+     * The current version of Xen does not allow transitive grants
+     * to be mapped.
      */
     struct {
-       struct grant_entry_header hdr;
-       domid_t trans_domid;
-       uint16_t pad0;
-       grant_ref_t gref;
+        struct grant_entry_header hdr;
+        domid_t trans_domid;
+        uint16_t pad0;
+        grant_ref_t gref;
     } transitive;
 
     uint32_t __spacer[4]; /* Pad to a power of two */
@@ -219,6 +241,21 @@ typedef uint16_t grant_status_t;
  * GRANT TABLE QUERIES AND USES
  */
 
+#define GNTTABOP_map_grant_ref        0
+#define GNTTABOP_unmap_grant_ref      1
+#define GNTTABOP_setup_table          2
+#define GNTTABOP_dump_table           3
+#define GNTTABOP_transfer             4
+#define GNTTABOP_copy                 5
+#define GNTTABOP_query_size           6
+#define GNTTABOP_unmap_and_replace    7
+#define GNTTABOP_set_version          8
+#define GNTTABOP_get_status_frames    9
+#define GNTTABOP_get_version          10
+#define GNTTABOP_swap_grant_ref              11
+#define GNTTABOP_cache_flush         12
+/* ` } */
+
 /*
  * Handle to track a mapping created via a grant reference.
  */
@@ -227,7 +264,7 @@ typedef uint32_t grant_handle_t;
 /*
  * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
  * by devices and/or host CPUs. If successful, <handle> is a tracking number
- * that must be presented later to destroy the mapping(s). On error, <handle>
+ * that must be presented later to destroy the mapping(s). On error, <status>
  * is a negative status code.
  * NOTES:
  *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
@@ -241,7 +278,6 @@ typedef uint32_t grant_handle_t;
  *     host mapping is destroyed by other means then it is *NOT* guaranteed
  *     to be accounted to the correct grant reference!
  */
-#define GNTTABOP_map_grant_ref        0
 struct gnttab_map_grant_ref {
     /* IN parameters. */
     uint64_t host_addr;
@@ -266,7 +302,6 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
  *  3. After executing a batch of unmaps, it is guaranteed that no stale
  *     mappings will remain in the device or host TLBs.
  */
-#define GNTTABOP_unmap_grant_ref      1
 struct gnttab_unmap_grant_ref {
     /* IN parameters. */
     uint64_t host_addr;
@@ -286,7 +321,6 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
  *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
  *  3. Xen may not support more than a single grant-table page per domain.
  */
-#define GNTTABOP_setup_table          2
 struct gnttab_setup_table {
     /* IN parameters. */
     domid_t  dom;
@@ -301,7 +335,6 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
  * GNTTABOP_dump_table: Dump the contents of the grant table to the
  * xen console. Debugging use only.
  */
-#define GNTTABOP_dump_table           3
 struct gnttab_dump_table {
     /* IN parameters. */
     domid_t dom;
@@ -311,17 +344,17 @@ struct gnttab_dump_table {
 DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
 
 /*
- * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
- * foreign domain has previously registered its interest in the transfer via
- * <domid, ref>.
+ * GNTTABOP_transfer: Transfer <frame> to a foreign domain. The foreign domain
+ * has previously registered its interest in the transfer via <domid, ref>.
  *
  * Note that, even if the transfer fails, the specified page no longer belongs
  * to the calling domain *unless* the error is GNTST_bad_page.
+ *
+ * Note further that only PV guests can use this operation.
  */
-#define GNTTABOP_transfer                4
 struct gnttab_transfer {
     /* IN parameters. */
-    xen_pfn_t mfn;
+    xen_pfn_t     mfn;
     domid_t       domid;
     grant_ref_t   ref;
     /* OUT parameters. */
@@ -352,21 +385,20 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
 #define _GNTCOPY_dest_gref        (1)
 #define GNTCOPY_dest_gref         (1<<_GNTCOPY_dest_gref)
 
-#define GNTTABOP_copy                 5
 struct gnttab_copy {
-       /* IN parameters. */
-       struct {
-               union {
-                       grant_ref_t ref;
-                       xen_pfn_t   gmfn;
-               } u;
-               domid_t  domid;
-               uint16_t offset;
-       } source, dest;
-       uint16_t      len;
-       uint16_t      flags;          /* GNTCOPY_* */
-       /* OUT parameters. */
-       int16_t       status;
+    /* IN parameters. */
+    struct gnttab_copy_ptr {
+        union {
+            grant_ref_t ref;
+            xen_pfn_t   gmfn;
+        } u;
+        domid_t  domid;
+        uint16_t offset;
+    } source, dest;
+    uint16_t      len;
+    uint16_t      flags;          /* GNTCOPY_* */
+    /* OUT parameters. */
+    int16_t       status;
 };
 DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
 
@@ -377,7 +409,6 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
  *  1. <dom> may be specified as DOMID_SELF.
  *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
  */
-#define GNTTABOP_query_size           6
 struct gnttab_query_size {
     /* IN parameters. */
     domid_t  dom;
@@ -399,7 +430,6 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
  *  2. After executing a batch of unmaps, it is guaranteed that no stale
  *     mappings will remain in the device or host TLBs.
  */
-#define GNTTABOP_unmap_and_replace    7
 struct gnttab_unmap_and_replace {
     /* IN parameters. */
     uint64_t host_addr;
@@ -412,14 +442,12 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
 
 /*
  * GNTTABOP_set_version: Request a particular version of the grant
- * table shared table structure.  This operation can only be performed
- * once in any given domain.  It must be performed before any grants
- * are activated; otherwise, the domain will be stuck with version 1.
- * The only defined versions are 1 and 2.
+ * table shared table structure.  This operation may be used to toggle
+ * between different versions, but must be performed while no grants
+ * are active.  The only defined versions are 1 and 2.
  */
-#define GNTTABOP_set_version          8
 struct gnttab_set_version {
-    /* IN parameters */
+    /* IN/OUT parameters */
     uint32_t version;
 };
 DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
@@ -436,7 +464,6 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
  *  1. <dom> may be specified as DOMID_SELF.
  *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
  */
-#define GNTTABOP_get_status_frames     9
 struct gnttab_get_status_frames {
     /* IN parameters. */
     uint32_t nr_frames;
@@ -451,7 +478,6 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
  * GNTTABOP_get_version: Get the grant table version which is in
  * effect for domain <dom>.
  */
-#define GNTTABOP_get_version          10
 struct gnttab_get_version {
     /* IN parameters */
     domid_t dom;
@@ -461,27 +487,38 @@ struct gnttab_get_version {
 };
 DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
 
+/*
+ * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries.
+ */
+struct gnttab_swap_grant_ref {
+    /* IN parameters */
+    grant_ref_t ref_a;
+    grant_ref_t ref_b;
+    /* OUT parameters */
+    int16_t status;             /* GNTST_* */
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_swap_grant_ref);
+
 /*
  * Issue one or more cache maintenance operations on a portion of a
  * page granted to the calling domain by a foreign domain.
  */
-#define GNTTABOP_cache_flush          12
 struct gnttab_cache_flush {
     union {
         uint64_t dev_bus_addr;
         grant_ref_t ref;
     } a;
-    uint16_t offset;   /* offset from start of grant */
-    uint16_t length;   /* size within the grant */
-#define GNTTAB_CACHE_CLEAN          (1<<0)
-#define GNTTAB_CACHE_INVAL          (1<<1)
-#define GNTTAB_CACHE_SOURCE_GREF    (1<<31)
+    uint16_t offset; /* offset from start of grant */
+    uint16_t length; /* size within the grant */
+#define GNTTAB_CACHE_CLEAN          (1u<<0)
+#define GNTTAB_CACHE_INVAL          (1u<<1)
+#define GNTTAB_CACHE_SOURCE_GREF    (1u<<31)
     uint32_t op;
 };
 DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
 
 /*
- * Bitfield values for update_pin_status.flags.
+ * Bitfield values for gnttab_map_grant_ref.flags.
  */
  /* Map the grant entry for access by I/O devices. */
 #define _GNTMAP_device_map      (0)
@@ -531,6 +568,7 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
 #define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
 #define GNTST_address_too_big (-11) /* transfer page address too large.      */
 #define GNTST_eagain          (-12) /* Operation not done; try again.        */
+#define GNTST_no_space        (-13) /* Out of space (handles etc).           */
 
 #define GNTTABOP_error_msgs {                   \
     "okay",                                     \
@@ -545,7 +583,8 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
     "bad page",                                 \
     "copy arguments cross page boundary",       \
     "page address size too large",              \
-    "operation not done; try again"             \
+    "operation not done; try again",            \
+    "out of space",                             \
 }
 
 #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
index 2470ec45ebb20282f10d0d77728ef454166d81a7..ba4c4274b7146ed496629318a100152a1bd938c1 100644 (file)
@@ -72,9 +72,8 @@ typedef unsigned int RING_IDX;
  * of the shared memory area (PAGE_SIZE, for instance). To initialise
  * the front half:
  *
- *     mytag_front_ring_t front_ring;
- *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
- *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *     mytag_front_ring_t ring;
+ *     XEN_FRONT_RING_INIT(&ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
  *
  * Initializing the back follows similarly (note that only the front
  * initializes the shared ring):
@@ -146,6 +145,11 @@ struct __name##_back_ring {                                             \
 
 #define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size)
 
+#define XEN_FRONT_RING_INIT(r, s, size) do {                            \
+    SHARED_RING_INIT(s);                                                \
+    FRONT_RING_INIT(r, s, size);                                        \
+} while (0)
+
 #define BACK_RING_ATTACH(_r, _s, _i, __size) do {                       \
     (_r)->rsp_prod_pvt = (_i);                                          \
     (_r)->req_cons = (_i);                                              \
@@ -170,16 +174,21 @@ struct __name##_back_ring {                                             \
     (RING_FREE_REQUESTS(_r) == 0)
 
 /* Test if there are outstanding messages to be processed on a ring. */
-#define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
+#define XEN_RING_NR_UNCONSUMED_RESPONSES(_r)                            \
     ((_r)->sring->rsp_prod - (_r)->rsp_cons)
 
-#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                             \
+#define XEN_RING_NR_UNCONSUMED_REQUESTS(_r) ({                          \
     unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
     unsigned int rsp = RING_SIZE(_r) -                                  \
         ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
     req < rsp ? req : rsp;                                              \
 })
 
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
+    (!!XEN_RING_NR_UNCONSUMED_RESPONSES(_r))
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)  \
+    (!!XEN_RING_NR_UNCONSUMED_REQUESTS(_r))
+
 /* Direct access to individual ring elements, by index. */
 #define RING_GET_REQUEST(_r, _idx)                                      \
     (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
index 1f6047d3de444800d5cb6a8cb21ba0dd8d666ac7..7ea4dc9611c407fe833683ae6b729c2ff24a3d32 100644 (file)
@@ -43,7 +43,7 @@
  *
  *      A string specifying the backend device: either a 4-tuple "h:c:t:l"
  *      (host, controller, target, lun, all integers), or a WWN (e.g.
- *      "naa.60014054ac780582").
+ *      "naa.60014054ac780582:0").
  *
  * v-dev
  *      Values:         string
  *      response structures.
  */
 
+/*
+ * Xenstore format in practice
+ * ===========================
+ *
+ * The backend driver uses a single_host:many_devices notation to manage domU
+ * devices. Everything is stored in /local/domain/<backend_domid>/backend/vscsi/.
+ * The xenstore layout looks like this (dom0 is assumed to be the backend_domid):
+ *
+ *     <domid>/<vhost>/feature-host = "0"
+ *     <domid>/<vhost>/frontend = "/local/domain/<domid>/device/vscsi/0"
+ *     <domid>/<vhost>/frontend-id = "<domid>"
+ *     <domid>/<vhost>/online = "1"
+ *     <domid>/<vhost>/state = "4"
+ *     <domid>/<vhost>/vscsi-devs/dev-0/p-dev = "8:0:2:1" or "naa.wwn:lun"
+ *     <domid>/<vhost>/vscsi-devs/dev-0/state = "4"
+ *     <domid>/<vhost>/vscsi-devs/dev-0/v-dev = "0:0:0:0"
+ *     <domid>/<vhost>/vscsi-devs/dev-1/p-dev = "8:0:2:2"
+ *     <domid>/<vhost>/vscsi-devs/dev-1/state = "4"
+ *     <domid>/<vhost>/vscsi-devs/dev-1/v-dev = "0:0:1:0"
+ *
+ * The frontend driver maintains its state in
+ * /local/domain/<domid>/device/vscsi/.
+ *
+ *     <vhost>/backend = "/local/domain/0/backend/vscsi/<domid>/<vhost>"
+ *     <vhost>/backend-id = "0"
+ *     <vhost>/event-channel = "20"
+ *     <vhost>/ring-ref = "43"
+ *     <vhost>/state = "4"
+ *     <vhost>/vscsi-devs/dev-0/state = "4"
+ *     <vhost>/vscsi-devs/dev-1/state = "4"
+ *
+ * In addition to the entries for backend and frontend these flags are stored
+ * for the toolstack:
+ *
+ *     <domid>/<vhost>/vscsi-devs/dev-1/p-devname = "/dev/$device"
+ *     <domid>/<vhost>/libxl_ctrl_index = "0"
+ *
+ *
+ * Backend/frontend protocol
+ * =========================
+ *
+ * To create a vhost along with a device:
+ *     <domid>/<vhost>/feature-host = "0"
+ *     <domid>/<vhost>/frontend = "/local/domain/<domid>/device/vscsi/0"
+ *     <domid>/<vhost>/frontend-id = "<domid>"
+ *     <domid>/<vhost>/online = "1"
+ *     <domid>/<vhost>/state = "1"
+ *     <domid>/<vhost>/vscsi-devs/dev-0/p-dev = "8:0:2:1"
+ *     <domid>/<vhost>/vscsi-devs/dev-0/state = "1"
+ *     <domid>/<vhost>/vscsi-devs/dev-0/v-dev = "0:0:0:0"
+ * Wait for <domid>/<vhost>/state + <domid>/<vhost>/vscsi-devs/dev-0/state to become 4
+ *
+ * To add another device to a vhost:
+ *     <domid>/<vhost>/state = "7"
+ *     <domid>/<vhost>/vscsi-devs/dev-1/p-dev = "8:0:2:2"
+ *     <domid>/<vhost>/vscsi-devs/dev-1/state = "1"
+ *     <domid>/<vhost>/vscsi-devs/dev-1/v-dev = "0:0:1:0"
+ * Wait for <domid>/<vhost>/state + <domid>/<vhost>/vscsi-devs/dev-1/state to become 4
+ *
+ * To remove a device from a vhost:
+ *     <domid>/<vhost>/state = "7"
+ *     <domid>/<vhost>/vscsi-devs/dev-1/state = "5"
+ * Wait for <domid>/<vhost>/state to become 4
+ * Wait for <domid>/<vhost>/vscsi-devs/dev-1/state to become 6
+ * Remove <domid>/<vhost>/vscsi-devs/dev-1/{state,p-dev,v-dev,p-devname}
+ * Remove <domid>/<vhost>/vscsi-devs/dev-1/
+ *
+ */
+
 /* Requests from the frontend to the backend */
 
 /*
  * (plus the set VSCSIIF_SG_GRANT bit), the number of scsiif_request_segment
  * elements referencing the target data buffers is calculated from the lengths
  * of the seg[] elements (the sum of all valid seg[].length divided by the
- * size of one scsiif_request_segment structure).
+ * size of one scsiif_request_segment structure). The frontend may use a mix of
+ * direct and indirect requests.
  */
 #define VSCSIIF_ACT_SCSI_CDB           1
 
 
 /*
  * based on Linux kernel 2.6.18, still valid
+ *
  * Changing these values requires support of multiple protocols via the rings
  * as "old clients" will blindly use these values and the resulting structure
  * sizes.
  */
 #define VSCSIIF_MAX_COMMAND_SIZE       16
 #define VSCSIIF_SENSE_BUFFERSIZE       96
+#define VSCSIIF_PAGE_SIZE              4096
 
 struct scsiif_request_segment {
        grant_ref_t gref;
@@ -167,7 +239,8 @@ struct scsiif_request_segment {
        uint16_t length;
 };
 
-#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment))
+#define VSCSIIF_SG_PER_PAGE    (VSCSIIF_PAGE_SIZE / \
+                                sizeof(struct scsiif_request_segment))
 
 /* Size of one request is 252 bytes */
 struct vscsiif_request {
@@ -207,6 +280,58 @@ struct vscsiif_response {
        uint32_t reserved[36];
 };
 
+/* SCSI I/O status from vscsiif_response->rslt */
+#define XEN_VSCSIIF_RSLT_STATUS(x)  ((x) & 0x00ff)
+
+/* Host I/O status from vscsiif_response->rslt */
+#define XEN_VSCSIIF_RSLT_HOST(x)    (((x) & 0x00ff0000) >> 16)
+#define XEN_VSCSIIF_RSLT_HOST_OK                   0
+/* Couldn't connect before timeout */
+#define XEN_VSCSIIF_RSLT_HOST_NO_CONNECT           1
+/* Bus busy through timeout */
+#define XEN_VSCSIIF_RSLT_HOST_BUS_BUSY             2
+/* Timed out for other reason */
+#define XEN_VSCSIIF_RSLT_HOST_TIME_OUT             3
+/* Bad target */
+#define XEN_VSCSIIF_RSLT_HOST_BAD_TARGET           4
+/* Abort for some other reason */
+#define XEN_VSCSIIF_RSLT_HOST_ABORT                5
+/* Parity error */
+#define XEN_VSCSIIF_RSLT_HOST_PARITY               6
+/* Internal error */
+#define XEN_VSCSIIF_RSLT_HOST_ERROR                7
+/* Reset by somebody */
+#define XEN_VSCSIIF_RSLT_HOST_RESET                8
+/* Unexpected interrupt */
+#define XEN_VSCSIIF_RSLT_HOST_BAD_INTR             9
+/* Force command past mid-layer */
+#define XEN_VSCSIIF_RSLT_HOST_PASSTHROUGH         10
+/* Retry requested */
+#define XEN_VSCSIIF_RSLT_HOST_SOFT_ERROR          11
+/* Hidden retry requested */
+#define XEN_VSCSIIF_RSLT_HOST_IMM_RETRY           12
+/* Requeue command requested */
+#define XEN_VSCSIIF_RSLT_HOST_REQUEUE             13
+/* Transport error disrupted I/O */
+#define XEN_VSCSIIF_RSLT_HOST_TRANSPORT_DISRUPTED 14
+/* Transport class fastfailed */
+#define XEN_VSCSIIF_RSLT_HOST_TRANSPORT_FAILFAST  15
+/* Permanent target failure */
+#define XEN_VSCSIIF_RSLT_HOST_TARGET_FAILURE      16
+/* Permanent nexus failure on path */
+#define XEN_VSCSIIF_RSLT_HOST_NEXUS_FAILURE       17
+/* Space allocation on device failed */
+#define XEN_VSCSIIF_RSLT_HOST_ALLOC_FAILURE       18
+/* Medium error */
+#define XEN_VSCSIIF_RSLT_HOST_MEDIUM_ERROR        19
+/* Transport marginal errors */
+#define XEN_VSCSIIF_RSLT_HOST_TRANSPORT_MARGINAL  20
+
+/* Result values of reset operations */
+#define XEN_VSCSIIF_RSLT_RESET_SUCCESS  0x2002
+#define XEN_VSCSIIF_RSLT_RESET_FAILED   0x2003
+
 DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response);
 
-#endif /*__XEN__PUBLIC_IO_SCSI_H__*/
+
+#endif  /*__XEN__PUBLIC_IO_SCSI_H__*/
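A frontend-side sketch of how the result word decomposes with the new macros; this assumes the rslt field of struct vscsiif_response from the full header, which is not visible in the hunk above:

#include <stdint.h>

/* Returns 1 when the transport succeeded and the SCSI status is GOOD. */
static int vscsi_result_ok(int32_t rslt)
{
        unsigned int host = XEN_VSCSIIF_RSLT_HOST(rslt);
        unsigned int status = XEN_VSCSIIF_RSLT_STATUS(rslt);

        if (host != XEN_VSCSIIF_RSLT_HOST_OK)
                return 0;       /* transport-level failure */
        return status == 0;     /* nonzero needs sense-data handling */
}
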
index d40a44f09b162f4955fb36d8a6810331669280e3..b62365478ac00d42c93ba44a710adeaf02d49be1 100644 (file)
@@ -10,7 +10,8 @@
 
 enum xsd_sockmsg_type
 {
-    XS_DEBUG,
+    XS_CONTROL,
+#define XS_DEBUG XS_CONTROL
     XS_DIRECTORY,
     XS_READ,
     XS_GET_PERMS,
@@ -30,8 +31,13 @@ enum xsd_sockmsg_type
     XS_IS_DOMAIN_INTRODUCED,
     XS_RESUME,
     XS_SET_TARGET,
-    XS_RESTRICT,
-    XS_RESET_WATCHES,
+    /* XS_RESTRICT has been removed */
+    XS_RESET_WATCHES = XS_SET_TARGET + 2,
+    XS_DIRECTORY_PART,
+
+    XS_TYPE_COUNT,      /* Number of valid types. */
+
+    XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
 };
 
 #define XS_WRITE_NONE "NONE"
@@ -59,7 +65,8 @@ static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
     XSD_ERROR(EROFS),
     XSD_ERROR(EBUSY),
     XSD_ERROR(EAGAIN),
-    XSD_ERROR(EISCONN)
+    XSD_ERROR(EISCONN),
+    XSD_ERROR(E2BIG)
 };
 
 struct xsd_sockmsg
@@ -87,9 +94,31 @@ struct xenstore_domain_interface {
     char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
     XENSTORE_RING_IDX req_cons, req_prod;
     XENSTORE_RING_IDX rsp_cons, rsp_prod;
+    uint32_t server_features; /* Bitmap of features supported by the server */
+    uint32_t connection;
+    uint32_t error;
 };
 
 /* Violating this is very bad.  See docs/misc/xenstore.txt. */
 #define XENSTORE_PAYLOAD_MAX 4096
 
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
+
+/* The ability to reconnect a ring */
+#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
+/* The presence of the "error" field in the ring page */
+#define XENSTORE_SERVER_FEATURE_ERROR        2
+
+/* Valid values for the connection field */
+#define XENSTORE_CONNECTED 0 /* the steady-state */
+#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
+
+/* Valid values for the error field */
+#define XENSTORE_ERROR_NONE    0 /* No error */
+#define XENSTORE_ERROR_COMM    1 /* Communication problem */
+#define XENSTORE_ERROR_RINGIDX 2 /* Invalid ring index */
+#define XENSTORE_ERROR_PROTO   3 /* Protocol violation (payload too long) */
+
 #endif /* _XS_WIRE_H */
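As an illustration of the intended handshake (not code from this patch): a guest would gate any reconnect attempt on the advertised feature bit and the current connection state, roughly as below.

/* Sketch: check the ring before initiating XENSTORE_RECONNECT. */
static int xenstore_can_reconnect(struct xenstore_domain_interface *intf)
{
        if (!(intf->server_features & XENSTORE_SERVER_FEATURE_RECONNECTION))
                return 0;
        return intf->connection == XENSTORE_CONNECTED;
}
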
index b13eb86395e0597861b345417ddaa3427fb21946..eaa932b99d8ac2d32a0b0bb73a1a6ce9eff12b95 100644 (file)
@@ -224,8 +224,10 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
                         const char *pathfmt, ...);
 
 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr,
                      unsigned int nr_pages, grant_ref_t *grefs);
+void xenbus_teardown_ring(void **vaddr, unsigned int nr_pages,
+                         grant_ref_t *grefs);
 int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
                           unsigned int nr_grefs, void **vaddr);
 
index 98182c3c2c4b3eedd0eda488107b656ad0f030e3..f057c49f1d9d8dd1b7c338ade6a793bd700e7633 100644 (file)
@@ -1035,21 +1035,18 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
        softirq_init();
        timekeeping_init();
        kfence_init();
+       time_init();
 
        /*
         * For best initial stack canary entropy, prepare it after:
         * - setup_arch() for any UEFI RNG entropy and boot cmdline access
-        * - timekeeping_init() for ktime entropy used in rand_initialize()
-        * - rand_initialize() to get any arch-specific entropy like RDRAND
-        * - add_latent_entropy() to get any latent entropy
-        * - adding command line entropy
+        * - timekeeping_init() for ktime entropy used in random_init()
+        * - time_init() for making random_get_entropy() work on some platforms
+        * - random_init() to initialize the RNG from early entropy sources
         */
-       rand_initialize();
-       add_latent_entropy();
-       add_device_randomness(command_line, strlen(command_line));
+       random_init(command_line);
        boot_init_stack_canary();
 
-       time_init();
        perf_event_init();
        profile_init();
        call_function_init();
index ea2ee1181921e13592f19aae0fc7a0d4f7857ac5..f3a2abd6d1a1922c8e19e44e3b26a49064e0f521 100644 (file)
@@ -1959,6 +1959,12 @@ void __audit_uring_exit(int success, long code)
 {
        struct audit_context *ctx = audit_context();
 
+       if (ctx->dummy) {
+               if (ctx->context != AUDIT_CTX_URING)
+                       return;
+               goto out;
+       }
+
        if (ctx->context == AUDIT_CTX_SYSCALL) {
                /*
                 * NOTE: See the note in __audit_uring_entry() about the case
index d56ee177d5f8fb4e5d397f65d14f5736a19d444d..2dfe1079f7727ad836feca67861c3c9e7356fddb 100644 (file)
@@ -27,6 +27,7 @@ config BPF_SYSCALL
        bool "Enable bpf() system call"
        select BPF
        select IRQ_WORK
+       select TASKS_RCU if PREEMPTION
        select TASKS_TRACE_RCU
        select BINARY_PRINTF
        select NET_SOCK_MSG if NET
index 9390bfd9f1cd382e6e08a9d12df051fd3722c636..71a418858a5e0d9861131c600bec5efd62b5ae4a 100644 (file)
@@ -3390,8 +3390,11 @@ static struct notifier_block cpuset_track_online_nodes_nb = {
  */
 void __init cpuset_init_smp(void)
 {
-       cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
-       top_cpuset.mems_allowed = node_states[N_MEMORY];
+       /*
+        * cpus_allowed/mems_allowed set to v2 values in the initial
+        * cpuset_bind() call will be reset to v1 values in another
+        * cpuset_bind() call when v1 cpuset is mounted.
+        */
        top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
 
        cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
diff --git a/kernel/configs/x86_debug.config b/kernel/configs/x86_debug.config
new file mode 100644 (file)
index 0000000..dcd86f3
--- /dev/null
@@ -0,0 +1,18 @@
+CONFIG_X86_DEBUG_FPU=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_VMACACHE=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_SLAB=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_KMEMCHECK=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
+CONFIG_GCOV_KERNEL=y
+CONFIG_LOCKDEP=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_SCHEDSTATS=y
+CONFIG_VMLINUX_VALIDATION=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
index d0a9aa0b42e8d77c79ec9cbf5098736d1c95c3e6..bbad5e375d3ba1e6d841979cb82195ab6d427ede 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/cpuset.h>
 #include <linux/random.h>
+#include <linux/cc_platform.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -716,14 +717,6 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 /*
  * The cpu hotplug threads manage the bringup and teardown of the cpus
  */
-static void cpuhp_create(unsigned int cpu)
-{
-       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-
-       init_completion(&st->done_up);
-       init_completion(&st->done_down);
-}
-
 static int cpuhp_should_run(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
@@ -883,15 +876,27 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
 
 static struct smp_hotplug_thread cpuhp_threads = {
        .store                  = &cpuhp_state.thread,
-       .create                 = &cpuhp_create,
        .thread_should_run      = cpuhp_should_run,
        .thread_fn              = cpuhp_thread_fun,
        .thread_comm            = "cpuhp/%u",
        .selfparking            = true,
 };
 
+static __init void cpuhp_init_state(void)
+{
+       struct cpuhp_cpu_state *st;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               st = per_cpu_ptr(&cpuhp_state, cpu);
+               init_completion(&st->done_up);
+               init_completion(&st->done_down);
+       }
+}
+
 void __init cpuhp_threads_init(void)
 {
+       cpuhp_init_state();
        BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
        kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
@@ -1186,6 +1191,12 @@ out:
 
 static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
 {
+       /*
+        * If the platform does not support hotplug, report it explicitly to
+        * differentiate it from a transient offlining failure.
+        */
+       if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
+               return -EOPNOTSUPP;
        if (cpu_hotplug_disabled)
                return -EBUSY;
        return _cpu_down(cpu, 0, target);
index 256cf6db573cd094ed505775f6405ecdbb03457b..4d57c03714f4e139e9eecb10eb3fb23f913b2092 100644 (file)
@@ -243,9 +243,8 @@ static int __init __parse_crashkernel(char *cmdline,
        *crash_base = 0;
 
        ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
-
        if (!ck_cmdline)
-               return -EINVAL;
+               return -ENOENT;
 
        ck_cmdline += strlen(name);
 
index da06a5553835b5e443555ee6b446e93abc2c3ffe..7beceb447211d12cc3f62bfb128619507b7ff55d 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/vmacache.h>
 #include <linux/rcupdate.h>
 #include <linux/irq.h>
+#include <linux/security.h>
 
 #include <asm/cacheflush.h>
 #include <asm/byteorder.h>
@@ -752,6 +753,29 @@ cpu_master_loop:
                                continue;
                        kgdb_connected = 0;
                } else {
+                       /*
+                        * This is a brutal way to interfere with the debugger
+                        * and prevent gdb being used to poke at kernel memory.
+                        * This could cause trouble if lockdown is applied when
+                        * there is already an active gdb session. For now the
+                        * answer is simply "don't do that". Typically lockdown
+                        * *will* be applied before the debug core gets started
+                        * so only developers using kgdb for fairly advanced
+                        * early kernel debug can be bitten by this. Hopefully
+                        * they are sophisticated enough to take care of
+                        * themselves, especially with help from the lockdown
+                        * message printed on the console!
+                        */
+                       if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
+                               if (IS_ENABLED(CONFIG_KGDB_KDB)) {
+                                       /* Switch back to kdb if possible... */
+                                       dbg_kdb_mode = 1;
+                                       continue;
+                               } else {
+                                       /* ... otherwise just bail */
+                                       break;
+                               }
+                       }
                        error = gdb_serial_stub(ks);
                }
 
index 0852a537dad4c083d651da7b80d3bbf48f181f61..ead4da947127075e825c9085ff20d69a23046dd4 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/proc_fs.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <linux/security.h>
 #include "kdb_private.h"
 
 #undef MODULE_PARAM_PREFIX
@@ -166,10 +167,62 @@ struct task_struct *kdb_curr_task(int cpu)
 }
 
 /*
- * Check whether the flags of the current command and the permissions
- * of the kdb console has allow a command to be run.
+ * Update the permissions flags (kdb_cmd_enabled) to match the
+ * current lockdown state.
+ *
+ * Within this function the calls to security_locked_down() are "lazy". We
+ * avoid calling them if the current value of kdb_cmd_enabled already excludes
+ * flags that might be subject to lockdown. Additionally we deliberately check
+ * the lockdown flags independently (even though read lockdown implies write
+ * lockdown) since that results in both simpler code and clearer messages to
+ * the user on first-time debugger entry.
+ *
+ * The permission masks during a read+write lockdown permit the following
+ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE).
+ *
+ * The INSPECT commands are not blocked during lockdown because they are
+ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes
+ * forcing them to have no arguments) and lsmod. These commands do expose
+ * some kernel state but do not allow the developer seated at the console to
+ * choose what state is reported. SIGNAL and REBOOT should not be controversial,
+ * given these are allowed for root during lockdown already.
+ */
+static void kdb_check_for_lockdown(void)
+{
+       const int write_flags = KDB_ENABLE_MEM_WRITE |
+                               KDB_ENABLE_REG_WRITE |
+                               KDB_ENABLE_FLOW_CTRL;
+       const int read_flags = KDB_ENABLE_MEM_READ |
+                              KDB_ENABLE_REG_READ;
+
+       bool need_to_lockdown_write = false;
+       bool need_to_lockdown_read = false;
+
+       if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags))
+               need_to_lockdown_write =
+                       security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL);
+
+       if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags))
+               need_to_lockdown_read =
+                       security_locked_down(LOCKDOWN_DBG_READ_KERNEL);
+
+       /* De-compose KDB_ENABLE_ALL if required */
+       if (need_to_lockdown_write || need_to_lockdown_read)
+               if (kdb_cmd_enabled & KDB_ENABLE_ALL)
+                       kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL;
+
+       if (need_to_lockdown_write)
+               kdb_cmd_enabled &= ~write_flags;
+
+       if (need_to_lockdown_read)
+               kdb_cmd_enabled &= ~read_flags;
+}
+
+/*
+ * Check whether the flags of the current command, the permissions of the kdb
+ * console and the lockdown state allow a command to be run.
  */
-static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
+static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
                                   bool no_args)
 {
        /* permissions comes from userspace so needs massaging slightly */
@@ -1180,6 +1233,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
                kdb_curr_task(raw_smp_processor_id());
 
        KDB_DEBUG_STATE("kdb_local 1", reason);
+
+       kdb_check_for_lockdown();
+
        kdb_go_count = 0;
        if (reason == KDB_REASON_DEBUG) {
                /* special case below */
index 93c3b86e781c143f73092f3e48d7ed073471ce82..032f164abe7cefb2011fa50f481bc6b661b88d7f 100644 (file)
@@ -17,7 +17,7 @@
 /* See comment for enter_from_user_mode() in entry-common.h */
 static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
 {
-       arch_check_user_regs(regs);
+       arch_enter_from_user_mode(regs);
        lockdep_hardirqs_off(CALLER_ADDR0);
 
        CT_WARN_ON(ct_state() != CONTEXT_USER);
@@ -126,7 +126,7 @@ static __always_inline void __exit_to_user_mode(void)
 {
        instrumentation_begin();
        trace_hardirqs_on_prepare();
-       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       lockdep_hardirqs_on_prepare();
        instrumentation_end();
 
        user_enter_irqoff();
@@ -416,7 +416,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
                        instrumentation_begin();
                        /* Tell the tracer that IRET will enable interrupts */
                        trace_hardirqs_on_prepare();
-                       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+                       lockdep_hardirqs_on_prepare();
                        instrumentation_end();
                        rcu_irq_exit();
                        lockdep_hardirqs_on(CALLER_ADDR0);
@@ -465,7 +465,7 @@ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
        ftrace_nmi_exit();
        if (irq_state.lockdep) {
                trace_hardirqs_on_prepare();
-               lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+               lockdep_hardirqs_on_prepare();
        }
        instrumentation_end();
 
index 7858bafffa9d635d75326df9b46fad015005dae0..950b25c3f210350fae5674fc3863e858650bea58 100644 (file)
@@ -6428,8 +6428,8 @@ static void perf_sigtrap(struct perf_event *event)
        if (current->flags & PF_EXITING)
                return;
 
-       force_sig_perf((void __user *)event->pending_addr,
-                      event->attr.type, event->attr.sig_data);
+       send_sig_perf((void __user *)event->pending_addr,
+                     event->attr.type, event->attr.sig_data);
 }
 
 static void perf_pending_event_disable(struct perf_event *event)
@@ -12217,6 +12217,9 @@ SYSCALL_DEFINE5(perf_event_open,
                 * Do not allow to attach to a group in a different task
                 * or CPU context. If we're moving SW events, we'll fix
                 * this up later, so allow that.
+                *
+                * Racy, not holding group_leader->ctx->mutex, see comment with
+                * perf_event_ctx_lock().
                 */
                if (!move_group && group_leader->ctx != ctx)
                        goto err_context;
@@ -12282,6 +12285,7 @@ SYSCALL_DEFINE5(perf_event_open,
                        } else {
                                perf_event_ctx_unlock(group_leader, gctx);
                                move_group = 0;
+                               goto not_move_group;
                        }
                }
 
@@ -12298,7 +12302,17 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        } else {
                mutex_lock(&ctx->mutex);
+
+               /*
+                * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx,
+                * see the group_leader && !move_group test earlier.
+                */
+               if (group_leader && group_leader->ctx != ctx) {
+                       err = -EINVAL;
+                       goto err_locked;
+               }
        }
+not_move_group:
 
        if (ctx->task == TASK_TOMBSTONE) {
                err = -ESRCH;
index 35a3beff140b6dccb6b4f1e7971df9eb6aa85bf7..254ab63c110651126edf6c2aad149c1d47dc9a7e 100644 (file)
@@ -1046,6 +1046,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_MEMCG
        tsk->active_memcg = NULL;
 #endif
+
+#ifdef CONFIG_CPU_SUP_INTEL
+       tsk->reported_split_lock = 0;
+#endif
+
        return tsk;
 
 free_stack:
index 183b28c32c832e7169b6953d7c99ef844477b8b3..ce2889f123755f78d0d14880dbe90b1acc8410cb 100644 (file)
@@ -1005,7 +1005,7 @@ retry_private:
        rt_mutex_init_waiter(&rt_waiter);
 
        /*
-        * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
+        * On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
         * hold it while doing rt_mutex_start_proxy(), because then it will
         * include hb->lock in the blocking chain, even through we'll not in
         * fact hold it while blocking. This will lead it to report -EDEADLK
index fdf170404650f7e96559cc2df2d783618e5f93bc..d9a5c1d65a79db075f002cbd82b346429c4fb26b 100644 (file)
@@ -258,7 +258,7 @@ static int __irq_build_affinity_masks(unsigned int startvec,
        nodemask_t nodemsk = NODE_MASK_NONE;
        struct node_vectors *node_vectors;
 
-       if (!cpumask_weight(cpu_mask))
+       if (cpumask_empty(cpu_mask))
                return 0;
 
        nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
index 54af0deb239b8751679505dfb6295d99dc060cb7..e6b8e564b37f049237a39b2d65c486d5b686fd3d 100644 (file)
@@ -1573,17 +1573,12 @@ static struct device *irq_get_parent_device(struct irq_data *data)
 int irq_chip_pm_get(struct irq_data *data)
 {
        struct device *dev = irq_get_parent_device(data);
-       int retval;
+       int retval = 0;
 
-       if (IS_ENABLED(CONFIG_PM) && dev) {
-               retval = pm_runtime_get_sync(dev);
-               if (retval < 0) {
-                       pm_runtime_put_noidle(dev);
-                       return retval;
-               }
-       }
+       if (IS_ENABLED(CONFIG_PM) && dev)
+               retval = pm_runtime_resume_and_get(dev);
 
-       return 0;
+       return retval;
 }
 
 /**
index 2b43f5f5033d13cca9af2bd4c70c96fe641757c7..bc8e40cf2b65adc4c8ac0e1a56909b4dc2f078dc 100644 (file)
@@ -58,6 +58,7 @@ static const struct irq_bit_descr irqchip_flags[] = {
        BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
        BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI),
        BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND),
+       BIT_MASK_DESCR(IRQCHIP_IMMUTABLE),
 };
 
 static void
index 0cd02efa3a742ed77fbcafb49869bf45e82520fa..dd76323ea3fd7410d56dc648441595e3474e6426 100644 (file)
@@ -181,7 +181,7 @@ struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode,
                goto err_free_bitmap;
 
        work_ctx->irq_count = num_irqs;
-       init_irq_work(&work_ctx->work, irq_sim_handle_irq);
+       work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq);
 
        return work_ctx->domain;
 
index 0099b87dd8530c35e28a0318421e7dd17f73761b..d323b180b0f371b1176e1926e2d3130497e146b8 100644 (file)
@@ -701,7 +701,6 @@ EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
  */
 int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
 {
-       WARN_ON_ONCE(!in_hardirq());
        return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
 }
 EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
index e3e245a4fd70bfecf7e2c2221bafdf54a1fb8726..8c396319d5ac2f4d3619bbdde7fa98dc625e776d 100644 (file)
@@ -222,11 +222,16 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 {
        struct irq_desc *desc = irq_data_to_desc(data);
        struct irq_chip *chip = irq_data_get_irq_chip(data);
+       const struct cpumask *prog_mask;
        int ret;
 
+       static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
+       static struct cpumask tmp_mask;
+
        if (!chip || !chip->irq_set_affinity)
                return -EINVAL;
 
+       raw_spin_lock(&tmp_mask_lock);
        /*
         * If this is a managed interrupt and housekeeping is enabled on
         * it, check whether the requested affinity mask intersects with
@@ -248,24 +253,34 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
         */
        if (irqd_affinity_is_managed(data) &&
            housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) {
-               const struct cpumask *hk_mask, *prog_mask;
-
-               static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
-               static struct cpumask tmp_mask;
+               const struct cpumask *hk_mask;
 
                hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
 
-               raw_spin_lock(&tmp_mask_lock);
                cpumask_and(&tmp_mask, mask, hk_mask);
                if (!cpumask_intersects(&tmp_mask, cpu_online_mask))
                        prog_mask = mask;
                else
                        prog_mask = &tmp_mask;
-               ret = chip->irq_set_affinity(data, prog_mask, force);
-               raw_spin_unlock(&tmp_mask_lock);
        } else {
-               ret = chip->irq_set_affinity(data, mask, force);
+               prog_mask = mask;
        }
+
+       /*
+        * Make sure we only provide online CPUs to the irqchip,
+        * unless we are being asked to force the affinity (in which
+        * case we do as we are told).
+        */
+       cpumask_and(&tmp_mask, prog_mask, cpu_online_mask);
+       if (!force && !cpumask_empty(&tmp_mask))
+               ret = chip->irq_set_affinity(data, &tmp_mask, force);
+       else if (force)
+               ret = chip->irq_set_affinity(data, mask, force);
+       else
+               ret = -EINVAL;
+
+       raw_spin_unlock(&tmp_mask_lock);
+
        switch (ret) {
        case IRQ_SET_MASK_OK:
        case IRQ_SET_MASK_OK_DONE:
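
The net effect of pulling tmp_mask out of the managed-interrupt branch is that every path now clamps what it hands to the irqchip. A worked scenario (CPU numbers illustrative):

	/* Requested mask = {2,3}:
	 *   CPUs 2-3 online,  force=false -> program {2,3} & online = {2,3}
	 *   CPUs 2-3 offline, force=false -> tmp_mask empty, fail with -EINVAL
	 *   CPUs 2-3 offline, force=true  -> program {2,3} exactly as told
	 */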
index bbfb26489aa1c4aceb73dcc99d810bfa3cd2e267..1698e77645acf78ec96107e849e880475dc5da52 100644 (file)
@@ -286,7 +286,7 @@ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk)
 int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk,
                             unsigned int *mapped_cpu)
 {
-       unsigned int bit, cpu, end = m->alloc_end;
+       unsigned int bit, cpu, end;
        struct cpumap *cm;
 
        if (cpumask_empty(msk))
index 2bdfce5edafd025344c38c47c91e97be623fbac4..a9ee535293eb264d52b1b52d861c1a26cd3fd101 100644 (file)
@@ -818,6 +818,21 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag
                irqd_clr_can_reserve(irqd);
                if (vflags & VIRQ_NOMASK_QUIRK)
                        irqd_set_msi_nomask_quirk(irqd);
+
+               /*
+                * If the interrupt is managed but no CPU is available to
+                * service it, shut it down until better times. Note that
+                * we only do this on the !RESERVE path as x86 (the only
+                * architecture using this flag) deals with this in a
+                * different way by using a catch-all vector.
+                */
+               if ((vflags & VIRQ_ACTIVATE) &&
+                   irqd_affinity_is_managed(irqd) &&
+                   !cpumask_intersects(irq_data_get_affinity_mask(irqd),
+                                       cpu_online_mask)) {
+                       irqd_set_managed_shutdown(irqd);
+                       return 0;
+               }
        }
 
        if (!(vflags & VIRQ_ACTIVATE))
index a36fca063a73aa3bf043917bbe502e3780d8b9a4..767dfacd6ed30fa10403d092daf4bff08cf23010 100644 (file)
@@ -1380,13 +1380,14 @@ static const void *nthreads_gen_params(const void *prev, char *desc)
        else
                nthreads *= 2;
 
-       if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
+       if (!preempt_model_preemptible() ||
+           !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
                /*
                 * Without any preemption, keep 2 CPUs free for other tasks, one
                 * of which is the main test case function checking for
                 * completion or failure.
                 */
-               const long min_unused_cpus = IS_ENABLED(CONFIG_PREEMPT_NONE) ? 2 : 0;
+               const long min_unused_cpus = preempt_model_none() ? 2 : 0;
                const long min_required_cpus = 2 + min_unused_cpus;
 
                if (num_online_cpus() < min_required_cpus) {
index 68480f731192ebe325f607953dbdcc70d2dfbe18..be4b54c2c615c696c6533fb388dc32e4473c5e7f 100644 (file)
@@ -1078,7 +1078,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
                return;
        memset(&prstatus, 0, sizeof(prstatus));
        prstatus.common.pr_pid = current->pid;
-       elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+       elf_core_copy_regs(&prstatus.pr_reg, regs);
        buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
                              &prstatus, sizeof(prstatus));
        final_note(buf);
index 50265f69a1354f174ca91d92dc9a440e3cb5a943..544fd4097406892d9280b9acb37303e231d0b141 100644 (file)
@@ -1522,5 +1522,4 @@ struct cgroup_subsys_state *kthread_blkcg(void)
        }
        return NULL;
 }
-EXPORT_SYMBOL(kthread_blkcg);
 #endif
index c06cab6546ed64275f868af46b0280747f4bfc3f..a6e671b8608dbd12acd1c82a5faca189b3771113 100644 (file)
@@ -60,7 +60,6 @@
 
 #include "lockdep_internals.h"
 
-#define CREATE_TRACE_POINTS
 #include <trace/events/lock.h>
 
 #ifdef CONFIG_PROVE_LOCKING
@@ -1380,7 +1379,7 @@ static struct lock_list *alloc_list_entry(void)
  */
 static int add_lock_to_list(struct lock_class *this,
                            struct lock_class *links_to, struct list_head *head,
-                           unsigned long ip, u16 distance, u8 dep,
+                           u16 distance, u8 dep,
                            const struct lock_trace *trace)
 {
        struct lock_list *entry;
@@ -3133,19 +3132,15 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
         * to the previous lock's dependency list:
         */
        ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
-                              &hlock_class(prev)->locks_after,
-                              next->acquire_ip, distance,
-                              calc_dep(prev, next),
-                              *trace);
+                              &hlock_class(prev)->locks_after, distance,
+                              calc_dep(prev, next), *trace);
 
        if (!ret)
                return 0;
 
        ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
-                              &hlock_class(next)->locks_before,
-                              next->acquire_ip, distance,
-                              calc_depb(prev, next),
-                              *trace);
+                              &hlock_class(next)->locks_before, distance,
+                              calc_depb(prev, next), *trace);
        if (!ret)
                return 0;
 
@@ -4236,14 +4231,13 @@ static void __trace_hardirqs_on_caller(void)
 
 /**
  * lockdep_hardirqs_on_prepare - Prepare for enabling interrupts
- * @ip:                Caller address
  *
  * Invoked before a possible transition to RCU idle from exit to user or
  * guest mode. This ensures that all RCU operations are done before RCU
  * stops watching. After the RCU transition lockdep_hardirqs_on() has to be
  * invoked to set the final state.
  */
-void lockdep_hardirqs_on_prepare(unsigned long ip)
+void lockdep_hardirqs_on_prepare(void)
 {
        if (unlikely(!debug_locks))
                return;
@@ -4840,8 +4834,7 @@ EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 
 static void
 print_lock_nested_lock_not_held(struct task_struct *curr,
-                               struct held_lock *hlock,
-                               unsigned long ip)
+                               struct held_lock *hlock)
 {
        if (!debug_locks_off())
                return;
@@ -5017,7 +5010,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        chain_key = iterate_chain_key(chain_key, hlock_id(hlock));
 
        if (nest_lock && !__lock_is_held(nest_lock, -1)) {
-               print_lock_nested_lock_not_held(curr, hlock, ip);
+               print_lock_nested_lock_not_held(curr, hlock);
                return 0;
        }
 
index 5e3585950ec8f7d353c7e8ff9b91d21464ee0b75..d973fe6041bf6791c1f8f10c607548060c0fa32a 100644 (file)
@@ -30,6 +30,9 @@
 #include <linux/debug_locks.h>
 #include <linux/osq_lock.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/lock.h>
+
 #ifndef CONFIG_PREEMPT_RT
 #include "mutex.h"
 
@@ -599,12 +602,14 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
        preempt_disable();
        mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
+       trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
        if (__mutex_trylock(lock) ||
            mutex_optimistic_spin(lock, ww_ctx, NULL)) {
                /* got the lock, yay! */
                lock_acquired(&lock->dep_map, ip);
                if (ww_ctx)
                        ww_mutex_set_context_fastpath(ww, ww_ctx);
+               trace_contention_end(lock, 0);
                preempt_enable();
                return 0;
        }
@@ -641,6 +646,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
        }
 
        set_current_state(state);
+       trace_contention_begin(lock, LCB_F_MUTEX);
        for (;;) {
                bool first;
 
@@ -680,10 +686,16 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
                 * state back to RUNNING and fall through the next schedule(),
                 * or we must see its unlock and acquire.
                 */
-               if (__mutex_trylock_or_handoff(lock, first) ||
-                   (first && mutex_optimistic_spin(lock, ww_ctx, &waiter)))
+               if (__mutex_trylock_or_handoff(lock, first))
                        break;
 
+               if (first) {
+                       trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
+                       if (mutex_optimistic_spin(lock, ww_ctx, &waiter))
+                               break;
+                       trace_contention_begin(lock, LCB_F_MUTEX);
+               }
+
                raw_spin_lock(&lock->wait_lock);
        }
        raw_spin_lock(&lock->wait_lock);
@@ -707,6 +719,7 @@ acquired:
 skip_wait:
        /* got the lock - cleanup and rejoice! */
        lock_acquired(&lock->dep_map, ip);
+       trace_contention_end(lock, 0);
 
        if (ww_ctx)
                ww_mutex_lock_acquired(ww, ww_ctx);
@@ -719,6 +732,7 @@ err:
        __set_current_state(TASK_RUNNING);
        __mutex_remove_waiter(lock, &waiter);
 err_early_kill:
+       trace_contention_end(lock, ret);
        raw_spin_unlock(&lock->wait_lock);
        debug_mutex_free_waiter(&waiter);
        mutex_release(&lock->dep_map, ip);
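
Taken together with the rwsem, rtmutex, percpu-rwsem, and qspinlock/qrwlock hooks below, these tracepoints give every sleeping and spinning lock a common contention signal, with the LCB_F_* flags (and the SPIN/MUTEX re-tagging in the loop above) distinguishing the phases. A sketch of the begin/end pairing as a hypothetical slowpath would use it (wait_for_lock() and my_slowpath() are illustrative names, not kernel functions):

	static int my_slowpath(struct mutex *lock)
	{
		int ret;

		trace_contention_begin(lock, LCB_F_MUTEX);	/* about to block */
		ret = wait_for_lock(lock);			/* hypothetical blocking wait */
		trace_contention_end(lock, ret);		/* 0 on success, -errno on abort */
		return ret;
	}

On the consumer side the events surface under the "lock" trace system as lock:contention_begin and lock:contention_end.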
index c9fdae94e098ebd92fa612d3ce2cae0bbaf40f34..5fe4c5495ba3c87c8505fe0dde278d0112579985 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/debug.h>
 #include <linux/errno.h>
+#include <trace/events/lock.h>
 
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
                        const char *name, struct lock_class_key *key)
@@ -171,9 +172,11 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
        if (try)
                return false;
 
+       trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
        preempt_enable();
        percpu_rwsem_wait(sem, /* .reader = */ true);
        preempt_disable();
+       trace_contention_end(sem, 0);
 
        return true;
 }
@@ -216,6 +219,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
 {
        might_sleep();
        rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+       trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
 
        /* Notify readers to take the slow path. */
        rcu_sync_enter(&sem->rss);
@@ -237,6 +241,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
 
        /* Wait for all active readers to complete. */
        rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
+       trace_contention_end(sem, 0);
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
 
index ec36b73f4733b1b065e63ba2440372bee4f4d799..2e1600906c9f5cd868415d20e2d7024c5b1e0531 100644 (file)
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/spinlock.h>
+#include <trace/events/lock.h>
 
 /**
- * queued_read_lock_slowpath - acquire read lock of a queue rwlock
- * @lock: Pointer to queue rwlock structure
+ * queued_read_lock_slowpath - acquire read lock of a queued rwlock
+ * @lock: Pointer to queued rwlock structure
  */
 void queued_read_lock_slowpath(struct qrwlock *lock)
 {
@@ -34,6 +35,8 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
        }
        atomic_sub(_QR_BIAS, &lock->cnts);
 
+       trace_contention_begin(lock, LCB_F_SPIN | LCB_F_READ);
+
        /*
         * Put the reader into the wait queue
         */
@@ -51,17 +54,21 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
         * Signal the next one in queue to become queue head
         */
        arch_spin_unlock(&lock->wait_lock);
+
+       trace_contention_end(lock, 0);
 }
 EXPORT_SYMBOL(queued_read_lock_slowpath);
 
 /**
- * queued_write_lock_slowpath - acquire write lock of a queue rwlock
- * @lock : Pointer to queue rwlock structure
+ * queued_write_lock_slowpath - acquire write lock of a queued rwlock
+ * @lock : Pointer to queued rwlock structure
  */
 void queued_write_lock_slowpath(struct qrwlock *lock)
 {
        int cnts;
 
+       trace_contention_begin(lock, LCB_F_SPIN | LCB_F_WRITE);
+
        /* Put the writer into the wait queue */
        arch_spin_lock(&lock->wait_lock);
 
@@ -79,5 +86,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
        } while (!atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED));
 unlock:
        arch_spin_unlock(&lock->wait_lock);
+
+       trace_contention_end(lock, 0);
 }
 EXPORT_SYMBOL(queued_write_lock_slowpath);
index cbff6ba53d563634791e27ad8d11e7a683065679..65a9a10caa6f50de9a87d28174c6ce707035b3f2 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/prefetch.h>
 #include <asm/byteorder.h>
 #include <asm/qspinlock.h>
+#include <trace/events/lock.h>
 
 /*
  * Include queued spinlock statistics code
@@ -401,6 +402,8 @@ pv_queue:
        idx = node->count++;
        tail = encode_tail(smp_processor_id(), idx);
 
+       trace_contention_begin(lock, LCB_F_SPIN);
+
        /*
         * 4 nodes are allocated based on the assumption that there will
         * not be nested NMIs taking spinlocks. That may not be true in
@@ -554,6 +557,8 @@ locked:
        pv_kick_node(lock, next);
 
 release:
+       trace_contention_end(lock, 0);
+
        /*
         * release the node
         */
index 8555c4efe97c47ad52f3fc595463da05264d5388..7779ee8abc2a08b4a9830b375ea65fa2a3ca013f 100644 (file)
@@ -24,6 +24,8 @@
 #include <linux/sched/wake_q.h>
 #include <linux/ww_mutex.h>
 
+#include <trace/events/lock.h>
+
 #include "rtmutex_common.h"
 
 #ifndef WW_RT
@@ -1579,6 +1581,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 
        set_current_state(state);
 
+       trace_contention_begin(lock, LCB_F_RT);
+
        ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
        if (likely(!ret))
                ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
@@ -1601,6 +1605,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
         * unconditionally. We might have to fix that up.
         */
        fixup_rt_mutex_waiters(lock);
+
+       trace_contention_end(lock, ret);
+
        return ret;
 }
 
@@ -1683,6 +1690,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
        /* Save current state and set state to TASK_RTLOCK_WAIT */
        current_save_and_set_rtlock_wait_state();
 
+       trace_contention_begin(lock, LCB_F_RT);
+
        task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
 
        for (;;) {
@@ -1712,6 +1721,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
         */
        fixup_rt_mutex_waiters(lock);
        debug_rt_mutex_free_waiter(&waiter);
+
+       trace_contention_end(lock, 0);
 }
 
 static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
index 6fd3162e4098ffa60795c38d9394c0459512708e..c201aadb9301722357745d632c49ac652e0179d6 100644 (file)
@@ -112,6 +112,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
         * Reader2 to call up_read(), which might be unbound.
         */
 
+       trace_contention_begin(rwb, LCB_F_RT | LCB_F_READ);
+
        /*
         * For rwlocks this returns 0 unconditionally, so the below
         * !ret conditionals are optimized out.
@@ -130,6 +132,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
        raw_spin_unlock_irq(&rtm->wait_lock);
        if (!ret)
                rwbase_rtmutex_unlock(rtm);
+
+       trace_contention_end(rwb, ret);
        return ret;
 }
 
@@ -247,11 +251,13 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
                goto out_unlock;
 
        rwbase_set_and_save_current_state(state);
+       trace_contention_begin(rwb, LCB_F_RT | LCB_F_WRITE);
        for (;;) {
                /* Optimized out for rwlocks */
                if (rwbase_signal_pending_state(state, current)) {
                        rwbase_restore_current_state();
                        __rwbase_write_unlock(rwb, 0, flags);
+                       trace_contention_end(rwb, -EINTR);
                        return -EINTR;
                }
 
@@ -265,6 +271,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
                set_current_state(state);
        }
        rwbase_restore_current_state();
+       trace_contention_end(rwb, 0);
 
 out_unlock:
        raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
index acde5d6f125463025ab8680da5fa98bffd612cfa..9d1db4a54d34e9db4ba2c8b1a4e3d231b52f1b37 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/export.h>
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
+#include <trace/events/lock.h>
 
 #ifndef CONFIG_PREEMPT_RT
 #include "lock_events.h"
@@ -375,16 +376,19 @@ rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
  *
  * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
  * this function. Modify with care.
+ *
+ * Return: true if wait_list isn't empty and false otherwise.
  */
-static inline void
+static inline bool
 rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
 {
        lockdep_assert_held(&sem->wait_lock);
        list_del(&waiter->list);
        if (likely(!list_empty(&sem->wait_list)))
-               return;
+               return true;
 
        atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
+       return false;
 }
 
 /*
@@ -558,6 +562,33 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
        }
 }
 
+/*
+ * Remove a waiter and try to wake up other waiters in the wait queue.
+ * This function is called from the out_nolock path of both the reader and
+ * writer slowpaths with wait_lock held. It releases the wait_lock and
+ * optionally wakes up waiters before it returns.
+ */
+static inline void
+rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
+                     struct wake_q_head *wake_q)
+                     __releases(&sem->wait_lock)
+{
+       bool first = rwsem_first_waiter(sem) == waiter;
+
+       wake_q_init(wake_q);
+
+       /*
+        * If the wait_list isn't empty and the waiter to be deleted is
+        * the first waiter, we wake up the remaining waiters as they may
+        * be eligible to acquire or spin on the lock.
+        */
+       if (rwsem_del_waiter(sem, waiter) && first)
+               rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
+       raw_spin_unlock_irq(&sem->wait_lock);
+       if (!wake_q_empty(wake_q))
+               wake_up_q(wake_q);
+}
+
 /*
  * This function must be called with the sem->wait_lock held to prevent
  * race conditions between checking the rwsem wait list and setting the
@@ -901,7 +932,7 @@ done:
  */
 static inline void clear_nonspinnable(struct rw_semaphore *sem)
 {
-       if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
+       if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
                atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
 }
 
@@ -925,6 +956,31 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
 }
 #endif
 
+/*
+ * Prepare to wake up waiter(s) in the wait queue by putting them into the
+ * given wake_q if the rwsem lock owner isn't a writer. If the rwsem is
+ * likely reader-owned, wake up the read-lock waiters at the front of the
+ * queue; otherwise wake up the front waiter regardless of its type.
+ *
+ * This is called from both the reader and writer slow paths.
+ */
+static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
+                                         struct wake_q_head *wake_q)
+{
+       enum rwsem_wake_type wake_type;
+
+       if (count & RWSEM_WRITER_MASK)
+               return;
+
+       if (count & RWSEM_READER_MASK) {
+               wake_type = RWSEM_WAKE_READERS;
+       } else {
+               wake_type = RWSEM_WAKE_ANY;
+               clear_nonspinnable(sem);
+       }
+       rwsem_mark_wake(sem, wake_type, wake_q);
+}
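
rwsem_cond_wake_waiter() centralizes the wake-type decision that the reader and writer slowpaths previously open-coded. The mapping from the count snapshot, summarized:

	/* count & RWSEM_WRITER_MASK -> write-owned: wake nobody
	 * count & RWSEM_READER_MASK -> read-owned:  RWSEM_WAKE_READERS
	 * neither                   -> lock free:   clear nonspinnable,
	 *                              RWSEM_WAKE_ANY
	 */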
+
 /*
  * Wait for the read lock to be granted
  */
@@ -935,7 +991,6 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
        long rcnt = (count >> RWSEM_READER_SHIFT);
        struct rwsem_waiter waiter;
        DEFINE_WAKE_Q(wake_q);
-       bool wake = false;
 
        /*
         * To prevent a constant stream of readers from starving a sleeping
@@ -977,12 +1032,11 @@ queue:
        if (list_empty(&sem->wait_list)) {
                /*
                 * In case the wait queue is empty and the lock isn't owned
-                * by a writer or has the handoff bit set, this reader can
-                * exit the slowpath and return immediately as its
-                * RWSEM_READER_BIAS has already been set in the count.
+                * by a writer, this reader can exit the slowpath and return
+                * immediately as its RWSEM_READER_BIAS has already been set
+                * in the count.
                 */
-               if (!(atomic_long_read(&sem->count) &
-                    (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
+               if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
                        /* Provide lock ACQUIRE */
                        smp_acquire__after_ctrl_dep();
                        raw_spin_unlock_irq(&sem->wait_lock);
@@ -997,22 +1051,13 @@ queue:
        /* we're now waiting on the lock, but no longer actively locking */
        count = atomic_long_add_return(adjustment, &sem->count);
 
-       /*
-        * If there are no active locks, wake the front queued process(es).
-        *
-        * If there are no writers and we are first in the queue,
-        * wake our own waiter to join the existing active readers !
-        */
-       if (!(count & RWSEM_LOCK_MASK)) {
-               clear_nonspinnable(sem);
-               wake = true;
-       }
-       if (wake || (!(count & RWSEM_WRITER_MASK) &&
-                   (adjustment & RWSEM_FLAG_WAITERS)))
-               rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-
+       rwsem_cond_wake_waiter(sem, count, &wake_q);
        raw_spin_unlock_irq(&sem->wait_lock);
-       wake_up_q(&wake_q);
+
+       if (!wake_q_empty(&wake_q))
+               wake_up_q(&wake_q);
+
+       trace_contention_begin(sem, LCB_F_READ);
 
        /* wait to be given the lock */
        for (;;) {
@@ -1035,13 +1080,14 @@ queue:
 
        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock);
+       trace_contention_end(sem, 0);
        return sem;
 
 out_nolock:
-       rwsem_del_waiter(sem, &waiter);
-       raw_spin_unlock_irq(&sem->wait_lock);
+       rwsem_del_wake_waiter(sem, &waiter, &wake_q);
        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock_fail);
+       trace_contention_end(sem, -EINTR);
        return ERR_PTR(-EINTR);
 }
 
@@ -1051,7 +1097,6 @@ out_nolock:
 static struct rw_semaphore __sched *
 rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 {
-       long count;
        struct rwsem_waiter waiter;
        DEFINE_WAKE_Q(wake_q);
 
@@ -1075,23 +1120,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 
        /* we're now waiting on the lock */
        if (rwsem_first_waiter(sem) != &waiter) {
-               count = atomic_long_read(&sem->count);
-
-               /*
-                * If there were already threads queued before us and:
-                *  1) there are no active locks, wake the front
-                *     queued process(es) as the handoff bit might be set.
-                *  2) there are no active writers and some readers, the lock
-                *     must be read owned; so we try to wake any read lock
-                *     waiters that were queued ahead of us.
-                */
-               if (count & RWSEM_WRITER_MASK)
-                       goto wait;
-
-               rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
-                                       ? RWSEM_WAKE_READERS
-                                       : RWSEM_WAKE_ANY, &wake_q);
-
+               rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
+                                      &wake_q);
                if (!wake_q_empty(&wake_q)) {
                        /*
                         * We want to minimize wait_lock hold time especially
@@ -1099,16 +1129,16 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
                         */
                        raw_spin_unlock_irq(&sem->wait_lock);
                        wake_up_q(&wake_q);
-                       wake_q_init(&wake_q);   /* Used again, reinit */
                        raw_spin_lock_irq(&sem->wait_lock);
                }
        } else {
                atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
        }
 
-wait:
        /* wait until we successfully acquire the lock */
        set_current_state(state);
+       trace_contention_begin(sem, LCB_F_WRITE);
+
        for (;;) {
                if (rwsem_try_write_lock(sem, &waiter)) {
                        /* rwsem_try_write_lock() implies ACQUIRE on success */
@@ -1148,17 +1178,15 @@ trylock_again:
        __set_current_state(TASK_RUNNING);
        raw_spin_unlock_irq(&sem->wait_lock);
        lockevent_inc(rwsem_wlock);
+       trace_contention_end(sem, 0);
        return sem;
 
 out_nolock:
        __set_current_state(TASK_RUNNING);
        raw_spin_lock_irq(&sem->wait_lock);
-       rwsem_del_waiter(sem, &waiter);
-       if (!list_empty(&sem->wait_list))
-               rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-       raw_spin_unlock_irq(&sem->wait_lock);
-       wake_up_q(&wake_q);
+       rwsem_del_wake_waiter(sem, &waiter, &wake_q);
        lockevent_inc(rwsem_wlock_fail);
+       trace_contention_end(sem, -EINTR);
        return ERR_PTR(-EINTR);
 }
 
index 9ee381e4d2a4d02f5fc2956233f0c3d6964c1674..f2654d2fe43aa17f56235d4676c4264fb9922fc4 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/semaphore.h>
 #include <linux/spinlock.h>
 #include <linux/ftrace.h>
+#include <trace/events/lock.h>
 
 static noinline void __down(struct semaphore *sem);
 static noinline int __down_interruptible(struct semaphore *sem);
@@ -205,7 +206,7 @@ struct semaphore_waiter {
  * constant, and thus optimised away by the compiler.  Likewise the
  * 'timeout' parameter for the cases without timeouts.
  */
-static inline int __sched __down_common(struct semaphore *sem, long state,
+static inline int __sched ___down_common(struct semaphore *sem, long state,
                                                                long timeout)
 {
        struct semaphore_waiter waiter;
@@ -236,6 +237,18 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
        return -EINTR;
 }
 
+static inline int __sched __down_common(struct semaphore *sem, long state,
+                                       long timeout)
+{
+       int ret;
+
+       trace_contention_begin(sem, 0);
+       ret = ___down_common(sem, state, timeout);
+       trace_contention_end(sem, ret);
+
+       return ret;
+}
+
 static noinline void __sched __down(struct semaphore *sem)
 {
        __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
index eb4dfb932c85996403d6291a56b563a37967a855..8355b19676f845667b2fb2924e8d99eec5384c40 100644 (file)
@@ -48,7 +48,7 @@ unsigned int __read_mostly sysctl_oops_all_cpu_backtrace;
 
 int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
 static unsigned long tainted_mask =
-       IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0;
+       IS_ENABLED(CONFIG_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0;
 static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
index 5899260a8bef4b24f4292aa555b201e1147b9b3c..874ad834dc8dafeed9afd6677cefbabc147435de 100644 (file)
@@ -1,6 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 
-ccflags-$(CONFIG_PM_DEBUG)     := -DDEBUG
+ifeq ($(CONFIG_DYNAMIC_DEBUG), y)
+CFLAGS_swap.o                   := -DDEBUG
+CFLAGS_snapshot.o               := -DDEBUG
+CFLAGS_energy_model.o           := -DDEBUG
+endif
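
Under CONFIG_DYNAMIC_DEBUG, defining DEBUG per object changes only the default state of each pr_debug() callsite: the sites in swap.c, snapshot.c, and energy_model.c start enabled but stay individually controllable through the dynamic_debug control file, e.g. echo 'file snapshot.c -p' > /sys/kernel/debug/dynamic_debug/control to silence them again (path assumes debugfs is mounted there).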
 
 KASAN_SANITIZE_snapshot.o      := n
 
index 0153b0ca7b23e86fa65025f24bf659d241e9bfc8..6c373f2960e71d1f402aa487d57ba96fee67cdd6 100644 (file)
@@ -54,28 +54,15 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
 
-static int em_debug_units_show(struct seq_file *s, void *unused)
+static int em_debug_flags_show(struct seq_file *s, void *unused)
 {
        struct em_perf_domain *pd = s->private;
-       char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
-               "milliWatts" : "bogoWatts";
 
-       seq_printf(s, "%s\n", units);
+       seq_printf(s, "%#lx\n", pd->flags);
 
        return 0;
 }
-DEFINE_SHOW_ATTRIBUTE(em_debug_units);
-
-static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
-{
-       struct em_perf_domain *pd = s->private;
-       int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
-
-       seq_printf(s, "%d\n", enabled);
-
-       return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
+DEFINE_SHOW_ATTRIBUTE(em_debug_flags);
 
 static void em_debug_create_pd(struct device *dev)
 {
@@ -89,9 +76,8 @@ static void em_debug_create_pd(struct device *dev)
                debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
                                    &em_debug_cpus_fops);
 
-       debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
-       debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
-                           &em_debug_skip_inefficiencies_fops);
+       debugfs_create_file("flags", 0444, d, dev->em_pd,
+                           &em_debug_flags_fops);
 
        /* Create a sub-directory for each performance state */
        for (i = 0; i < dev->em_pd->nr_perf_states; i++)
@@ -121,7 +107,8 @@ static void em_debug_remove_pd(struct device *dev) {}
 #endif
 
 static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
-                               int nr_states, struct em_data_callback *cb)
+                               int nr_states, struct em_data_callback *cb,
+                               unsigned long flags)
 {
        unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
        struct em_perf_state *table;
@@ -139,7 +126,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
                 * lowest performance state of 'dev' above 'freq' and updates
                 * 'power' and 'freq' accordingly.
                 */
-               ret = cb->active_power(&power, &freq, dev);
+               ret = cb->active_power(dev, &power, &freq);
                if (ret) {
                        dev_err(dev, "EM: invalid perf. state: %d\n",
                                ret);
@@ -173,10 +160,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
        /* Compute the cost of each performance state. */
        fmax = (u64) table[nr_states - 1].frequency;
        for (i = nr_states - 1; i >= 0; i--) {
-               unsigned long power_res = em_scale_power(table[i].power);
+               unsigned long power_res, cost;
+
+               if (flags & EM_PERF_DOMAIN_ARTIFICIAL) {
+                       ret = cb->get_cost(dev, table[i].frequency, &cost);
+                       if (ret || !cost || cost > EM_MAX_POWER) {
+                               dev_err(dev, "EM: invalid cost %lu %d\n",
+                                       cost, ret);
+                               goto free_ps_table;
+                       }
+               } else {
+                       power_res = em_scale_power(table[i].power);
+                       cost = div64_u64(fmax * power_res, table[i].frequency);
+               }
+
+               table[i].cost = cost;
 
-               table[i].cost = div64_u64(fmax * power_res,
-                                         table[i].frequency);
                if (table[i].cost >= prev_cost) {
                        table[i].flags = EM_PERF_STATE_INEFFICIENT;
                        dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
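
In the non-artificial branch the cost metric is unchanged: cost = fmax * power_res / freq, i.e. proportional to energy per unit of work at each operating point. A worked example with illustrative numbers (fmax = 2000):

	/* state A: freq = 2000, power_res = 1000 -> cost = 2000*1000/2000 = 1000
	 * state B: freq = 1000, power_res =  400 -> cost = 2000*400/1000  =  800
	 * Walking down from fmax, B's cost (800) is below A's (1000), so B is
	 * efficient; had it come out >= 1000, B would have been flagged
	 * EM_PERF_STATE_INEFFICIENT by the comparison just above.
	 */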
@@ -197,7 +196,8 @@ free_ps_table:
 }
 
 static int em_create_pd(struct device *dev, int nr_states,
-                       struct em_data_callback *cb, cpumask_t *cpus)
+                       struct em_data_callback *cb, cpumask_t *cpus,
+                       unsigned long flags)
 {
        struct em_perf_domain *pd;
        struct device *cpu_dev;
@@ -215,7 +215,7 @@ static int em_create_pd(struct device *dev, int nr_states,
                        return -ENOMEM;
        }
 
-       ret = em_create_perf_table(dev, pd, nr_states, cb);
+       ret = em_create_perf_table(dev, pd, nr_states, cb, flags);
        if (ret) {
                kfree(pd);
                return ret;
@@ -259,6 +259,8 @@ static void em_cpufreq_update_efficiencies(struct device *dev)
                        found++;
        }
 
+       cpufreq_cpu_put(policy);
+
        if (!found)
                return;
 
@@ -332,6 +334,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
                                bool milliwatts)
 {
        unsigned long cap, prev_cap = 0;
+       unsigned long flags = 0;
        int cpu, ret;
 
        if (!dev || !nr_states || !cb)
@@ -378,12 +381,16 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
                }
        }
 
-       ret = em_create_pd(dev, nr_states, cb, cpus);
+       if (milliwatts)
+               flags |= EM_PERF_DOMAIN_MILLIWATTS;
+       else if (cb->get_cost)
+               flags |= EM_PERF_DOMAIN_ARTIFICIAL;
+
+       ret = em_create_pd(dev, nr_states, cb, cpus, flags);
        if (ret)
                goto unlock;
 
-       if (milliwatts)
-               dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
+       dev->em_pd->flags |= flags;
 
        em_cpufreq_update_efficiencies(dev);
 
index 7e646079fbeb2f3ca7a5f4bcea0cfb7ecdcb036d..5242bf2ee469aa70b1de15939385ff17758445fc 100644 (file)
@@ -545,35 +545,6 @@ static int __init pm_debug_messages_setup(char *str)
 }
 __setup("pm_debug_messages", pm_debug_messages_setup);
 
-/**
- * __pm_pr_dbg - Print a suspend debug message to the kernel log.
- * @defer: Whether or not to use printk_deferred() to print the message.
- * @fmt: Message format.
- *
- * The message will be emitted if enabled through the pm_debug_messages
- * sysfs attribute.
- */
-void __pm_pr_dbg(bool defer, const char *fmt, ...)
-{
-       struct va_format vaf;
-       va_list args;
-
-       if (!pm_debug_messages_on)
-               return;
-
-       va_start(args, fmt);
-
-       vaf.fmt = fmt;
-       vaf.va = &args;
-
-       if (defer)
-               printk_deferred(KERN_DEBUG "PM: %pV", &vaf);
-       else
-               printk(KERN_DEBUG "PM: %pV", &vaf);
-
-       va_end(args);
-}
-
 #else /* !CONFIG_PM_SLEEP_DEBUG */
 static inline void pm_print_times_init(void) {}
 #endif /* CONFIG_PM_SLEEP_DEBUG */
index 11b570fcf0494a572b90bd0bc89deb9c621811a5..3068601e585a8fd5066d5b0bfd4c1c1fb7ddaee0 100644 (file)
@@ -6,9 +6,6 @@
  * Originally from swsusp.
  */
 
-
-#undef DEBUG
-
 #include <linux/interrupt.h>
 #include <linux/oom.h>
 #include <linux/suspend.h>
index 330d499376924806ea0ca4c839c1dcd66ae66ed4..2a406753af90495056e81b8ec424ac26c0490350 100644 (file)
@@ -326,7 +326,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
        return ret;
 }
 
-/**
+/*
  * Data types related to memory bitmaps.
  *
  * Memory bitmap is a structure consisting of many linked lists of
@@ -427,6 +427,10 @@ struct memory_bitmap {
 
 /**
  * alloc_rtree_node - Allocate a new node and add it to the radix tree.
+ * @gfp_mask: GFP mask for the allocation.
+ * @safe_needed: Get pages not used before hibernation (restore only)
+ * @ca: Pointer to a linked list of pages ("a chain") to allocate from
+ * @list: Radix tree node to add.
  *
  * This function is used to allocate inner nodes as well as the
  * leaf nodes of the radix tree. It also adds the node to the
@@ -902,7 +906,7 @@ static bool rtree_next_node(struct memory_bitmap *bm)
 }
 
 /**
- * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap.
+ * memory_bm_next_pfn - Find the next set bit in a memory bitmap.
  * @bm: Memory bitmap.
  *
  * Starting from the last returned position this function searches for the next
@@ -1937,7 +1941,7 @@ static inline int get_highmem_buffer(int safe_needed)
 }
 
 /**
- * alloc_highmem_image_pages - Allocate some highmem pages for the image.
+ * alloc_highmem_pages - Allocate some highmem pages for the image.
  *
  * Try to allocate as many pages as needed, but if the number of free highmem
  * pages is less than that, allocate them all.
@@ -2224,7 +2228,7 @@ static int check_header(struct swsusp_info *info)
 }
 
 /**
- * load header - Check the image header and copy the data from it.
+ * load_header - Check the image header and copy the data from it.
  */
 static int load_header(struct swsusp_info *info)
 {
index bf8e341e75b4fd0a270d386f65de42c94dd69ac2..1c630e573548df34a28a9061e87b961c2a879573 100644 (file)
@@ -77,31 +77,56 @@ config TASKS_RCU_GENERIC
          This option enables generic infrastructure code supporting
          task-based RCU implementations.  Not for manual selection.
 
+config FORCE_TASKS_RCU
+       bool "Force selection of TASKS_RCU"
+       depends on RCU_EXPERT
+       select TASKS_RCU
+       default n
+       help
+         This option force-enables a task-based RCU implementation
+         that uses only voluntary context switch (not preemption!),
+         idle, and user-mode execution as quiescent states.  Not for
+         manual selection in most cases.
+
 config TASKS_RCU
-       def_bool PREEMPTION
+       bool
+       default n
+       select IRQ_WORK
+
+config FORCE_TASKS_RUDE_RCU
+       bool "Force selection of Tasks Rude RCU"
+       depends on RCU_EXPERT
+       select TASKS_RUDE_RCU
+       default n
        help
-         This option enables a task-based RCU implementation that uses
-         only voluntary context switch (not preemption!), idle, and
-         user-mode execution as quiescent states.  Not for manual selection.
+         This option force-enables a task-based RCU implementation
+         that uses only context switch (including preemption) and
+         user-mode execution as quiescent states.  It forces IPIs and
+         context switches on all online CPUs, including idle ones,
+         so use with caution.  Not for manual selection in most cases.
 
 config TASKS_RUDE_RCU
-       def_bool 0
+       bool
+       default n
+       select IRQ_WORK
+
+config FORCE_TASKS_TRACE_RCU
+       bool "Force selection of Tasks Trace RCU"
+       depends on RCU_EXPERT
+       select TASKS_TRACE_RCU
+       default n
        help
          This option enables a task-based RCU implementation that uses
-         only context switch (including preemption) and user-mode
-         execution as quiescent states.  It forces IPIs and context
-         switches on all online CPUs, including idle ones, so use
-         with caution.
+         explicit rcu_read_lock_trace() read-side markers, and allows
+         these readers to appear in the idle loop as well as on the
+         CPU hotplug code paths.  It can force IPIs on online CPUs,
+         including idle ones, so use with caution.  Not for manual
+         selection in most cases.
 
 config TASKS_TRACE_RCU
-       def_bool 0
+       bool
+       default n
        select IRQ_WORK
-       help
-         This option enables a task-based RCU implementation that uses
-         explicit rcu_read_lock_trace() read-side markers, and allows
-         these readers to appear in the idle loop as well as on the CPU
-         hotplug code paths.  It can force IPIs on online CPUs, including
-         idle ones, so use with caution.
 
 config RCU_STALL_COMMON
        def_bool TREE_RCU
@@ -195,6 +220,20 @@ config RCU_BOOST_DELAY
 
          Accept the default if unsure.
 
+config RCU_EXP_KTHREAD
+       bool "Perform RCU expedited work in a real-time kthread"
+       depends on RCU_BOOST && RCU_EXPERT
+       default !PREEMPT_RT && NR_CPUS <= 32
+       help
+         Use this option to further reduce the latencies of expedited
+         grace periods at the expense of being more disruptive.
+
+         This option is disabled by default on PREEMPT_RT=y kernels, which
+         disable expedited grace periods after boot by unconditionally
+         setting rcupdate.rcu_normal_after_boot=1.
+
+         Accept the default if unsure.
+
 config RCU_NOCB_CPU
        bool "Offload RCU callback processing from boot-selected CPUs"
        depends on TREE_RCU
@@ -225,7 +264,7 @@ config RCU_NOCB_CPU
 
 config TASKS_TRACE_RCU_READ_MB
        bool "Tasks Trace RCU readers use memory barriers in user and idle"
-       depends on RCU_EXPERT
+       depends on RCU_EXPERT && TASKS_TRACE_RCU
        default PREEMPT_RT || NR_CPUS < 8
        help
          Use this option to further reduce the number of IPIs sent
index 4fd64999300fc229bea06793861adae6042a697b..9b64e55d4f6159f44eac3017ded4b6c67a1803a1 100644 (file)
@@ -28,9 +28,6 @@ config RCU_SCALE_TEST
        depends on DEBUG_KERNEL
        select TORTURE_TEST
        select SRCU
-       select TASKS_RCU
-       select TASKS_RUDE_RCU
-       select TASKS_TRACE_RCU
        default n
        help
          This option provides a kernel module that runs performance
@@ -47,9 +44,6 @@ config RCU_TORTURE_TEST
        depends on DEBUG_KERNEL
        select TORTURE_TEST
        select SRCU
-       select TASKS_RCU
-       select TASKS_RUDE_RCU
-       select TASKS_TRACE_RCU
        default n
        help
          This option provides a kernel module that runs torture tests
@@ -66,9 +60,6 @@ config RCU_REF_SCALE_TEST
        depends on DEBUG_KERNEL
        select TORTURE_TEST
        select SRCU
-       select TASKS_RCU
-       select TASKS_RUDE_RCU
-       select TASKS_TRACE_RCU
        default n
        help
          This option provides a kernel module that runs performance tests
@@ -91,6 +82,20 @@ config RCU_CPU_STALL_TIMEOUT
          RCU grace period persists, additional CPU stall warnings are
          printed at more widely spaced intervals.
 
+config RCU_EXP_CPU_STALL_TIMEOUT
+       int "Expedited RCU CPU stall timeout in milliseconds"
+       depends on RCU_STALL_COMMON
+       range 0 21000
+       default 20 if ANDROID
+       default 0 if !ANDROID
+       help
+         If a given expedited RCU grace period extends more than the
+         specified number of milliseconds, a CPU stall warning is printed.
+         If the RCU grace period persists, additional CPU stall warnings
+         are printed at more widely spaced intervals.  A value of zero
+         says to use the RCU_CPU_STALL_TIMEOUT value converted from
+         seconds to milliseconds.
+
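
The zero default defers to the existing seconds-based knob. A sketch of the fallback semantics (paraphrased, not the exact tree_stall.h source):

	static int exp_stall_timeout_ms_sketch(void)
	{
		int ms = READ_ONCE(rcu_exp_cpu_stall_timeout);

		if (!ms)	/* 0: derive from RCU_CPU_STALL_TIMEOUT */
			ms = jiffies_to_msecs(rcu_jiffies_till_stall_check());
		return ms;
	}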
 config RCU_TRACE
        bool "Enable tracing for RCU"
        depends on DEBUG_KERNEL
index 24b5f2c2de87b4c37b0d3c52c3189f26d669b455..152492d52715647f953c97c264954cb3496de786 100644 (file)
@@ -210,7 +210,9 @@ static inline bool rcu_stall_is_suppressed_at_boot(void)
 extern int rcu_cpu_stall_ftrace_dump;
 extern int rcu_cpu_stall_suppress;
 extern int rcu_cpu_stall_timeout;
+extern int rcu_exp_cpu_stall_timeout;
 int rcu_jiffies_till_stall_check(void);
+int rcu_exp_jiffies_till_stall_check(void);
 
 static inline bool rcu_stall_is_suppressed(void)
 {
@@ -523,6 +525,8 @@ static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) { ret
 static inline void show_rcu_gp_kthreads(void) { }
 static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
 static inline void rcu_fwd_progress_check(unsigned long j) { }
+static inline void rcu_gp_slow_register(atomic_t *rgssp) { }
+static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
 #else /* #ifdef CONFIG_TINY_RCU */
 bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
 unsigned long rcu_get_gp_seq(void);
@@ -534,14 +538,19 @@ int rcu_get_gp_kthreads_prio(void);
 void rcu_fwd_progress_check(unsigned long j);
 void rcu_force_quiescent_state(void);
 extern struct workqueue_struct *rcu_gp_wq;
+#ifdef CONFIG_RCU_EXP_KTHREAD
+extern struct kthread_worker *rcu_exp_gp_kworker;
+extern struct kthread_worker *rcu_exp_par_gp_kworker;
+#else /* !CONFIG_RCU_EXP_KTHREAD */
 extern struct workqueue_struct *rcu_par_gp_wq;
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+void rcu_gp_slow_register(atomic_t *rgssp);
+void rcu_gp_slow_unregister(atomic_t *rgssp);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_RCU_NOCB_CPU
-bool rcu_is_nocb_cpu(int cpu);
 void rcu_bind_current_to_nocb(void);
 #else
-static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
 static inline void rcu_bind_current_to_nocb(void) { }
 #endif
 
index 81145c3ece25fab1f089a2e0eeaee80776d55c23..c54ea2b6a36bc2e969ee59b33b18bec0413fad2d 100644 (file)
@@ -505,10 +505,10 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
                WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]);
 
        /*
-        * Callbacks moved, so clean up the misordered ->tails[] pointers
-        * that now point into the middle of the list of ready-to-invoke
-        * callbacks.  The overall effect is to copy down the later pointers
-        * into the gap that was created by the now-ready segments.
+        * Callbacks moved, so there might be an empty RCU_WAIT_TAIL
+        * and a non-empty RCU_NEXT_READY_TAIL.  If so, copy the
+        * RCU_NEXT_READY_TAIL segment to fill the RCU_WAIT_TAIL gap
+        * created by the now-ready-to-invoke segments.
         */
        for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
                if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
index 5e4f1f83d38e72663d3760f46d5c0084cd3bb6c0..277a5bfb37d4830bfbc1bdf7407e49548dec3fbe 100644 (file)
@@ -268,6 +268,8 @@ static struct rcu_scale_ops srcud_ops = {
        .name           = "srcud"
 };
 
+#ifdef CONFIG_TASKS_RCU
+
 /*
  * Definitions for RCU-tasks scalability testing.
  */
@@ -295,6 +297,16 @@ static struct rcu_scale_ops tasks_ops = {
        .name           = "tasks"
 };
 
+#define TASKS_OPS &tasks_ops,
+
+#else // #ifdef CONFIG_TASKS_RCU
+
+#define TASKS_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RCU
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
 /*
  * Definitions for RCU-tasks-trace scalability testing.
  */
@@ -324,6 +336,14 @@ static struct rcu_scale_ops tasks_tracing_ops = {
        .name           = "tasks-tracing"
 };
 
+#define TASKS_TRACING_OPS &tasks_tracing_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define TASKS_TRACING_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU
+
 static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old)
 {
        if (!cur_ops->gp_diff)
@@ -797,7 +817,7 @@ rcu_scale_init(void)
        long i;
        int firsterr = 0;
        static struct rcu_scale_ops *scale_ops[] = {
-               &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, &tasks_tracing_ops
+               &rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_TRACING_OPS
        };
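
The trailing comma is deliberately part of each *_OPS macro: a configured flavor contributes "&tasks_ops," to the initializer, an unconfigured one contributes nothing, and the array stays well-formed either way, since C tolerates a trailing comma before the closing brace. The rcutorture and refscale arrays later in this series use the same idiom. Expansion sketch:

	/* CONFIG_TASKS_RCU=y, CONFIG_TASKS_TRACE_RCU=n:
	 *   { &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, };
	 * both disabled:
	 *   { &rcu_ops, &srcu_ops, &srcud_ops, };
	 */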
 
        if (!torture_init_begin(scale_type, verbose))
index 55d049c39608f581bc8157fc0fbb6264c2ba0bde..7120165a93426c282c7d9f6d3a87a4738ac52905 100644 (file)
@@ -737,6 +737,50 @@ static struct rcu_torture_ops busted_srcud_ops = {
        .name           = "busted_srcud"
 };
 
+/*
+ * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
+ * This implementation does not necessarily work well with CPU hotplug.
+ */
+
+static void synchronize_rcu_trivial(void)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu) {
+               rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
+               WARN_ON_ONCE(raw_smp_processor_id() != cpu);
+       }
+}
+
+static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
+{
+       preempt_disable();
+       return 0;
+}
+
+static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
+{
+       preempt_enable();
+}
+
+static struct rcu_torture_ops trivial_ops = {
+       .ttype          = RCU_TRIVIAL_FLAVOR,
+       .init           = rcu_sync_torture_init,
+       .readlock       = rcu_torture_read_lock_trivial,
+       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
+       .readunlock     = rcu_torture_read_unlock_trivial,
+       .readlock_held  = torture_readlock_not_held,
+       .get_gp_seq     = rcu_no_completed,
+       .sync           = synchronize_rcu_trivial,
+       .exp_sync       = synchronize_rcu_trivial,
+       .fqs            = NULL,
+       .stats          = NULL,
+       .irq_capable    = 1,
+       .name           = "trivial"
+};
+
+#ifdef CONFIG_TASKS_RCU
+
 /*
  * Definitions for RCU-tasks torture testing.
  */
@@ -780,47 +824,16 @@ static struct rcu_torture_ops tasks_ops = {
        .name           = "tasks"
 };
 
-/*
- * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
- * This implementation does not necessarily work well with CPU hotplug.
- */
+#define TASKS_OPS &tasks_ops,
 
-static void synchronize_rcu_trivial(void)
-{
-       int cpu;
+#else // #ifdef CONFIG_TASKS_RCU
 
-       for_each_online_cpu(cpu) {
-               rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
-               WARN_ON_ONCE(raw_smp_processor_id() != cpu);
-       }
-}
+#define TASKS_OPS
 
-static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
-{
-       preempt_disable();
-       return 0;
-}
+#endif // #else #ifdef CONFIG_TASKS_RCU
 
-static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
-{
-       preempt_enable();
-}
 
-static struct rcu_torture_ops trivial_ops = {
-       .ttype          = RCU_TRIVIAL_FLAVOR,
-       .init           = rcu_sync_torture_init,
-       .readlock       = rcu_torture_read_lock_trivial,
-       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
-       .readunlock     = rcu_torture_read_unlock_trivial,
-       .readlock_held  = torture_readlock_not_held,
-       .get_gp_seq     = rcu_no_completed,
-       .sync           = synchronize_rcu_trivial,
-       .exp_sync       = synchronize_rcu_trivial,
-       .fqs            = NULL,
-       .stats          = NULL,
-       .irq_capable    = 1,
-       .name           = "trivial"
-};
+#ifdef CONFIG_TASKS_RUDE_RCU
 
 /*
  * Definitions for rude RCU-tasks torture testing.
@@ -851,6 +864,17 @@ static struct rcu_torture_ops tasks_rude_ops = {
        .name           = "tasks-rude"
 };
 
+#define TASKS_RUDE_OPS &tasks_rude_ops,
+
+#else // #ifdef CONFIG_TASKS_RUDE_RCU
+
+#define TASKS_RUDE_OPS
+
+#endif // #else #ifdef CONFIG_TASKS_RUDE_RCU
+
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
 /*
  * Definitions for tracing RCU-tasks torture testing.
  */
@@ -893,6 +917,15 @@ static struct rcu_torture_ops tasks_tracing_ops = {
        .name           = "tasks-tracing"
 };
 
+#define TASKS_TRACING_OPS &tasks_tracing_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define TASKS_TRACING_OPS
+
+#endif // #else #ifdef CONFIG_TASKS_TRACE_RCU
+
+
 static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
 {
        if (!cur_ops->gp_diff)
@@ -1178,7 +1211,7 @@ rcu_torture_writer(void *arg)
                         " GP expediting controlled from boot/sysfs for %s.\n",
                         torture_type, cur_ops->name);
        if (WARN_ONCE(nsynctypes == 0,
-                     "rcu_torture_writer: No update-side primitives.\n")) {
+                     "%s: No update-side primitives.\n", __func__)) {
                /*
                 * No updates primitives, so don't try updating.
                 * The resulting test won't be testing much, hence the
@@ -1186,6 +1219,7 @@ rcu_torture_writer(void *arg)
                 */
                rcu_torture_writer_state = RTWS_STOPPING;
                torture_kthread_stopping("rcu_torture_writer");
+               return 0;
        }
 
        do {
@@ -1322,6 +1356,17 @@ rcu_torture_fakewriter(void *arg)
        VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started");
        set_user_nice(current, MAX_NICE);
 
+       if (WARN_ONCE(nsynctypes == 0,
+                     "%s: No update-side primitives.\n", __func__)) {
+               /*
+                * No update-side primitives, so don't try updating.
+                * The resulting test won't be testing much, hence the
+                * above WARN_ONCE().
+                */
+               torture_kthread_stopping("rcu_torture_fakewriter");
+               return 0;
+       }
+
        do {
                torture_hrtimeout_jiffies(torture_random(&rand) % 10, &rand);
                if (cur_ops->cb_barrier != NULL &&
@@ -2916,10 +2961,12 @@ rcu_torture_cleanup(void)
                        pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
                        cur_ops->cb_barrier();
                }
+               rcu_gp_slow_unregister(NULL);
                return;
        }
        if (!cur_ops) {
                torture_cleanup_end();
+               rcu_gp_slow_unregister(NULL);
                return;
        }
 
@@ -3016,6 +3063,7 @@ rcu_torture_cleanup(void)
        else
                rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
        torture_cleanup_end();
+       rcu_gp_slow_unregister(&rcu_fwd_cb_nodelay);
 }
 
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
@@ -3096,9 +3144,9 @@ rcu_torture_init(void)
        int flags = 0;
        unsigned long gp_seq = 0;
        static struct rcu_torture_ops *torture_ops[] = {
-               &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
-               &busted_srcud_ops, &tasks_ops, &tasks_rude_ops,
-               &tasks_tracing_ops, &trivial_ops,
+               &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
+               TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
+               &trivial_ops,
        };
 
        if (!torture_init_begin(torture_type, verbose))
@@ -3320,6 +3368,7 @@ rcu_torture_init(void)
        if (object_debug)
                rcu_test_debug_objects();
        torture_init_end();
+       rcu_gp_slow_register(&rcu_fwd_cb_nodelay);
        return 0;
 
 unwind:
index 5489ff7f478e4372858693a56be3e95fd148e577..909644abee67fc72a47566c063e1f7493c0bbf95 100644 (file)
@@ -207,6 +207,8 @@ static struct ref_scale_ops srcu_ops = {
        .name           = "srcu"
 };
 
+#ifdef CONFIG_TASKS_RCU
+
 // Definitions for RCU Tasks ref scale testing: Empty read markers.
 // These definitions also work for RCU Rude readers.
 static void rcu_tasks_ref_scale_read_section(const int nloops)
@@ -232,6 +234,16 @@ static struct ref_scale_ops rcu_tasks_ops = {
        .name           = "rcu-tasks"
 };
 
+#define RCU_TASKS_OPS &rcu_tasks_ops,
+
+#else // #ifdef CONFIG_TASKS_RCU
+
+#define RCU_TASKS_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RCU
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
 // Definitions for RCU Tasks Trace ref scale testing.
 static void rcu_trace_ref_scale_read_section(const int nloops)
 {
@@ -261,6 +273,14 @@ static struct ref_scale_ops rcu_trace_ops = {
        .name           = "rcu-trace"
 };
 
+#define RCU_TRACE_OPS &rcu_trace_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define RCU_TRACE_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU
+
 // Definitions for reference count
 static atomic_t refcnt;
 
@@ -790,7 +810,7 @@ ref_scale_init(void)
        long i;
        int firsterr = 0;
        static struct ref_scale_ops *scale_ops[] = {
-               &rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops,
+               &rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
                &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
        };
 
index 6833d888718169fe307536c738764b0dbbd5e26d..50ba70f019dea0996b1ec28e96191eb9e73c98ff 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/smp.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/srcu.h>
 
 #include "rcu.h"
@@ -38,6 +39,35 @@ module_param(exp_holdoff, ulong, 0444);
 static ulong counter_wrap_check = (ULONG_MAX >> 2);
 module_param(counter_wrap_check, ulong, 0444);
 
+/*
+ * Control conversion to SRCU_SIZE_BIG:
+ *    0: Don't convert at all.
+ *    1: Convert at init_srcu_struct() time.
+ *    2: Convert when rcutorture invokes srcu_torture_stats_print().
+ *    3: Decide at boot time based on system shape (default).
+ * 0x1x: Convert when excessive contention encountered.
+ */
+#define SRCU_SIZING_NONE       0
+#define SRCU_SIZING_INIT       1
+#define SRCU_SIZING_TORTURE    2
+#define SRCU_SIZING_AUTO       3
+#define SRCU_SIZING_CONTEND    0x10
+#define SRCU_SIZING_IS(x) ((convert_to_big & ~SRCU_SIZING_CONTEND) == x)
+#define SRCU_SIZING_IS_NONE() (SRCU_SIZING_IS(SRCU_SIZING_NONE))
+#define SRCU_SIZING_IS_INIT() (SRCU_SIZING_IS(SRCU_SIZING_INIT))
+#define SRCU_SIZING_IS_TORTURE() (SRCU_SIZING_IS(SRCU_SIZING_TORTURE))
+#define SRCU_SIZING_IS_CONTEND() (convert_to_big & SRCU_SIZING_CONTEND)
+static int convert_to_big = SRCU_SIZING_AUTO;
+module_param(convert_to_big, int, 0444);
+
+/* Number of CPUs to trigger init_srcu_struct()-time transition to big. */
+static int big_cpu_lim __read_mostly = 128;
+module_param(big_cpu_lim, int, 0444);
+
+/* Contention events per jiffy to initiate transition to big. */
+static int small_contention_lim __read_mostly = 100;
+module_param(small_contention_lim, int, 0444);
+
 /* Early-boot callback-management, so early that no lock is required! */
 static LIST_HEAD(srcu_boot_list);
 static bool __read_mostly srcu_init_done;
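
The srcutree.convert_to_big encoding above packs the base strategy into the low bits and an optional convert-on-contention trigger into bit 0x10, which is what SRCU_SIZING_IS() and SRCU_SIZING_IS_CONTEND() decode. A standalone sketch of that decoding, reusing the constants from the patch (the decode() helper itself is illustrative only):

#include <stdbool.h>
#include <stdio.h>

#define SRCU_SIZING_NONE        0
#define SRCU_SIZING_INIT        1
#define SRCU_SIZING_TORTURE     2
#define SRCU_SIZING_AUTO        3
#define SRCU_SIZING_CONTEND     0x10

static void decode(int convert_to_big)
{
        int base = convert_to_big & ~SRCU_SIZING_CONTEND;
        bool contend = convert_to_big & SRCU_SIZING_CONTEND;

        printf("base strategy %d, convert on contention: %s\n",
               base, contend ? "yes" : "no");
}

int main(void)
{
        decode(SRCU_SIZING_AUTO);                       /* boot-time default */
        decode(SRCU_SIZING_NONE | SRCU_SIZING_CONTEND); /* 0x10: contention only */
        return 0;
}
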
@@ -48,39 +78,90 @@ static void process_srcu(struct work_struct *work);
 static void srcu_delay_timer(struct timer_list *t);
 
 /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
-#define spin_lock_rcu_node(p)                                  \
-do {                                                                   \
-       spin_lock(&ACCESS_PRIVATE(p, lock));                    \
-       smp_mb__after_unlock_lock();                                    \
+#define spin_lock_rcu_node(p)                                                  \
+do {                                                                           \
+       spin_lock(&ACCESS_PRIVATE(p, lock));                                    \
+       smp_mb__after_unlock_lock();                                            \
 } while (0)
 
 #define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
 
-#define spin_lock_irq_rcu_node(p)                                      \
-do {                                                                   \
-       spin_lock_irq(&ACCESS_PRIVATE(p, lock));                        \
-       smp_mb__after_unlock_lock();                                    \
+#define spin_lock_irq_rcu_node(p)                                              \
+do {                                                                           \
+       spin_lock_irq(&ACCESS_PRIVATE(p, lock));                                \
+       smp_mb__after_unlock_lock();                                            \
 } while (0)
 
-#define spin_unlock_irq_rcu_node(p)                                    \
+#define spin_unlock_irq_rcu_node(p)                                            \
        spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
 
-#define spin_lock_irqsave_rcu_node(p, flags)                   \
-do {                                                                   \
-       spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);     \
-       smp_mb__after_unlock_lock();                                    \
+#define spin_lock_irqsave_rcu_node(p, flags)                                   \
+do {                                                                           \
+       spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);                     \
+       smp_mb__after_unlock_lock();                                            \
 } while (0)
 
-#define spin_unlock_irqrestore_rcu_node(p, flags)                      \
-       spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
+#define spin_trylock_irqsave_rcu_node(p, flags)                                        \
+({                                                                             \
+       bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
+                                                                               \
+       if (___locked)                                                          \
+               smp_mb__after_unlock_lock();                                    \
+       ___locked;                                                              \
+})
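
The spin_trylock_irqsave_rcu_node() wrapper above is a GNU C statement expression so that smp_mb__after_unlock_lock() runs only when the lock was actually acquired, while the acquisition result still propagates to the caller. A userspace analogue of that shape, assuming pthread_mutex_trylock() as the stand-in for the kernel trylock and with the barrier reduced to a comment:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define trylock_with_post_step(m)                                       \
({                                                                      \
        bool ___locked = (pthread_mutex_trylock(m) == 0);               \
        if (___locked) {                                                \
                /* kernel: smp_mb__after_unlock_lock() goes here */     \
        }                                                               \
        ___locked;                                                      \
})

int main(void)
{
        pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

        if (trylock_with_post_step(&m)) {
                printf("acquired\n");
                pthread_mutex_unlock(&m);
        }
        return 0;
}
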
+
+#define spin_unlock_irqrestore_rcu_node(p, flags)                              \
+       spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
 
 /*
- * Initialize SRCU combining tree.  Note that statically allocated
+ * Initialize SRCU per-CPU data.  Note that statically allocated
  * srcu_struct structures might already have srcu_read_lock() and
  * srcu_read_unlock() running against them.  So if the is_static parameter
  * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
  */
-static void init_srcu_struct_nodes(struct srcu_struct *ssp)
+static void init_srcu_struct_data(struct srcu_struct *ssp)
+{
+       int cpu;
+       struct srcu_data *sdp;
+
+       /*
+        * Initialize the per-CPU srcu_data array, which feeds into the
+        * leaves of the srcu_node tree.
+        */
+       WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
+                    ARRAY_SIZE(sdp->srcu_unlock_count));
+       for_each_possible_cpu(cpu) {
+               sdp = per_cpu_ptr(ssp->sda, cpu);
+               spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
+               rcu_segcblist_init(&sdp->srcu_cblist);
+               sdp->srcu_cblist_invoking = false;
+               sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
+               sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
+               sdp->mynode = NULL;
+               sdp->cpu = cpu;
+               INIT_WORK(&sdp->work, srcu_invoke_callbacks);
+               timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
+               sdp->ssp = ssp;
+       }
+}
+
+/* Invalid seq state, used during snp node initialization */
+#define SRCU_SNP_INIT_SEQ              0x2
+
+/*
+ * Check whether the sequence number corresponding to a given snp node is invalid.
+ */
+static inline bool srcu_invl_snp_seq(unsigned long s)
+{
+       return rcu_seq_state(s) == SRCU_SNP_INIT_SEQ;
+}
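
SRCU_SNP_INIT_SEQ relies on grace-period sequence numbers carrying their state in the low two bits, and on the values actually stored in these snp fields never carrying state 0x2, so that state can serve as a not-yet-initialized sentinel. A small model of that layout, assuming the usual two-bit state mask:

#include <stdbool.h>
#include <stdio.h>

#define SEQ_STATE_MASK  0x3     /* low two bits carry state */
#define SNP_INIT_SEQ    0x2     /* sentinel state, never stored otherwise */

static bool invl_snp_seq(unsigned long s)
{
        return (s & SEQ_STATE_MASK) == SNP_INIT_SEQ;
}

int main(void)
{
        printf("sentinel invalid? %d\n", invl_snp_seq(SNP_INIT_SEQ));   /* 1 */
        printf("real seq invalid? %d\n", invl_snp_seq((1UL << 2) | 1)); /* 0 */
        return 0;
}
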
+
+/*
+ * Allocate and initialize the SRCU combining tree.  Returns @true if
+ * allocation succeeded and @false otherwise.
+ */
+static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
 {
        int cpu;
        int i;
@@ -92,6 +173,9 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp)
 
        /* Initialize geometry if it has not already been initialized. */
        rcu_init_geometry();
+       ssp->node = kcalloc(rcu_num_nodes, sizeof(*ssp->node), gfp_flags);
+       if (!ssp->node)
+               return false;
 
        /* Work out the overall tree geometry. */
        ssp->level[0] = &ssp->node[0];
@@ -105,10 +189,10 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp)
                WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
                             ARRAY_SIZE(snp->srcu_data_have_cbs));
                for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
-                       snp->srcu_have_cbs[i] = 0;
+                       snp->srcu_have_cbs[i] = SRCU_SNP_INIT_SEQ;
                        snp->srcu_data_have_cbs[i] = 0;
                }
-               snp->srcu_gp_seq_needed_exp = 0;
+               snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ;
                snp->grplo = -1;
                snp->grphi = -1;
                if (snp == &ssp->node[0]) {
@@ -129,39 +213,31 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp)
         * Initialize the per-CPU srcu_data array, which feeds into the
         * leaves of the srcu_node tree.
         */
-       WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
-                    ARRAY_SIZE(sdp->srcu_unlock_count));
        level = rcu_num_lvls - 1;
        snp_first = ssp->level[level];
        for_each_possible_cpu(cpu) {
                sdp = per_cpu_ptr(ssp->sda, cpu);
-               spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
-               rcu_segcblist_init(&sdp->srcu_cblist);
-               sdp->srcu_cblist_invoking = false;
-               sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
-               sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
                sdp->mynode = &snp_first[cpu / levelspread[level]];
                for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
                        if (snp->grplo < 0)
                                snp->grplo = cpu;
                        snp->grphi = cpu;
                }
-               sdp->cpu = cpu;
-               INIT_WORK(&sdp->work, srcu_invoke_callbacks);
-               timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
-               sdp->ssp = ssp;
                sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
        }
+       smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_WAIT_BARRIER);
+       return true;
 }
 
 /*
  * Initialize non-compile-time initialized fields, including the
- * associated srcu_node and srcu_data structures.  The is_static
- * parameter is passed through to init_srcu_struct_nodes(), and
- * also tells us that ->sda has already been wired up to srcu_data.
+ * associated srcu_node and srcu_data structures.  The is_static parameter
+ * tells us that ->sda has already been wired up to srcu_data.
  */
 static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 {
+       ssp->srcu_size_state = SRCU_SIZE_SMALL;
+       ssp->node = NULL;
        mutex_init(&ssp->srcu_cb_mutex);
        mutex_init(&ssp->srcu_gp_mutex);
        ssp->srcu_idx = 0;
@@ -170,13 +246,25 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
        mutex_init(&ssp->srcu_barrier_mutex);
        atomic_set(&ssp->srcu_barrier_cpu_cnt, 0);
        INIT_DELAYED_WORK(&ssp->work, process_srcu);
+       ssp->sda_is_static = is_static;
        if (!is_static)
                ssp->sda = alloc_percpu(struct srcu_data);
        if (!ssp->sda)
                return -ENOMEM;
-       init_srcu_struct_nodes(ssp);
+       init_srcu_struct_data(ssp);
        ssp->srcu_gp_seq_needed_exp = 0;
        ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
+       if (READ_ONCE(ssp->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
+               if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) {
+                       if (!ssp->sda_is_static) {
+                               free_percpu(ssp->sda);
+                               ssp->sda = NULL;
+                               return -ENOMEM;
+                       }
+               } else {
+                       WRITE_ONCE(ssp->srcu_size_state, SRCU_SIZE_BIG);
+               }
+       }
        smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
        return 0;
 }
@@ -213,6 +301,86 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+/*
+ * Initiate a transition to SRCU_SIZE_BIG with lock held.
+ */
+static void __srcu_transition_to_big(struct srcu_struct *ssp)
+{
+       lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
+       smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_ALLOC);
+}
+
+/*
+ * Initiate an idempotent transition to SRCU_SIZE_BIG.
+ */
+static void srcu_transition_to_big(struct srcu_struct *ssp)
+{
+       unsigned long flags;
+
+       /* Double-checked locking on ->srcu_size_state. */
+       if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL)
+               return;
+       spin_lock_irqsave_rcu_node(ssp, flags);
+       if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) {
+               spin_unlock_irqrestore_rcu_node(ssp, flags);
+               return;
+       }
+       __srcu_transition_to_big(ssp);
+       spin_unlock_irqrestore_rcu_node(ssp, flags);
+}
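
srcu_transition_to_big() is classic double-checked locking: a cheap acquire load filters out the common already-transitioned case, and the test is repeated under ->lock before the state is written, which keeps concurrent callers idempotent. A C11 sketch of the pattern with illustrative names:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int size_state;   /* 0 == SMALL, 1 == ALLOC requested */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void transition_to_big(void)
{
        if (atomic_load_explicit(&size_state, memory_order_acquire) != 0)
                return;         /* fast path: transition already under way */
        pthread_mutex_lock(&lock);
        if (atomic_load_explicit(&size_state, memory_order_acquire) == 0)
                atomic_store_explicit(&size_state, 1, memory_order_release);
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        transition_to_big();
        transition_to_big();    /* second call is a no-op */
        printf("state = %d\n", atomic_load(&size_state));
        return 0;
}
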
+
+/*
+ * Check to see if the just-encountered contention event justifies
+ * a transition to SRCU_SIZE_BIG.
+ */
+static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
+{
+       unsigned long j;
+
+       if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_size_state)
+               return;
+       j = jiffies;
+       if (ssp->srcu_size_jiffies != j) {
+               ssp->srcu_size_jiffies = j;
+               ssp->srcu_n_lock_retries = 0;
+       }
+       if (++ssp->srcu_n_lock_retries <= small_contention_lim)
+               return;
+       __srcu_transition_to_big(ssp);
+}
+
+/*
+ * Acquire the specified srcu_data structure's ->lock, but check for
+ * excessive contention, which results in initiation of a transition
+ * to SRCU_SIZE_BIG.  But only if the srcutree.convert_to_big module
+ * parameter permits this.
+ */
+static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags)
+{
+       struct srcu_struct *ssp = sdp->ssp;
+
+       if (spin_trylock_irqsave_rcu_node(sdp, *flags))
+               return;
+       spin_lock_irqsave_rcu_node(ssp, *flags);
+       spin_lock_irqsave_check_contention(ssp);
+       spin_unlock_irqrestore_rcu_node(ssp, *flags);
+       spin_lock_irqsave_rcu_node(sdp, *flags);
+}
+
+/*
+ * Acquire the specified srcu_struct structure's ->lock, but check for
+ * excessive contention, which results in initiation of a transition
+ * to SRCU_SIZE_BIG.  But only if the srcutree.convert_to_big module
+ * parameter permits this.
+ */
+static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
+{
+       if (spin_trylock_irqsave_rcu_node(ssp, *flags))
+               return;
+       spin_lock_irqsave_rcu_node(ssp, *flags);
+       spin_lock_irqsave_check_contention(ssp);
+}
+
 /*
  * First-use initialization of statically allocated srcu_struct
  * structure.  Wiring up the combining tree is more than can be
@@ -343,7 +511,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
        return sum;
 }
 
-#define SRCU_INTERVAL          1
+#define SRCU_INTERVAL          1       // Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL      10      // Maximum incremental delay from slow readers.
+#define SRCU_MAX_NODELAY_PHASE 1       // Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_MAX_NODELAY       100     // Maximum consecutive no-delay instances.
 
 /*
  * Return grace-period delay, zero if there are expedited grace
@@ -351,10 +522,18 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
  */
 static unsigned long srcu_get_delay(struct srcu_struct *ssp)
 {
-       if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq),
-                        READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
-               return 0;
-       return SRCU_INTERVAL;
+       unsigned long jbase = SRCU_INTERVAL;
+
+       if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+               jbase = 0;
+       if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
+               jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
+       if (!jbase) {
+               WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+               if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
+                       jbase = 1;
+       }
+       return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
 }
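
The reworked srcu_get_delay() turns the old expedited-or-not answer into an auto-expediting heuristic: the base delay drops to zero when an expedited grace period is pending, grows with the age of an in-progress grace period, and is clamped, while SRCU_MAX_NODELAY_PHASE bounds the number of consecutive zero-delay answers. A standalone model of that arithmetic (gp_age stands in for jiffies - ->srcu_gp_start; this is a sketch, not the kernel function):

#include <stdio.h>

#define SRCU_INTERVAL           1
#define SRCU_MAX_INTERVAL       10
#define SRCU_MAX_NODELAY_PHASE  1

static unsigned long model_delay(int expedited, int gp_in_progress,
                                 unsigned long gp_age, unsigned long *n_nodelay)
{
        unsigned long jbase = expedited ? 0 : SRCU_INTERVAL;

        if (gp_in_progress)
                jbase += gp_age;                /* penalize slow readers */
        if (!jbase && ++*n_nodelay > SRCU_MAX_NODELAY_PHASE)
                jbase = 1;                      /* stop hogging the CPU */
        return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
}

int main(void)
{
        unsigned long n = 0;

        printf("%lu\n", model_delay(1, 0, 0, &n));      /* 0: expedited */
        printf("%lu\n", model_delay(1, 0, 0, &n));      /* 1: budget spent */
        printf("%lu\n", model_delay(0, 1, 50, &n));     /* 10: clamped */
        return 0;
}
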
 
 /**
@@ -382,13 +561,20 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
                        return; /* Forgot srcu_barrier(), so just leak it! */
        }
        if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
+           WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) ||
            WARN_ON(srcu_readers_active(ssp))) {
-               pr_info("%s: Active srcu_struct %p state: %d\n",
-                       __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)));
+               pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n",
+                       __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)),
+                       rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed);
                return; /* Caller forgot to stop doing call_srcu()? */
        }
-       free_percpu(ssp->sda);
-       ssp->sda = NULL;
+       if (!ssp->sda_is_static) {
+               free_percpu(ssp->sda);
+               ssp->sda = NULL;
+       }
+       kfree(ssp->node);
+       ssp->node = NULL;
+       ssp->srcu_size_state = SRCU_SIZE_SMALL;
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
@@ -434,9 +620,13 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 static void srcu_gp_start(struct srcu_struct *ssp)
 {
-       struct srcu_data *sdp = this_cpu_ptr(ssp->sda);
+       struct srcu_data *sdp;
        int state;
 
+       if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+               sdp = per_cpu_ptr(ssp->sda, 0);
+       else
+               sdp = this_cpu_ptr(ssp->sda);
        lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
        WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
        spin_lock_rcu_node(sdp);  /* Interrupts already disabled. */
@@ -445,6 +635,8 @@ static void srcu_gp_start(struct srcu_struct *ssp)
        (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
                                       rcu_seq_snap(&ssp->srcu_gp_seq));
        spin_unlock_rcu_node(sdp);  /* Interrupts remain disabled. */
+       WRITE_ONCE(ssp->srcu_gp_start, jiffies);
+       WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0);
        smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
        rcu_seq_start(&ssp->srcu_gp_seq);
        state = rcu_seq_state(ssp->srcu_gp_seq);
@@ -517,7 +709,9 @@ static void srcu_gp_end(struct srcu_struct *ssp)
        int idx;
        unsigned long mask;
        struct srcu_data *sdp;
+       unsigned long sgsne;
        struct srcu_node *snp;
+       int ss_state;
 
        /* Prevent more than one additional grace period. */
        mutex_lock(&ssp->srcu_cb_mutex);
@@ -526,7 +720,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
        spin_lock_irq_rcu_node(ssp);
        idx = rcu_seq_state(ssp->srcu_gp_seq);
        WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
-       cbdelay = srcu_get_delay(ssp);
+       cbdelay = !!srcu_get_delay(ssp);
        WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
        rcu_seq_end(&ssp->srcu_gp_seq);
        gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
@@ -537,38 +731,45 @@ static void srcu_gp_end(struct srcu_struct *ssp)
        /* A new grace period can start at this point.  But only one. */
 
        /* Initiate callback invocation as needed. */
-       idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
-       srcu_for_each_node_breadth_first(ssp, snp) {
-               spin_lock_irq_rcu_node(snp);
-               cbs = false;
-               last_lvl = snp >= ssp->level[rcu_num_lvls - 1];
-               if (last_lvl)
-                       cbs = snp->srcu_have_cbs[idx] == gpseq;
-               snp->srcu_have_cbs[idx] = gpseq;
-               rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
-               if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
-                       WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
-               mask = snp->srcu_data_have_cbs[idx];
-               snp->srcu_data_have_cbs[idx] = 0;
-               spin_unlock_irq_rcu_node(snp);
-               if (cbs)
-                       srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
-
-               /* Occasionally prevent srcu_data counter wrap. */
-               if (!(gpseq & counter_wrap_check) && last_lvl)
-                       for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
-                               sdp = per_cpu_ptr(ssp->sda, cpu);
-                               spin_lock_irqsave_rcu_node(sdp, flags);
-                               if (ULONG_CMP_GE(gpseq,
-                                                sdp->srcu_gp_seq_needed + 100))
-                                       sdp->srcu_gp_seq_needed = gpseq;
-                               if (ULONG_CMP_GE(gpseq,
-                                                sdp->srcu_gp_seq_needed_exp + 100))
-                                       sdp->srcu_gp_seq_needed_exp = gpseq;
-                               spin_unlock_irqrestore_rcu_node(sdp, flags);
-                       }
+       ss_state = smp_load_acquire(&ssp->srcu_size_state);
+       if (ss_state < SRCU_SIZE_WAIT_BARRIER) {
+               srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, 0), cbdelay);
+       } else {
+               idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
+               srcu_for_each_node_breadth_first(ssp, snp) {
+                       spin_lock_irq_rcu_node(snp);
+                       cbs = false;
+                       last_lvl = snp >= ssp->level[rcu_num_lvls - 1];
+                       if (last_lvl)
+                               cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq;
+                       snp->srcu_have_cbs[idx] = gpseq;
+                       rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
+                       sgsne = snp->srcu_gp_seq_needed_exp;
+                       if (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, gpseq))
+                               WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
+                       if (ss_state < SRCU_SIZE_BIG)
+                               mask = ~0;
+                       else
+                               mask = snp->srcu_data_have_cbs[idx];
+                       snp->srcu_data_have_cbs[idx] = 0;
+                       spin_unlock_irq_rcu_node(snp);
+                       if (cbs)
+                               srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
+               }
        }
 
+       /* Occasionally prevent srcu_data counter wrap. */
+       if (!(gpseq & counter_wrap_check))
+               for_each_possible_cpu(cpu) {
+                       sdp = per_cpu_ptr(ssp->sda, cpu);
+                       spin_lock_irqsave_rcu_node(sdp, flags);
+                       if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
+                               sdp->srcu_gp_seq_needed = gpseq;
+                       if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
+                               sdp->srcu_gp_seq_needed_exp = gpseq;
+                       spin_unlock_irqrestore_rcu_node(sdp, flags);
+               }
+
        /* Callback initiation done, allow grace periods after next. */
        mutex_unlock(&ssp->srcu_cb_mutex);
 
@@ -583,6 +784,14 @@ static void srcu_gp_end(struct srcu_struct *ssp)
        } else {
                spin_unlock_irq_rcu_node(ssp);
        }
+
+       /* Transition to big if needed. */
+       if (ss_state != SRCU_SIZE_SMALL && ss_state != SRCU_SIZE_BIG) {
+               if (ss_state == SRCU_SIZE_ALLOC)
+                       init_srcu_struct_nodes(ssp, GFP_KERNEL);
+               else
+                       smp_store_release(&ssp->srcu_size_state, ss_state + 1);
+       }
 }
 
 /*
@@ -596,20 +805,24 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
                                  unsigned long s)
 {
        unsigned long flags;
+       unsigned long sgsne;
 
-       for (; snp != NULL; snp = snp->srcu_parent) {
-               if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
-                   ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
-                       return;
-               spin_lock_irqsave_rcu_node(snp, flags);
-               if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
+       if (snp)
+               for (; snp != NULL; snp = snp->srcu_parent) {
+                       sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
+                       if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
+                           (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
+                               return;
+                       spin_lock_irqsave_rcu_node(snp, flags);
+                       sgsne = snp->srcu_gp_seq_needed_exp;
+                       if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) {
+                               spin_unlock_irqrestore_rcu_node(snp, flags);
+                               return;
+                       }
+                       WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
                        spin_unlock_irqrestore_rcu_node(snp, flags);
-                       return;
                }
-               WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
-               spin_unlock_irqrestore_rcu_node(snp, flags);
-       }
-       spin_lock_irqsave_rcu_node(ssp, flags);
+       spin_lock_irqsave_ssp_contention(ssp, &flags);
        if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
                WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
        spin_unlock_irqrestore_rcu_node(ssp, flags);
@@ -630,39 +843,47 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
 {
        unsigned long flags;
        int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
-       struct srcu_node *snp = sdp->mynode;
+       unsigned long sgsne;
+       struct srcu_node *snp;
+       struct srcu_node *snp_leaf;
        unsigned long snp_seq;
 
-       /* Each pass through the loop does one level of the srcu_node tree. */
-       for (; snp != NULL; snp = snp->srcu_parent) {
-               if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode)
-                       return; /* GP already done and CBs recorded. */
-               spin_lock_irqsave_rcu_node(snp, flags);
-               if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
+       /* Ensure that the snp node tree is fully initialized before traversing it. */
+       if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+               snp_leaf = NULL;
+       else
+               snp_leaf = sdp->mynode;
+
+       if (snp_leaf)
+               /* Each pass through the loop does one level of the srcu_node tree. */
+               for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
+                       if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != snp_leaf)
+                               return; /* GP already done and CBs recorded. */
+                       spin_lock_irqsave_rcu_node(snp, flags);
                        snp_seq = snp->srcu_have_cbs[idx];
-                       if (snp == sdp->mynode && snp_seq == s)
-                               snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
-                       spin_unlock_irqrestore_rcu_node(snp, flags);
-                       if (snp == sdp->mynode && snp_seq != s) {
-                               srcu_schedule_cbs_sdp(sdp, do_norm
-                                                          ? SRCU_INTERVAL
-                                                          : 0);
+                       if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) {
+                               if (snp == snp_leaf && snp_seq == s)
+                                       snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+                               spin_unlock_irqrestore_rcu_node(snp, flags);
+                               if (snp == snp_leaf && snp_seq != s) {
+                                       srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0);
+                                       return;
+                               }
+                               if (!do_norm)
+                                       srcu_funnel_exp_start(ssp, snp, s);
                                return;
                        }
-                       if (!do_norm)
-                               srcu_funnel_exp_start(ssp, snp, s);
-                       return;
+                       snp->srcu_have_cbs[idx] = s;
+                       if (snp == snp_leaf)
+                               snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+                       sgsne = snp->srcu_gp_seq_needed_exp;
+                       if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s)))
+                               WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
+                       spin_unlock_irqrestore_rcu_node(snp, flags);
                }
-               snp->srcu_have_cbs[idx] = s;
-               if (snp == sdp->mynode)
-                       snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
-               if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
-                       WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
-               spin_unlock_irqrestore_rcu_node(snp, flags);
-       }
 
        /* Top of tree, must ensure the grace period will be started. */
-       spin_lock_irqsave_rcu_node(ssp, flags);
+       spin_lock_irqsave_ssp_contention(ssp, &flags);
        if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) {
                /*
                 * Record need for grace period s.  Pair with load
@@ -678,9 +899,15 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
            rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
                WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
                srcu_gp_start(ssp);
+
+               // And how can that list_add() in the "else" clause
+               // possibly be safe for concurrent execution?  Well,
+               // it isn't.  And it does not have to be.  After all, it
+               // can only be executed during early boot when there is only
+               // the one boot CPU running with interrupts still disabled.
                if (likely(srcu_init_done))
                        queue_delayed_work(rcu_gp_wq, &ssp->work,
-                                          srcu_get_delay(ssp));
+                                          !!srcu_get_delay(ssp));
                else if (list_empty(&ssp->work.work.entry))
                        list_add(&ssp->work.work.entry, &srcu_boot_list);
        }
@@ -814,11 +1041,17 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
        bool needgp = false;
        unsigned long s;
        struct srcu_data *sdp;
+       struct srcu_node *sdp_mynode;
+       int ss_state;
 
        check_init_srcu_struct(ssp);
        idx = srcu_read_lock(ssp);
-       sdp = raw_cpu_ptr(ssp->sda);
-       spin_lock_irqsave_rcu_node(sdp, flags);
+       ss_state = smp_load_acquire(&ssp->srcu_size_state);
+       if (ss_state < SRCU_SIZE_WAIT_CALL)
+               sdp = per_cpu_ptr(ssp->sda, 0);
+       else
+               sdp = raw_cpu_ptr(ssp->sda);
+       spin_lock_irqsave_sdp_contention(sdp, &flags);
        if (rhp)
                rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
        rcu_segcblist_advance(&sdp->srcu_cblist,
@@ -834,10 +1067,17 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
                needexp = true;
        }
        spin_unlock_irqrestore_rcu_node(sdp, flags);
+
+       /* Ensure that the snp node tree is fully initialized before traversing it. */
+       if (ss_state < SRCU_SIZE_WAIT_BARRIER)
+               sdp_mynode = NULL;
+       else
+               sdp_mynode = sdp->mynode;
+
        if (needgp)
                srcu_funnel_gp_start(ssp, sdp, s, do_norm);
        else if (needexp)
-               srcu_funnel_exp_start(ssp, sdp->mynode, s);
+               srcu_funnel_exp_start(ssp, sdp_mynode, s);
        srcu_read_unlock(ssp, idx);
        return s;
 }
@@ -1097,6 +1337,28 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
                complete(&ssp->srcu_barrier_completion);
 }
 
+/*
+ * Enqueue an srcu_barrier() callback on the specified srcu_data
+ * structure's ->cblist.  but only if that ->cblist already has at least one
+ * callback enqueued.  Note that if a CPU already has callbacks enqueue,
+ * it must have already registered the need for a future grace period,
+ * so all we need do is enqueue a callback that will use the same grace
+ * period as the last callback already in the queue.
+ */
+static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp)
+{
+       spin_lock_irq_rcu_node(sdp);
+       atomic_inc(&ssp->srcu_barrier_cpu_cnt);
+       sdp->srcu_barrier_head.func = srcu_barrier_cb;
+       debug_rcu_head_queue(&sdp->srcu_barrier_head);
+       if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
+                                  &sdp->srcu_barrier_head)) {
+               debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
+               atomic_dec(&ssp->srcu_barrier_cpu_cnt);
+       }
+       spin_unlock_irq_rcu_node(sdp);
+}
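
The counting protocol used by srcu_barrier_one_cpu() starts ->srcu_barrier_cpu_cnt at one so that the completion cannot trigger until every per-CPU callback has been entrained, after which the initial reference is dropped. The same shape in portable C, with illustrative names:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int barrier_cpu_cnt;

static void barrier_cb(void)            /* one queued callback completing */
{
        if (atomic_fetch_sub(&barrier_cpu_cnt, 1) == 1)
                printf("all callbacks done, waiter released\n");
}

int main(void)
{
        atomic_store(&barrier_cpu_cnt, 1);              /* initial count */
        for (int cpu = 0; cpu < 4; cpu++)
                atomic_fetch_add(&barrier_cpu_cnt, 1);  /* post per-CPU CB */
        for (int cpu = 0; cpu < 4; cpu++)
                barrier_cb();                           /* CBs fire */
        barrier_cb();   /* drop the initial count: now zero is reachable */
        return 0;
}
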
+
 /**
  * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
  * @ssp: srcu_struct on which to wait for in-flight callbacks.
@@ -1104,7 +1366,7 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
 void srcu_barrier(struct srcu_struct *ssp)
 {
        int cpu;
-       struct srcu_data *sdp;
+       int idx;
        unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq);
 
        check_init_srcu_struct(ssp);
@@ -1120,27 +1382,13 @@ void srcu_barrier(struct srcu_struct *ssp)
        /* Initial count prevents reaching zero until all CBs are posted. */
        atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
 
-       /*
-        * Each pass through this loop enqueues a callback, but only
-        * on CPUs already having callbacks enqueued.  Note that if
-        * a CPU already has callbacks enqueue, it must have already
-        * registered the need for a future grace period, so all we
-        * need do is enqueue a callback that will use the same
-        * grace period as the last callback already in the queue.
-        */
-       for_each_possible_cpu(cpu) {
-               sdp = per_cpu_ptr(ssp->sda, cpu);
-               spin_lock_irq_rcu_node(sdp);
-               atomic_inc(&ssp->srcu_barrier_cpu_cnt);
-               sdp->srcu_barrier_head.func = srcu_barrier_cb;
-               debug_rcu_head_queue(&sdp->srcu_barrier_head);
-               if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
-                                          &sdp->srcu_barrier_head)) {
-                       debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
-                       atomic_dec(&ssp->srcu_barrier_cpu_cnt);
-               }
-               spin_unlock_irq_rcu_node(sdp);
-       }
+       idx = srcu_read_lock(ssp);
+       if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+               srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
+       else
+               for_each_possible_cpu(cpu)
+                       srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
+       srcu_read_unlock(ssp, idx);
 
        /* Remove the initial count, at which point reaching zero can happen. */
        if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
@@ -1214,6 +1462,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
                srcu_flip(ssp);
                spin_lock_irq_rcu_node(ssp);
                rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
+               ssp->srcu_n_exp_nodelay = 0;
                spin_unlock_irq_rcu_node(ssp);
        }
 
@@ -1228,6 +1477,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
                        mutex_unlock(&ssp->srcu_gp_mutex);
                        return; /* readers present, retry later. */
                }
+               ssp->srcu_n_exp_nodelay = 0;
                srcu_gp_end(ssp);  /* Releases ->srcu_gp_mutex. */
        }
 }
@@ -1318,12 +1568,28 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
  */
 static void process_srcu(struct work_struct *work)
 {
+       unsigned long curdelay;
+       unsigned long j;
        struct srcu_struct *ssp;
 
        ssp = container_of(work, struct srcu_struct, work.work);
 
        srcu_advance_state(ssp);
-       srcu_reschedule(ssp, srcu_get_delay(ssp));
+       curdelay = srcu_get_delay(ssp);
+       if (curdelay) {
+               WRITE_ONCE(ssp->reschedule_count, 0);
+       } else {
+               j = jiffies;
+               if (READ_ONCE(ssp->reschedule_jiffies) == j) {
+                       WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
+                       if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+                               curdelay = 1;
+               } else {
+                       WRITE_ONCE(ssp->reschedule_count, 1);
+                       WRITE_ONCE(ssp->reschedule_jiffies, j);
+               }
+       }
+       srcu_reschedule(ssp, curdelay);
 }
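
The new tail of process_srcu() adds hysteresis: zero-delay self-requeues are counted per jiffy, and once more than SRCU_MAX_NODELAY of them land in the same jiffy, a one-jiffy delay is forced so that the workqueue handler cannot monopolize a CPU. A compact model of that throttle:

#include <stdio.h>

#define SRCU_MAX_NODELAY 100

static unsigned long resched_count, resched_jiffies;

static unsigned long throttle(unsigned long curdelay, unsigned long j)
{
        if (curdelay) {
                resched_count = 0;
        } else if (resched_jiffies == j) {
                if (++resched_count > SRCU_MAX_NODELAY)
                        curdelay = 1;   /* force a breather */
        } else {
                resched_count = 1;
                resched_jiffies = j;
        }
        return curdelay;
}

int main(void)
{
        unsigned long d = 0;

        for (int i = 0; i < 105; i++)
                d = throttle(0, 1000);  /* same jiffy, no delay wanted */
        printf("forced delay: %lu\n", d);       /* 1 once past the cap */
        return 0;
}
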
 
 void srcutorture_get_gp_data(enum rcutorture_type test_type,
@@ -1337,43 +1603,69 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
 }
 EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
 
+static const char * const srcu_size_state_name[] = {
+       "SRCU_SIZE_SMALL",
+       "SRCU_SIZE_ALLOC",
+       "SRCU_SIZE_WAIT_BARRIER",
+       "SRCU_SIZE_WAIT_CALL",
+       "SRCU_SIZE_WAIT_CBS1",
+       "SRCU_SIZE_WAIT_CBS2",
+       "SRCU_SIZE_WAIT_CBS3",
+       "SRCU_SIZE_WAIT_CBS4",
+       "SRCU_SIZE_BIG",
+       "SRCU_SIZE_???",
+};
+
 void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
 {
        int cpu;
        int idx;
        unsigned long s0 = 0, s1 = 0;
+       int ss_state = READ_ONCE(ssp->srcu_size_state);
+       int ss_state_idx = ss_state;
 
        idx = ssp->srcu_idx & 0x1;
-       pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):",
-                tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx);
-       for_each_possible_cpu(cpu) {
-               unsigned long l0, l1;
-               unsigned long u0, u1;
-               long c0, c1;
-               struct srcu_data *sdp;
-
-               sdp = per_cpu_ptr(ssp->sda, cpu);
-               u0 = data_race(sdp->srcu_unlock_count[!idx]);
-               u1 = data_race(sdp->srcu_unlock_count[idx]);
-
-               /*
-                * Make sure that a lock is always counted if the corresponding
-                * unlock is counted.
-                */
-               smp_rmb();
-
-               l0 = data_race(sdp->srcu_lock_count[!idx]);
-               l1 = data_race(sdp->srcu_lock_count[idx]);
-
-               c0 = l0 - u0;
-               c1 = l1 - u1;
-               pr_cont(" %d(%ld,%ld %c)",
-                       cpu, c0, c1,
-                       "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
-               s0 += c0;
-               s1 += c1;
+       if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name))
+               ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
+       pr_alert("%s%s Tree SRCU g%ld state %d (%s)",
+                tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), ss_state,
+                srcu_size_state_name[ss_state_idx]);
+       if (!ssp->sda) {
+               // Called after cleanup_srcu_struct(), perhaps.
+               pr_cont(" No per-CPU srcu_data structures (->sda == NULL).\n");
+       } else {
+               pr_cont(" per-CPU(idx=%d):", idx);
+               for_each_possible_cpu(cpu) {
+                       unsigned long l0, l1;
+                       unsigned long u0, u1;
+                       long c0, c1;
+                       struct srcu_data *sdp;
+
+                       sdp = per_cpu_ptr(ssp->sda, cpu);
+                       u0 = data_race(sdp->srcu_unlock_count[!idx]);
+                       u1 = data_race(sdp->srcu_unlock_count[idx]);
+
+                       /*
+                        * Make sure that a lock is always counted if the corresponding
+                        * unlock is counted.
+                        */
+                       smp_rmb();
+
+                       l0 = data_race(sdp->srcu_lock_count[!idx]);
+                       l1 = data_race(sdp->srcu_lock_count[idx]);
+
+                       c0 = l0 - u0;
+                       c1 = l1 - u1;
+                       pr_cont(" %d(%ld,%ld %c)",
+                               cpu, c0, c1,
+                               "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
+                       s0 += c0;
+                       s1 += c1;
+               }
+               pr_cont(" T(%ld,%ld)\n", s0, s1);
        }
-       pr_cont(" T(%ld,%ld)\n", s0, s1);
+       if (SRCU_SIZING_IS_TORTURE())
+               srcu_transition_to_big(ssp);
 }
 EXPORT_SYMBOL_GPL(srcu_torture_stats_print);
 
@@ -1390,6 +1682,17 @@ void __init srcu_init(void)
 {
        struct srcu_struct *ssp;
 
+       /* Decide on srcu_struct-size strategy. */
+       if (SRCU_SIZING_IS(SRCU_SIZING_AUTO)) {
+               if (nr_cpu_ids >= big_cpu_lim) {
+                       convert_to_big = SRCU_SIZING_INIT; // Don't bother waiting for contention.
+                       pr_info("%s: Setting srcu_struct sizes to big.\n", __func__);
+               } else {
+                       convert_to_big = SRCU_SIZING_NONE | SRCU_SIZING_CONTEND;
+                       pr_info("%s: Setting srcu_struct sizes based on contention.\n", __func__);
+               }
+       }
+
        /*
         * Once that is set, call_srcu() can follow the normal path and
         * queue delayed work. This must follow RCU workqueues creation
@@ -1400,6 +1703,8 @@ void __init srcu_init(void)
                ssp = list_first_entry(&srcu_boot_list, struct srcu_struct,
                                      work.work.entry);
                list_del_init(&ssp->work.work.entry);
+               if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && ssp->srcu_size_state == SRCU_SIZE_SMALL)
+                       ssp->srcu_size_state = SRCU_SIZE_ALLOC;
                queue_work(rcu_gp_wq, &ssp->work.work);
        }
 }
index 33d896d8590233e59fc8b73f626a45f20429e318..5cefc702158fefe6c0d8e1d1dba054501cd89183 100644 (file)
@@ -111,7 +111,7 @@ static void rcu_sync_func(struct rcu_head *rhp)
  * a slowpath during the update.  After this function returns, all
  * subsequent calls to rcu_sync_is_idle() will return false, which
  * tells readers to stay off their fastpaths.  A later call to
- * rcu_sync_exit() re-enables reader slowpaths.
+ * rcu_sync_exit() re-enables reader fastpaths.
  *
  * When called in isolation, rcu_sync_enter() must wait for a grace
  * period, however, closely spaced calls to rcu_sync_enter() can
index 99cf3a13954cfb17828fbbeeb884f11614a526a9..3925e32159b5a588c0afe195fe40be40c7e687d2 100644 (file)
@@ -46,7 +46,7 @@ struct rcu_tasks_percpu {
 
 /**
  * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
- * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
+ * @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
  * @cbs_gbl_lock: Lock protecting callback list.
  * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
  * @gp_func: This flavor's grace-period-wait function.
@@ -77,7 +77,7 @@ struct rcu_tasks_percpu {
  * @kname: This flavor's kthread name.
  */
 struct rcu_tasks {
-       struct wait_queue_head cbs_wq;
+       struct rcuwait cbs_wait;
        raw_spinlock_t cbs_gbl_lock;
        int gp_state;
        int gp_sleep;
@@ -113,11 +113,11 @@ static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp);
 #define DEFINE_RCU_TASKS(rt_name, gp, call, n)                                         \
 static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = {                        \
        .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock),            \
-       .rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup),                        \
+       .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup),                   \
 };                                                                                     \
 static struct rcu_tasks rt_name =                                                      \
 {                                                                                      \
-       .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq),                        \
+       .cbs_wait = __RCUWAIT_INITIALIZER(rt_name.cbs_wait),                            \
        .cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock),                 \
        .gp_func = gp,                                                                  \
        .call_func = call,                                                              \
@@ -143,6 +143,11 @@ module_param(rcu_task_ipi_delay, int, 0644);
 #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
 static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
 module_param(rcu_task_stall_timeout, int, 0644);
+#define RCU_TASK_STALL_INFO (HZ * 10)
+static int rcu_task_stall_info __read_mostly = RCU_TASK_STALL_INFO;
+module_param(rcu_task_stall_info, int, 0644);
+static int rcu_task_stall_info_mult __read_mostly = 3;
+module_param(rcu_task_stall_info_mult, int, 0444);
 
 static int rcu_task_enqueue_lim __read_mostly = -1;
 module_param(rcu_task_enqueue_lim, int, 0444);
@@ -261,14 +266,16 @@ static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
        struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work);
 
        rtp = rtpcp->rtpp;
-       wake_up(&rtp->cbs_wq);
+       rcuwait_wake_up(&rtp->cbs_wait);
 }
 
 // Enqueue a callback for the specified flavor of Tasks RCU.
 static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
                                   struct rcu_tasks *rtp)
 {
+       int chosen_cpu;
        unsigned long flags;
+       int ideal_cpu;
        unsigned long j;
        bool needadjust = false;
        bool needwake;
@@ -278,8 +285,9 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
        rhp->func = func;
        local_irq_save(flags);
        rcu_read_lock();
-       rtpcp = per_cpu_ptr(rtp->rtpcpu,
-                           smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift));
+       ideal_cpu = smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift);
+       chosen_cpu = cpumask_next(ideal_cpu - 1, cpu_possible_mask);
+       rtpcp = per_cpu_ptr(rtp->rtpcpu, chosen_cpu);
        if (!raw_spin_trylock_rcu_node(rtpcp)) { // irqs already disabled.
                raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
                j = jiffies;
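
The two-step CPU choice above first shifts the current CPU number by ->percpu_enqueue_shift to select a callback-queue group, then maps the result through cpumask_next() so that sparse possible-CPU masks still resolve to a real CPU. A sketch of that mapping over a toy bitmask, where next_possible() approximates cpumask_next(ideal_cpu - 1, cpu_possible_mask):

#include <stdio.h>

/* First set bit at or after idx, or -1 if none. */
static int next_possible(unsigned long long mask, int idx)
{
        for (int i = idx; i < 64; i++)
                if (mask & (1ULL << i))
                        return i;
        return -1;
}

int main(void)
{
        unsigned long long cpu_possible = 0xAB; /* CPUs 0, 1, 3, 5, 7 */
        int shift = 1;                          /* percpu_enqueue_shift */

        for (int cpu = 0; cpu < 8; cpu++) {
                int ideal = cpu >> shift;
                printf("cpu %d enqueues on cpu %d\n", cpu,
                       next_possible(cpu_possible, ideal));
        }
        return 0;
}
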
@@ -460,7 +468,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
                }
        }
 
-       if (rcu_segcblist_empty(&rtpcp->cblist))
+       if (rcu_segcblist_empty(&rtpcp->cblist) || !cpu_possible(cpu))
                return;
        raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
        rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
@@ -509,7 +517,9 @@ static int __noreturn rcu_tasks_kthread(void *arg)
                set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
 
                /* If there were none, wait a bit and start over. */
-               wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp)));
+               rcuwait_wait_event(&rtp->cbs_wait,
+                                  (needgpcb = rcu_tasks_need_gpcb(rtp)),
+                                  TASK_IDLE);
 
                if (needgpcb & 0x2) {
                        // Wait for one grace period.
@@ -548,8 +558,15 @@ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
 static void __init rcu_tasks_bootup_oddness(void)
 {
 #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
+       int rtsimc;
+
        if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
                pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
+       rtsimc = clamp(rcu_task_stall_info_mult, 1, 10);
+       if (rtsimc != rcu_task_stall_info_mult) {
+               pr_info("\tTasks-RCU CPU stall info multiplier clamped to %d (rcu_task_stall_info_mult).\n", rtsimc);
+               rcu_task_stall_info_mult = rtsimc;
+       }
 #endif /* #ifdef CONFIG_TASKS_RCU */
 #ifdef CONFIG_TASKS_RCU
        pr_info("\tTrampoline variant of Tasks RCU enabled.\n");
@@ -568,7 +585,17 @@ static void __init rcu_tasks_bootup_oddness(void)
 /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
 static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
 {
-       struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, 0); // for_each...
+       int cpu;
+       bool havecbs = false;
+
+       for_each_possible_cpu(cpu) {
+               struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+
+               if (!data_race(rcu_segcblist_empty(&rtpcp->cblist))) {
+                       havecbs = true;
+                       break;
+               }
+       }
        pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
                rtp->kname,
                tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
@@ -576,7 +603,7 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
                data_race(rcu_seq_current(&rtp->tasks_gp_seq)),
                data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
                ".k"[!!data_race(rtp->kthread_ptr)],
-               ".C"[!data_race(rcu_segcblist_empty(&rtpcp->cblist))],
+               ".C"[havecbs],
                s);
 }
 #endif // #ifndef CONFIG_TINY_RCU
@@ -592,10 +619,15 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t);
 /* Wait for one RCU-tasks grace period. */
 static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 {
-       struct task_struct *g, *t;
-       unsigned long lastreport;
-       LIST_HEAD(holdouts);
+       struct task_struct *g;
        int fract;
+       LIST_HEAD(holdouts);
+       unsigned long j;
+       unsigned long lastinfo;
+       unsigned long lastreport;
+       bool reported = false;
+       int rtsi;
+       struct task_struct *t;
 
        set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
        rtp->pregp_func();
@@ -621,30 +653,50 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
         * is empty, we are done.
         */
        lastreport = jiffies;
+       lastinfo = lastreport;
+       rtsi = READ_ONCE(rcu_task_stall_info);
 
        // Start off with initial wait and slowly back off to 1 HZ wait.
        fract = rtp->init_fract;
 
        while (!list_empty(&holdouts)) {
+               ktime_t exp;
                bool firstreport;
                bool needreport;
                int rtst;
 
-               /* Slowly back off waiting for holdouts */
+               // Slowly back off waiting for holdouts
                set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
-               schedule_timeout_idle(fract);
+               if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+                       schedule_timeout_idle(fract);
+               } else {
+                       exp = jiffies_to_nsecs(fract);
+                       __set_current_state(TASK_IDLE);
+                       schedule_hrtimeout_range(&exp, jiffies_to_nsecs(HZ / 2), HRTIMER_MODE_REL_HARD);
+               }
 
                if (fract < HZ)
                        fract++;
 
                rtst = READ_ONCE(rcu_task_stall_timeout);
                needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
-               if (needreport)
+               if (needreport) {
                        lastreport = jiffies;
+                       reported = true;
+               }
                firstreport = true;
                WARN_ON(signal_pending(current));
                set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS);
                rtp->holdouts_func(&holdouts, needreport, &firstreport);
+
+               // Print pre-stall informational messages if needed.
+               j = jiffies;
+               if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) {
+                       lastinfo = j;
+                       rtsi = rtsi * rcu_task_stall_info_mult;
+                       pr_info("%s: %s grace period %lu is %lu jiffies old.\n",
+                               __func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start);
+               }
        }
 
        set_tasks_gp_state(rtp, RTGS_POST_GP);
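
The informational messages added above are deliberately geometric: the first prints after rcu_task_stall_info jiffies, and each later interval is multiplied by rcu_task_stall_info_mult (default 3), so a wedged grace period logs at roughly 10, 40, and 130 seconds before the full stall warning takes over. A sketch of that cadence, assuming HZ = 1000:

#include <stdio.h>

int main(void)
{
        unsigned long hz = 1000;
        unsigned long rtsi = 10 * hz;   /* rcu_task_stall_info default */
        int mult = 3;                   /* rcu_task_stall_info_mult */
        unsigned long t = 0;

        for (int i = 0; i < 4; i++) {
                t += rtsi;
                printf("info message %d at %lu jiffies (%lus)\n",
                       i + 1, t, t / hz);
                rtsi *= mult;
        }
        return 0;
}
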
@@ -950,6 +1002,9 @@ static void rcu_tasks_be_rude(struct work_struct *work)
 // Wait for one rude RCU-tasks grace period.
 static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
 {
+       if (num_online_cpus() <= 1)
+               return; // Fastpath for only one CPU.
+
        rtp->n_ipis += cpumask_weight(cpu_online_mask);
        schedule_on_each_cpu(rcu_tasks_be_rude);
 }
index a4b8189455d5eca9694101d779346a269597994b..c25ba442044a6e9452a43f617a93abd4a8b6ab08 100644 (file)
@@ -1679,6 +1679,8 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
        rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */
        if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
                WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
+       if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap))
+               WRITE_ONCE(rdp->last_sched_clock, jiffies);
        WRITE_ONCE(rdp->gpwrap, false);
        rcu_gpnum_ovf(rnp, rdp);
        return ret;
@@ -1705,11 +1707,37 @@ static void note_gp_changes(struct rcu_data *rdp)
                rcu_gp_kthread_wake();
 }
 
+static atomic_t *rcu_gp_slow_suppress;
+
+/* Register a counter to suppress debugging grace-period delays. */
+void rcu_gp_slow_register(atomic_t *rgssp)
+{
+       WARN_ON_ONCE(rcu_gp_slow_suppress);
+
+       WRITE_ONCE(rcu_gp_slow_suppress, rgssp);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_register);
+
+/* Unregister a counter, with NULL for not caring which. */
+void rcu_gp_slow_unregister(atomic_t *rgssp)
+{
+       WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress);
+
+       WRITE_ONCE(rcu_gp_slow_suppress, NULL);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_unregister);
+
+static bool rcu_gp_slow_is_suppressed(void)
+{
+       atomic_t *rgssp = READ_ONCE(rcu_gp_slow_suppress);
+
+       return rgssp && atomic_read(rgssp);
+}
+
 static void rcu_gp_slow(int delay)
 {
-       if (delay > 0 &&
-           !(rcu_seq_ctr(rcu_state.gp_seq) %
-             (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
+       if (!rcu_gp_slow_is_suppressed() && delay > 0 &&
+           !(rcu_seq_ctr(rcu_state.gp_seq) % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
                schedule_timeout_idle(delay);
 }
 
@@ -2096,14 +2124,29 @@ static noinline void rcu_gp_cleanup(void)
        /* Advance CBs to reduce false positives below. */
        offloaded = rcu_rdp_is_offloaded(rdp);
        if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
+
+               // We get here if a grace period was needed ("needgp")
+               // and the above call to rcu_accelerate_cbs() did not set
+               // the RCU_GP_FLAG_INIT bit in ->gp_state (which records
+               // the need for another grace period).  The purpose
+               // of the “offloaded” check is to avoid invoking
+               // rcu_accelerate_cbs() on an offloaded CPU because we do not
+               // hold the ->nocb_lock needed to safely access an offloaded
+               // ->cblist.  We do not want to acquire that lock because
+               // it can be heavily contended during callback floods.
+
                WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
                WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
-               trace_rcu_grace_period(rcu_state.name,
-                                      rcu_state.gp_seq,
-                                      TPS("newreq"));
+               trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
        } else {
-               WRITE_ONCE(rcu_state.gp_flags,
-                          rcu_state.gp_flags & RCU_GP_FLAG_INIT);
+
+               // We get here either if there is no need for an
+               // additional grace period or if rcu_accelerate_cbs() has
+               // already set the RCU_GP_FLAG_INIT bit in ->gp_flags.
+               // So all we need to do is clear all of the other
+               // ->gp_flags bits.
+
+               WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags & RCU_GP_FLAG_INIT);
        }
        raw_spin_unlock_irq_rcu_node(rnp);
 
@@ -2609,6 +2652,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
  */
 void rcu_sched_clock_irq(int user)
 {
+       unsigned long j;
+
+       if (IS_ENABLED(CONFIG_PROVE_RCU)) {
+               j = jiffies;
+               WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
+               __this_cpu_write(rcu_data.last_sched_clock, j);
+       }
        trace_rcu_utilization(TPS("Start scheduler-tick"));
        lockdep_assert_irqs_disabled();
        raw_cpu_inc(rcu_data.ticks_this_gp);
@@ -2624,6 +2674,8 @@ void rcu_sched_clock_irq(int user)
        rcu_flavor_sched_clock_irq(user);
        if (rcu_pending(user))
                invoke_rcu_core();
+       if (user)
+               rcu_tasks_classic_qs(current, false);
        lockdep_assert_irqs_disabled();
 
        trace_rcu_utilization(TPS("End scheduler-tick"));
@@ -3717,7 +3769,9 @@ static int rcu_blocking_is_gp(void)
 {
        int ret;
 
-       if (IS_ENABLED(CONFIG_PREEMPTION))
+       // Invoking preempt_model_*() too early gets a splat.
+       if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE ||
+           preempt_model_full() || preempt_model_rt())
                return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
        might_sleep();  /* Check for RCU read-side critical section. */
        preempt_disable();
@@ -4179,6 +4233,7 @@ rcu_boot_init_percpu_data(int cpu)
        rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
        rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
        rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
+       rdp->last_sched_clock = jiffies;
        rdp->cpu = cpu;
        rcu_boot_init_nocb_percpu_data(rdp);
 }
@@ -4471,6 +4526,51 @@ static int rcu_pm_notify(struct notifier_block *self,
        return NOTIFY_OK;
 }
 
+#ifdef CONFIG_RCU_EXP_KTHREAD
+struct kthread_worker *rcu_exp_gp_kworker;
+struct kthread_worker *rcu_exp_par_gp_kworker;
+
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+       const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
+       const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
+       struct sched_param param = { .sched_priority = kthread_prio };
+
+       rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
+       if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
+               pr_err("Failed to create %s!\n", gp_kworker_name);
+               return;
+       }
+
+       rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
+       if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
+               pr_err("Failed to create %s!\n", par_gp_kworker_name);
+               kthread_destroy_worker(rcu_exp_gp_kworker);
+               return;
+       }
+
+       sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
+       sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
+                                  &param);
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+}
+#else /* !CONFIG_RCU_EXP_KTHREAD */
+struct workqueue_struct *rcu_par_gp_wq;
+
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+       rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+       WARN_ON(!rcu_par_gp_wq);
+}
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+
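
The error handling above is the usual kthread_worker pattern:
kthread_create_worker() returns an ERR_PTR() on failure, so the result must be
checked before ->task is dereferenced, and a half-built pair is unwound with
kthread_destroy_worker(). Condensed to its essentials (a hedged sketch;
"my_worker" is illustrative):

        struct sched_param param = { .sched_priority = kthread_prio };
        struct kthread_worker *w;

        w = kthread_create_worker(0, "my_worker");
        if (IS_ERR_OR_NULL(w))
                return;         /* leave the feature disabled */
        sched_setscheduler_nocheck(w->task, SCHED_FIFO, &param);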
 /*
  * Spawn the kthreads that handle RCU's grace periods.
  */
@@ -4480,6 +4580,7 @@ static int __init rcu_spawn_gp_kthread(void)
        struct rcu_node *rnp;
        struct sched_param sp;
        struct task_struct *t;
+       struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 
        rcu_scheduler_fully_active = 1;
        t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
@@ -4497,9 +4598,17 @@ static int __init rcu_spawn_gp_kthread(void)
        smp_store_release(&rcu_state.gp_kthread, t);  /* ^^^ */
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        wake_up_process(t);
-       rcu_spawn_nocb_kthreads();
-       rcu_spawn_boost_kthreads();
+       /* This is a pre-SMP initcall, so we expect only a single online CPU. */
+       WARN_ON(num_online_cpus() > 1);
+       /*
+        * These kthreads could not be created earlier, in rcu_init() ->
+        * rcutree_prepare_cpu(), because rcu_scheduler_fully_active was
+        * not yet set.
+        */
+       rcu_spawn_cpu_nocb_kthread(smp_processor_id());
+       rcu_spawn_one_boost_kthread(rdp->mynode);
        rcu_spawn_core_kthreads();
+       /* Create kthread workers for expedited GPs */
+       rcu_start_exp_gp_kworkers();
        return 0;
 }
 early_initcall(rcu_spawn_gp_kthread);
@@ -4745,7 +4854,6 @@ static void __init rcu_dump_rcu_node_tree(void)
 }
 
 struct workqueue_struct *rcu_gp_wq;
-struct workqueue_struct *rcu_par_gp_wq;
 
 static void __init kfree_rcu_batch_init(void)
 {
@@ -4782,7 +4890,7 @@ static void __init kfree_rcu_batch_init(void)
 
 void __init rcu_init(void)
 {
-       int cpu;
+       int cpu = smp_processor_id();
 
        rcu_early_boot_tests();
 
@@ -4802,17 +4910,15 @@ void __init rcu_init(void)
         * or the scheduler are operational.
         */
        pm_notifier(rcu_pm_notify, 0);
-       for_each_online_cpu(cpu) {
-               rcutree_prepare_cpu(cpu);
-               rcu_cpu_starting(cpu);
-               rcutree_online_cpu(cpu);
-       }
+       WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot.
+       rcutree_prepare_cpu(cpu);
+       rcu_cpu_starting(cpu);
+       rcutree_online_cpu(cpu);
 
        /* Create workqueue for Tree SRCU and for expedited GPs. */
        rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
        WARN_ON(!rcu_gp_wq);
-       rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
-       WARN_ON(!rcu_par_gp_wq);
+       rcu_alloc_par_gp_wq();
 
        /* Fill in default value for rcutree.qovld boot parameter. */
        /* -After- the rcu_node ->lock fields are initialized! */
index 926673ebe355f123c1b0a76d6aaed4c211f9dcd6..2ccf5845957df4201a814de0540b8fbabc6e9412 100644 (file)
@@ -10,6 +10,7 @@
  */
 
 #include <linux/cache.h>
+#include <linux/kthread.h>
 #include <linux/spinlock.h>
 #include <linux/rtmutex.h>
 #include <linux/threads.h>
 /* Communicate arguments to a workqueue handler. */
 struct rcu_exp_work {
        unsigned long rew_s;
+#ifdef CONFIG_RCU_EXP_KTHREAD
+       struct kthread_work rew_work;
+#else
        struct work_struct rew_work;
+#endif /* CONFIG_RCU_EXP_KTHREAD */
 };
 
 /* RCU's kthread states for tracing. */
@@ -254,6 +259,7 @@ struct rcu_data {
        unsigned long rcu_onl_gp_seq;   /* ->gp_seq at last online. */
        short rcu_onl_gp_flags;         /* ->gp_flags at last online. */
        unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
+       unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */
 
        int cpu;
 };
@@ -364,6 +370,7 @@ struct rcu_state {
        arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
                                                /* Synchronize offline with */
                                                /*  GP pre-initialization. */
+       int nocb_is_setup;                      /* nocb is set up at boot */
 };
 
 /* Values for rcu_state structure's gp_flags field. */
@@ -421,7 +428,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
 static bool rcu_is_callbacks_kthread(void);
 static void rcu_cpu_kthread_setup(unsigned int cpu);
 static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
-static void __init rcu_spawn_boost_kthreads(void);
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
 static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
 static void rcu_preempt_deferred_qs(struct task_struct *t);
@@ -439,7 +445,6 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
 static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_cpu_nocb_kthread(int cpu);
-static void __init rcu_spawn_nocb_kthreads(void);
 static void show_rcu_nocb_state(struct rcu_data *rdp);
 static void rcu_nocb_lock(struct rcu_data *rdp);
 static void rcu_nocb_unlock(struct rcu_data *rdp);
index 60197ea24ceb9c73a17382b3c031be7fff582557..0f70f62039a909067eb7cc9a7884bb09a6e3e612 100644 (file)
@@ -334,15 +334,13 @@ fastpath:
  * Select the CPUs within the specified rcu_node that the upcoming
  * expedited grace period needs to wait for.
  */
-static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
+static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
 {
        int cpu;
        unsigned long flags;
        unsigned long mask_ofl_test;
        unsigned long mask_ofl_ipi;
        int ret;
-       struct rcu_exp_work *rewp =
-               container_of(wp, struct rcu_exp_work, rew_work);
        struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
 
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -417,13 +415,119 @@ retry_ipi:
                rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false);
 }
 
+static void rcu_exp_sel_wait_wake(unsigned long s);
+
+#ifdef CONFIG_RCU_EXP_KTHREAD
+static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
+{
+       struct rcu_exp_work *rewp =
+               container_of(wp, struct rcu_exp_work, rew_work);
+
+       __sync_rcu_exp_select_node_cpus(rewp);
+}
+
+static inline bool rcu_gp_par_worker_started(void)
+{
+       return !!READ_ONCE(rcu_exp_par_gp_kworker);
+}
+
+static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
+{
+       kthread_init_work(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+       /*
+        * Use rcu_exp_par_gp_kworker, because flushing a work item from
+        * another work item on the same kthread worker can result in
+        * deadlock.
+        */
+       kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work);
+}
+
+static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
+{
+       kthread_flush_work(&rnp->rew.rew_work);
+}
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct kthread_work *wp)
+{
+       struct rcu_exp_work *rewp;
+
+       rewp = container_of(wp, struct rcu_exp_work, rew_work);
+       rcu_exp_sel_wait_wake(rewp->rew_s);
+}
+
+static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
+{
+       kthread_init_work(&rew->rew_work, wait_rcu_exp_gp);
+       kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work);
+}
+
+static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
+{
+}
+#else /* !CONFIG_RCU_EXP_KTHREAD */
+static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
+{
+       struct rcu_exp_work *rewp =
+               container_of(wp, struct rcu_exp_work, rew_work);
+
+       __sync_rcu_exp_select_node_cpus(rewp);
+}
+
+static inline bool rcu_gp_par_worker_started(void)
+{
+       return !!READ_ONCE(rcu_par_gp_wq);
+}
+
+static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
+{
+       int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
+
+       INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+       /* If all offline, queue the work on an unbound CPU. */
+       if (unlikely(cpu > rnp->grphi - rnp->grplo))
+               cpu = WORK_CPU_UNBOUND;
+       else
+               cpu += rnp->grplo;
+       queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
+}
+
+static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
+{
+       flush_work(&rnp->rew.rew_work);
+}
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct work_struct *wp)
+{
+       struct rcu_exp_work *rewp;
+
+       rewp = container_of(wp, struct rcu_exp_work, rew_work);
+       rcu_exp_sel_wait_wake(rewp->rew_s);
+}
+
+static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
+{
+       INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp);
+       queue_work(rcu_gp_wq, &rew->rew_work);
+}
+
+static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
+{
+       destroy_work_on_stack(&rew->rew_work);
+}
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+
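
The two #ifdef branches can mirror each other because the kthread_worker and
workqueue APIs are shape-compatible, which is also what lets struct
rcu_exp_work swap its rew_work member between struct kthread_work and struct
work_struct. The correspondence relied on here:

        /*
         * kthread_worker side             workqueue side
         * ------------------------------  -----------------------------
         * kthread_init_work(&w, fn)       INIT_WORK(&w, fn)
         * kthread_queue_work(worker, &w)  queue_work_on(cpu, wq, &w)
         * kthread_flush_work(&w)          flush_work(&w)
         */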
 /*
  * Select the nodes that the upcoming expedited grace period needs
  * to wait for.
  */
 static void sync_rcu_exp_select_cpus(void)
 {
-       int cpu;
        struct rcu_node *rnp;
 
        trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("reset"));
@@ -435,28 +539,21 @@ static void sync_rcu_exp_select_cpus(void)
                rnp->exp_need_flush = false;
                if (!READ_ONCE(rnp->expmask))
                        continue; /* Avoid early boot non-existent wq. */
-               if (!READ_ONCE(rcu_par_gp_wq) ||
+               if (!rcu_gp_par_worker_started() ||
                    rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
                    rcu_is_last_leaf_node(rnp)) {
-                       /* No workqueues yet or last leaf, do direct call. */
+                       /* No worker started yet or last leaf, do direct call. */
                        sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
                        continue;
                }
-               INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
-               cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
-               /* If all offline, queue the work on an unbound CPU. */
-               if (unlikely(cpu > rnp->grphi - rnp->grplo))
-                       cpu = WORK_CPU_UNBOUND;
-               else
-                       cpu += rnp->grplo;
-               queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
+               sync_rcu_exp_select_cpus_queue_work(rnp);
                rnp->exp_need_flush = true;
        }
 
-       /* Wait for workqueue jobs (if any) to complete. */
+       /* Wait for jobs (if any) to complete. */
        rcu_for_each_leaf_node(rnp)
                if (rnp->exp_need_flush)
-                       flush_work(&rnp->rew.rew_work);
+                       sync_rcu_exp_select_cpus_flush_work(rnp);
 }
 
 /*
@@ -496,7 +593,7 @@ static void synchronize_rcu_expedited_wait(void)
        struct rcu_node *rnp_root = rcu_get_root();
 
        trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
-       jiffies_stall = rcu_jiffies_till_stall_check();
+       jiffies_stall = rcu_exp_jiffies_till_stall_check();
        jiffies_start = jiffies;
        if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
                if (synchronize_rcu_expedited_wait_once(1))
@@ -571,7 +668,7 @@ static void synchronize_rcu_expedited_wait(void)
                                dump_cpu_task(cpu);
                        }
                }
-               jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
+               jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
        }
 }
 
@@ -622,17 +719,6 @@ static void rcu_exp_sel_wait_wake(unsigned long s)
        rcu_exp_wait_wake(s);
 }
 
-/*
- * Work-queue handler to drive an expedited grace period forward.
- */
-static void wait_rcu_exp_gp(struct work_struct *wp)
-{
-       struct rcu_exp_work *rewp;
-
-       rewp = container_of(wp, struct rcu_exp_work, rew_work);
-       rcu_exp_sel_wait_wake(rewp->rew_s);
-}
-
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
@@ -848,20 +934,19 @@ void synchronize_rcu_expedited(void)
        } else {
                /* Marshall arguments & schedule the expedited grace period. */
                rew.rew_s = s;
-               INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
-               queue_work(rcu_gp_wq, &rew.rew_work);
+               synchronize_rcu_expedited_queue_work(&rew);
        }
 
        /* Wait for expedited grace period to complete. */
        rnp = rcu_get_root();
        wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
                   sync_exp_work_done(s));
-       smp_mb(); /* Workqueue actions happen before return. */
+       smp_mb(); /* Work actions happen before return. */
 
        /* Let the next expedited grace period start. */
        mutex_unlock(&rcu_state.exp_mutex);
 
        if (likely(!boottime))
-               destroy_work_on_stack(&rew.rew_work);
+               synchronize_rcu_expedited_destroy_work(&rew);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
index 636d0546a4e932e57096e225ddd9698d9cd9eab4..46694e13398a3ee44746723c44f49e40312906eb 100644 (file)
@@ -60,9 +60,6 @@ static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
  * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
  * If the list is invalid, a warning is emitted and all CPUs are offloaded.
  */
-
-static bool rcu_nocb_is_setup;
-
 static int __init rcu_nocb_setup(char *str)
 {
        alloc_bootmem_cpumask_var(&rcu_nocb_mask);
@@ -72,7 +69,7 @@ static int __init rcu_nocb_setup(char *str)
                        cpumask_setall(rcu_nocb_mask);
                }
        }
-       rcu_nocb_is_setup = true;
+       rcu_state.nocb_is_setup = true;
        return 1;
 }
 __setup("rcu_nocbs", rcu_nocb_setup);
@@ -215,14 +212,6 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
        init_swait_queue_head(&rnp->nocb_gp_wq[1]);
 }
 
-/* Is the specified CPU a no-CBs CPU? */
-bool rcu_is_nocb_cpu(int cpu)
-{
-       if (cpumask_available(rcu_nocb_mask))
-               return cpumask_test_cpu(cpu, rcu_nocb_mask);
-       return false;
-}
-
 static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
                           struct rcu_data *rdp,
                           bool force, unsigned long flags)
@@ -1180,10 +1169,10 @@ void __init rcu_init_nohz(void)
                                return;
                        }
                }
-               rcu_nocb_is_setup = true;
+               rcu_state.nocb_is_setup = true;
        }
 
-       if (!rcu_nocb_is_setup)
+       if (!rcu_state.nocb_is_setup)
                return;
 
 #if defined(CONFIG_NO_HZ_FULL)
@@ -1241,7 +1230,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
        struct task_struct *t;
        struct sched_param sp;
 
-       if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup)
+       if (!rcu_scheduler_fully_active || !rcu_state.nocb_is_setup)
                return;
 
        /* If there already is an rcuo kthread, then nothing to do. */
@@ -1277,22 +1266,6 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
        WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
 }
 
-/*
- * Once the scheduler is running, spawn rcuo kthreads for all online
- * no-CBs CPUs.  This assumes that the early_initcall()s happen before
- * non-boot CPUs come online -- if this changes, we will need to add
- * some mutual exclusion.
- */
-static void __init rcu_spawn_nocb_kthreads(void)
-{
-       int cpu;
-
-       if (rcu_nocb_is_setup) {
-               for_each_online_cpu(cpu)
-                       rcu_spawn_cpu_nocb_kthread(cpu);
-       }
-}
-
 /* How many CB CPU IDs per GP kthread?  Default of -1 for sqrt(nr_cpu_ids). */
 static int rcu_nocb_gp_stride = -1;
 module_param(rcu_nocb_gp_stride, int, 0444);
@@ -1549,10 +1522,6 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 {
 }
 
-static void __init rcu_spawn_nocb_kthreads(void)
-{
-}
-
 static void show_rcu_nocb_state(struct rcu_data *rdp)
 {
 }
index 8360d86db1c028b39ecb0fe40cf6462aefd12f41..c8ba0fe17267c1c0b68e438616c08a00877ea409 100644 (file)
@@ -486,6 +486,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
        t->rcu_read_unlock_special.s = 0;
        if (special.b.need_qs) {
                if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+                       rdp->cpu_no_qs.b.norm = false;
                        rcu_report_qs_rdp(rdp);
                        udelay(rcu_unlock_delay);
                } else {
@@ -660,7 +661,13 @@ static void rcu_read_unlock_special(struct task_struct *t)
                            expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
                                // Get scheduler to re-evaluate and call hooks.
                                // If !IRQ_WORK, FQS scan will eventually IPI.
-                               init_irq_work(&rdp->defer_qs_iw, rcu_preempt_deferred_qs_handler);
+                               if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) &&
+                                   IS_ENABLED(CONFIG_PREEMPT_RT))
+                                       rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(
+                                                               rcu_preempt_deferred_qs_handler);
+                               else
+                                       init_irq_work(&rdp->defer_qs_iw,
+                                                     rcu_preempt_deferred_qs_handler);
                                rdp->defer_qs_iw_pending = true;
                                irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
                        }
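
IRQ_WORK_INIT_HARD() marks the work to run from hard interrupt context even on
PREEMPT_RT, where plain irq_work is otherwise deferred to a per-CPU kthread;
that deferral is what would make the strict-grace-period path too slow under
RT. A hedged sketch (my_handler and cpu are placeholders):

        static void my_handler(struct irq_work *iwp) { /* report the QS */ }
        static struct irq_work w = IRQ_WORK_INIT_HARD(my_handler);

        irq_work_queue_on(&w, cpu);     /* my_handler() runs in hard irq on cpu */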
@@ -1124,7 +1131,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
        __releases(rnp->lock)
 {
        raw_lockdep_assert_held_rcu_node(rnp);
-       if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
+       if (!rnp->boost_kthread_task ||
+           (!rcu_preempt_blocked_readers_cgp(rnp) && !rnp->exp_tasks)) {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                return;
        }
@@ -1226,18 +1234,6 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
        free_cpumask_var(cm);
 }
 
-/*
- * Spawn boost kthreads -- called as soon as the scheduler is running.
- */
-static void __init rcu_spawn_boost_kthreads(void)
-{
-       struct rcu_node *rnp;
-
-       rcu_for_each_leaf_node(rnp)
-               if (rcu_rnp_online_cpus(rnp))
-                       rcu_spawn_one_boost_kthread(rnp);
-}
-
 #else /* #ifdef CONFIG_RCU_BOOST */
 
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@@ -1263,10 +1259,6 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 {
 }
 
-static void __init rcu_spawn_boost_kthreads(void)
-{
-}
-
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 /*
index 0c5d8516516af5780c266c87b4705b84739eeb16..a001e1e7a99269c9968059a00cff25ea496dbc99 100644 (file)
@@ -25,6 +25,34 @@ int sysctl_max_rcu_stall_to_panic __read_mostly;
 #define RCU_STALL_MIGHT_DIV            8
 #define RCU_STALL_MIGHT_MIN            (2 * HZ)
 
+int rcu_exp_jiffies_till_stall_check(void)
+{
+       int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout);
+       int exp_stall_delay_delta = 0;
+       int till_stall_check;
+
+       // Zero means: fall back to rcu_cpu_stall_timeout, converted to milliseconds.
+       if (!cpu_stall_timeout)
+               cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check());
+
+       // Limit check must be consistent with the Kconfig limits for
+       // CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range.
+       // The minimum clamped value is "2UL", because at least one full
+       // tick has to be guaranteed.
+       till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ);
+
+       if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
+               WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));
+
+#ifdef CONFIG_PROVE_RCU
+       /* Add an extra ~25% on top of till_stall_check. */
+       exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1;
+#endif
+
+       return till_stall_check + exp_stall_delay_delta;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check);
+
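
A worked example of the clamping, under assumed values HZ = 1000 and
rcu_exp_cpu_stall_timeout = 20 (milliseconds):

        msecs_to_jiffies(20)        = 20 jiffies
        clamp(20, 2UL, 21UL * HZ)   = 20                     /* till_stall_check */
        CONFIG_PROVE_RCU delta      = (20 * 25) / 100 + 1 = 6
        returned timeout            = 20 + 6 = 26 jiffies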
 /* Limit-check stall timeouts specified at boottime and runtime. */
 int rcu_jiffies_till_stall_check(void)
 {
@@ -565,9 +593,9 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
 
        for_each_possible_cpu(cpu)
                totqlen += rcu_get_n_cbs_cpu(cpu);
-       pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
+       pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n",
               smp_processor_id(), (long)(jiffies - gps),
-              (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+              (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
        if (ndetected) {
                rcu_dump_cpu_stacks();
 
@@ -626,9 +654,9 @@ static void print_cpu_stall(unsigned long gps)
        raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
        for_each_possible_cpu(cpu)
                totqlen += rcu_get_n_cbs_cpu(cpu);
-       pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
+       pr_cont("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n",
                jiffies - gps,
-               (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+               (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
 
        rcu_check_gp_kthread_expired_fqs_timer();
        rcu_check_gp_kthread_starvation();
index 180ff9c41fa87e228e5df9f435b7d4f0a1dfb3e6..fc7fef57560646d5a8e64e757434e4d4ad988840 100644 (file)
@@ -506,6 +506,8 @@ EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
 module_param(rcu_cpu_stall_suppress, int, 0644);
 int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
 module_param(rcu_cpu_stall_timeout, int, 0644);
+int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
+module_param(rcu_exp_cpu_stall_timeout, int, 0644);
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
 
 // Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
index dcb0410950e45cd6eb0d436c185f1dd281631614..5d113aa59e7732ecb285ac746a5eee4fcb111932 100644 (file)
@@ -267,9 +267,10 @@ static void scf_handler(void *scfc_in)
        }
        this_cpu_inc(scf_invoked_count);
        if (longwait <= 0) {
-               if (!(r & 0xffc0))
+               if (!(r & 0xffc0)) {
                        udelay(r & 0x3f);
-               goto out;
+                       goto out;
+               }
        }
        if (r & 0xfff)
                goto out;
index e0104b45029ad10c79c74aae4663de21304dd064..d9dc9ab3773f2b9935f26cdfbf890b84132d2d82 100644 (file)
@@ -15,6 +15,7 @@
 /* Headers: */
 #include <linux/sched/clock.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/hotplug.h>
 #include <linux/sched/posix-timers.h>
 #include <linux/sched/rt.h>
 
@@ -31,6 +32,7 @@
 #include <uapi/linux/sched/types.h>
 
 #include "sched.h"
+#include "smp.h"
 
 #include "autogroup.h"
 #include "stats.h"
index eec0849b2aae54c5667e2b7ded24b65987848b63..99bdd96f454f4eba861b11b0aae6991d348dce0e 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/isolation.h>
 #include <linux/sched/loadavg.h>
+#include <linux/sched/nohz.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/rseq_api.h>
 #include <linux/sched/task_stack.h>
index d9272d9061a3b54ba9521273904388a543e5a91b..e374c0c923daec23b6ea5db5667465ce604b2ae0 100644 (file)
@@ -287,7 +287,7 @@ again:
        clock = wrap_max(clock, min_clock);
        clock = wrap_min(clock, max_clock);
 
-       if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock)
+       if (!try_cmpxchg64(&scd->clock, &old_clock, clock))
                goto again;
 
        return clock;
@@ -349,7 +349,7 @@ again:
                val = remote_clock;
        }
 
-       if (cmpxchg64(ptr, old_val, val) != old_val)
+       if (!try_cmpxchg64(ptr, &old_val, val))
                goto again;
 
        return val;
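
Both conversions preserve behavior; try_cmpxchg64() additionally writes the
value it observed back through its second argument on failure, so a retry loop
does not need to re-read the location (and on x86 the boolean result can come
straight from the flags). The canonical retry shape, as a hedged sketch with
compute_new() as a placeholder:

        u64 old = READ_ONCE(*ptr);
        u64 new;

        do {
                new = compute_new(old);         /* placeholder */
        } while (!try_cmpxchg64(ptr, &old, new));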
index 51efaabac3e4303c536e88f9de5df8eae2f72eeb..a247f8d9d4175627d9bed0fa8d590789ba9de6d0 100644 (file)
 #include <linux/topology.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/cond_resched.h>
+#include <linux/sched/cputime.h>
 #include <linux/sched/debug.h>
+#include <linux/sched/hotplug.h>
+#include <linux/sched/init.h>
 #include <linux/sched/isolation.h>
 #include <linux/sched/loadavg.h>
 #include <linux/sched/mm.h>
@@ -610,10 +613,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
                swap(rq1, rq2);
 
        raw_spin_rq_lock(rq1);
-       if (__rq_lockp(rq1) == __rq_lockp(rq2))
-               return;
+       if (__rq_lockp(rq1) != __rq_lockp(rq2))
+               raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
 
-       raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+       double_rq_clock_clear_update(rq1, rq2);
 }
 #endif
 
@@ -2190,7 +2193,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
        if (p->sched_class == rq->curr->sched_class)
                rq->curr->sched_class->check_preempt_curr(rq, p, flags);
-       else if (p->sched_class > rq->curr->sched_class)
+       else if (sched_class_above(p->sched_class, rq->curr->sched_class))
                resched_curr(rq);
 
        /*
@@ -2408,7 +2411,7 @@ static int migration_cpu_stop(void *data)
         * __migrate_task() such that we will not miss enforcing cpus_ptr
         * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
         */
-       flush_smp_call_function_from_idle();
+       flush_smp_call_function_queue();
 
        raw_spin_lock(&p->pi_lock);
        rq_lock(rq, &rf);
@@ -5689,7 +5692,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
         * higher scheduling class, because otherwise those lose the
         * opportunity to pull in more work from other CPUs.
         */
-       if (likely(prev->sched_class <= &fair_sched_class &&
+       if (likely(!sched_class_above(prev->sched_class, &fair_sched_class) &&
                   rq->nr_running == rq->cfs.h_nr_running)) {
 
                p = pick_next_task_fair(rq, prev, rf);
@@ -6382,7 +6385,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
                migrate_disable_switch(rq, prev);
                psi_sched_switch(prev, next, !task_on_rq_queued(prev));
 
-               trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
+               trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
 
                /* Also unlocks the rq: */
                rq = context_switch(rq, prev, next, &rf);
@@ -8415,6 +8418,18 @@ static void __init preempt_dynamic_init(void)
        }
 }
 
+#define PREEMPT_MODEL_ACCESSOR(mode) \
+       bool preempt_model_##mode(void)                                          \
+       {                                                                        \
+               WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined); \
+               return preempt_dynamic_mode == preempt_dynamic_##mode;           \
+       }                                                                        \
+       EXPORT_SYMBOL_GPL(preempt_model_##mode)
+
+PREEMPT_MODEL_ACCESSOR(none);
+PREEMPT_MODEL_ACCESSOR(voluntary);
+PREEMPT_MODEL_ACCESSOR(full);
+
 #else /* !CONFIG_PREEMPT_DYNAMIC */
 
 static inline void preempt_dynamic_init(void) { }
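
For readability, PREEMPT_MODEL_ACCESSOR(full) expands to roughly the following,
which is what the rcu_blocking_is_gp() change earlier in this merge calls:

        bool preempt_model_full(void)
        {
                WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined);
                return preempt_dynamic_mode == preempt_dynamic_full;
        }
        EXPORT_SYMBOL_GPL(preempt_model_full);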
@@ -9457,11 +9472,11 @@ void __init sched_init(void)
        int i;
 
        /* Make sure the linker didn't screw up */
-       BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
-              &fair_sched_class + 1 != &rt_sched_class ||
-              &rt_sched_class + 1   != &dl_sched_class);
+       BUG_ON(&idle_sched_class != &fair_sched_class + 1 ||
+              &fair_sched_class != &rt_sched_class + 1 ||
+              &rt_sched_class   != &dl_sched_class + 1);
 #ifdef CONFIG_SMP
-       BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
+       BUG_ON(&dl_sched_class != &stop_sched_class + 1);
 #endif
 
        wait_bit_init();
index fb4255ae0b2c856ca677df3c84aab1ddceb134ae..936817ae142f92dd458054d340e9c3b03feeddcd 100644 (file)
@@ -1220,8 +1220,6 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
        return (dl_se->runtime <= 0);
 }
 
-extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
-
 /*
  * This function implements the GRUB accounting rule:
  * according to the GRUB reclaiming algorithm, the runtime is
@@ -1832,6 +1830,7 @@ out:
 
 static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
 {
+       struct rq_flags rf;
        struct rq *rq;
 
        if (READ_ONCE(p->__state) != TASK_WAKING)
@@ -1843,7 +1842,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
         * from try_to_wake_up(). Hence, p->pi_lock is locked, but
         * rq->lock is not... So, lock it
         */
-       raw_spin_rq_lock(rq);
+       rq_lock(rq, &rf);
        if (p->dl.dl_non_contending) {
                update_rq_clock(rq);
                sub_running_bw(&p->dl, &rq->dl);
@@ -1859,7 +1858,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
                        put_task_struct(p);
        }
        sub_rq_bw(&p->dl, &rq->dl);
-       raw_spin_rq_unlock(rq);
+       rq_unlock(rq, &rf);
 }
 
 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
@@ -2319,13 +2318,7 @@ retry:
 
        deactivate_task(rq, next_task, 0);
        set_task_cpu(next_task, later_rq->cpu);
-
-       /*
-        * Update the later_rq clock here, because the clock is used
-        * by the cpufreq_update_util() inside __add_running_bw().
-        */
-       update_rq_clock(later_rq);
-       activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
+       activate_task(later_rq, next_task, 0);
        ret = 1;
 
        resched_curr(later_rq);
index a68482d66535588d0b8dd9b0738482fda17f2fb6..906b2c7c48d1f4765ef6387926316f3832514e89 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/sched/cond_resched.h>
 #include <linux/sched/cputime.h>
 #include <linux/sched/isolation.h>
+#include <linux/sched/nohz.h>
 
 #include <linux/cpuidle.h>
 #include <linux/interrupt.h>
@@ -313,19 +314,6 @@ const struct sched_class fair_sched_class;
 #define for_each_sched_entity(se) \
                for (; se; se = se->parent)
 
-static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
-{
-       if (!path)
-               return;
-
-       if (cfs_rq && task_group_is_autogroup(cfs_rq->tg))
-               autogroup_path(cfs_rq->tg, path, len);
-       else if (cfs_rq && cfs_rq->tg->css.cgroup)
-               cgroup_path(cfs_rq->tg->css.cgroup, path, len);
-       else
-               strlcpy(path, "(null)", len);
-}
-
 static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
        struct rq *rq = rq_of(cfs_rq);
@@ -493,12 +481,6 @@ static int se_is_idle(struct sched_entity *se)
 #define for_each_sched_entity(se) \
                for (; se; se = NULL)
 
-static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
-{
-       if (path)
-               strlcpy(path, "(null)", len);
-}
-
 static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
        return true;
@@ -4846,11 +4828,11 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 
        cfs_rq->throttle_count--;
        if (!cfs_rq->throttle_count) {
-               cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
-                                            cfs_rq->throttled_clock_task;
+               cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
+                                            cfs_rq->throttled_clock_pelt;
 
                /* Add cfs_rq with load or one or more already running entities to the list */
-               if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
+               if (!cfs_rq_is_decayed(cfs_rq))
                        list_add_leaf_cfs_rq(cfs_rq);
        }
 
@@ -4864,7 +4846,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 
        /* group is entering throttled state, stop time */
        if (!cfs_rq->throttle_count) {
-               cfs_rq->throttled_clock_task = rq_clock_task(rq);
+               cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
                list_del_leaf_cfs_rq(cfs_rq);
        }
        cfs_rq->throttle_count++;
@@ -5308,7 +5290,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
        pcfs_rq = tg->parent->cfs_rq[cpu];
 
        cfs_rq->throttle_count = pcfs_rq->throttle_count;
-       cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+       cfs_rq->throttled_clock_pelt = rq_clock_pelt(cpu_rq(cpu));
 }
 
 /* conditionally throttle active cfs_rq's from put_prev_entity() */
@@ -6544,108 +6526,19 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 }
 
 /*
- * cpu_util_without: compute cpu utilization without any contributions from *p
- * @cpu: the CPU which utilization is requested
- * @p: the task which utilization should be discounted
- *
- * The utilization of a CPU is defined by the utilization of tasks currently
- * enqueued on that CPU as well as tasks which are currently sleeping after an
- * execution on that CPU.
- *
- * This method returns the utilization of the specified CPU by discounting the
- * utilization of the specified task, whenever the task is currently
- * contributing to the CPU utilization.
- */
-static unsigned long cpu_util_without(int cpu, struct task_struct *p)
-{
-       struct cfs_rq *cfs_rq;
-       unsigned int util;
-
-       /* Task has no contribution or is new */
-       if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
-               return cpu_util_cfs(cpu);
-
-       cfs_rq = &cpu_rq(cpu)->cfs;
-       util = READ_ONCE(cfs_rq->avg.util_avg);
-
-       /* Discount task's util from CPU's util */
-       lsub_positive(&util, task_util(p));
-
-       /*
-        * Covered cases:
-        *
-        * a) if *p is the only task sleeping on this CPU, then:
-        *      cpu_util (== task_util) > util_est (== 0)
-        *    and thus we return:
-        *      cpu_util_without = (cpu_util - task_util) = 0
-        *
-        * b) if other tasks are SLEEPING on this CPU, which is now exiting
-        *    IDLE, then:
-        *      cpu_util >= task_util
-        *      cpu_util > util_est (== 0)
-        *    and thus we discount *p's blocked utilization to return:
-        *      cpu_util_without = (cpu_util - task_util) >= 0
-        *
-        * c) if other tasks are RUNNABLE on that CPU and
-        *      util_est > cpu_util
-        *    then we use util_est since it returns a more restrictive
-        *    estimation of the spare capacity on that CPU, by just
-        *    considering the expected utilization of tasks already
-        *    runnable on that CPU.
-        *
-        * Cases a) and b) are covered by the above code, while case c) is
-        * covered by the following code when estimated utilization is
-        * enabled.
-        */
-       if (sched_feat(UTIL_EST)) {
-               unsigned int estimated =
-                       READ_ONCE(cfs_rq->avg.util_est.enqueued);
-
-               /*
-                * Despite the following checks we still have a small window
-                * for a possible race, when an execl's select_task_rq_fair()
-                * races with LB's detach_task():
-                *
-                *   detach_task()
-                *     p->on_rq = TASK_ON_RQ_MIGRATING;
-                *     ---------------------------------- A
-                *     deactivate_task()                   \
-                *       dequeue_task()                     + RaceTime
-                *         util_est_dequeue()              /
-                *     ---------------------------------- B
-                *
-                * The additional check on "current == p" it's required to
-                * properly fix the execl regression and it helps in further
-                * reducing the chances for the above race.
-                */
-               if (unlikely(task_on_rq_queued(p) || current == p))
-                       lsub_positive(&estimated, _task_util_est(p));
-
-               util = max(util, estimated);
-       }
-
-       /*
-        * Utilization (estimated) can exceed the CPU capacity, thus let's
-        * clamp to the maximum CPU capacity to ensure consistency with
-        * cpu_util.
-        */
-       return min_t(unsigned long, util, capacity_orig_of(cpu));
-}
-
-/*
- * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
- * to @dst_cpu.
+ * Predicts what cpu_util(@cpu) would return if @p was removed from @cpu
+ * (@dst_cpu = -1) or migrated to @dst_cpu.
  */
 static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
 {
        struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
-       unsigned long util_est, util = READ_ONCE(cfs_rq->avg.util_avg);
+       unsigned long util = READ_ONCE(cfs_rq->avg.util_avg);
 
        /*
-        * If @p migrates from @cpu to another, remove its contribution. Or,
-        * if @p migrates from another CPU to @cpu, add its contribution. In
-        * the other cases, @cpu is not impacted by the migration, so the
-        * util_avg should already be correct.
+        * If @dst_cpu is -1 or @p migrates from @cpu to @dst_cpu remove its
+        * contribution. If @p migrates from another CPU to @cpu add its
+        * contribution. In all the other cases @cpu is not impacted by the
+        * migration so its util_avg is already correct.
         */
        if (task_cpu(p) == cpu && dst_cpu != cpu)
                lsub_positive(&util, task_util(p));
@@ -6653,16 +6546,40 @@ static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
                util += task_util(p);
 
        if (sched_feat(UTIL_EST)) {
+               unsigned long util_est;
+
                util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
 
                /*
-                * During wake-up, the task isn't enqueued yet and doesn't
-                * appear in the cfs_rq->avg.util_est.enqueued of any rq,
-                * so just add it (if needed) to "simulate" what will be
-                * cpu_util after the task has been enqueued.
+                * During wake-up @p isn't enqueued yet and doesn't contribute
+                * to any cpu_rq(cpu)->cfs.avg.util_est.enqueued.
+                * If @dst_cpu == @cpu add it to "simulate" cpu_util after @p
+                * has been enqueued.
+                *
+                * During exec (@dst_cpu = -1) @p is enqueued and does
+                * contribute to cpu_rq(cpu)->cfs.util_est.enqueued.
+                * Remove it to "simulate" cpu_util without @p's contribution.
+                *
+                * Despite the task_on_rq_queued(@p) check there is still a
+                * small window for a possible race when an exec
+                * select_task_rq_fair() races with LB's detach_task().
+                *
+                *   detach_task()
+                *     deactivate_task()
+                *       p->on_rq = TASK_ON_RQ_MIGRATING;
+                *       -------------------------------- A
+                *       dequeue_task()                    \
+                *         dequeue_task_fair()              + Race Time
+                *           util_est_dequeue()            /
+                *       -------------------------------- B
+                *
+                * The additional check "current == p" is required to further
+                * reduce the race window.
                 */
                if (dst_cpu == cpu)
                        util_est += _task_util_est(p);
+               else if (unlikely(task_on_rq_queued(p) || current == p))
+                       lsub_positive(&util_est, _task_util_est(p));
 
                util = max(util, util_est);
        }
@@ -6670,6 +6587,28 @@ static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
        return min(util, capacity_orig_of(cpu));
 }
 
+/*
+ * cpu_util_without: compute cpu utilization without any contributions from *p
+ * @cpu: the CPU which utilization is requested
+ * @p: the task which utilization should be discounted
+ *
+ * The utilization of a CPU is defined by the utilization of tasks currently
+ * enqueued on that CPU as well as tasks which are currently sleeping after an
+ * execution on that CPU.
+ *
+ * This method returns the utilization of the specified CPU by discounting the
+ * utilization of the specified task, whenever the task is currently
+ * contributing to the CPU utilization.
+ */
+static unsigned long cpu_util_without(int cpu, struct task_struct *p)
+{
+       /* Task has no contribution or is new */
+       if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
+               return cpu_util_cfs(cpu);
+
+       return cpu_util_next(cpu, p, -1);
+}
+
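
To make the dst_cpu = -1 convention concrete, assume task p runs on CPU 2 with
task_util(p) = 128 and CPU 2's cfs util_avg = 512 (illustrative numbers, the
UTIL_EST adjustment ignored):

        cpu_util_next(2, p, -1) = 512 - 128 = 384   /* p removed: cpu_util_without() */
        cpu_util_next(2, p,  2) = 512               /* p stays put: no adjustment    */
        cpu_util_next(5, p,  5) = util_avg(5) + 128 /* p migrated to CPU 5           */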
 /*
  * compute_energy(): Estimates the energy that @pd would consume if @p was
  * migrated to @dst_cpu. compute_energy() predicts what will be the utilization
@@ -9460,8 +9399,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
                local->avg_load = (local->group_load * SCHED_CAPACITY_SCALE) /
                                  local->group_capacity;
 
-               sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
-                               sds->total_capacity;
                /*
                 * If the local group is more loaded than the selected
                 * busiest group don't try to pull any tasks.
@@ -9470,6 +9407,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
                        env->imbalance = 0;
                        return;
                }
+
+               sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
+                               sds->total_capacity;
        }
 
        /*
@@ -9495,7 +9435,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
  * busiest \ local has_spare fully_busy misfit asym imbalanced overloaded
  * has_spare        nr_idle   balanced   N/A    N/A  balanced   balanced
  * fully_busy       nr_idle   nr_idle    N/A    N/A  balanced   balanced
- * misfit_task      force     N/A        N/A    N/A  force      force
+ * misfit_task      force     N/A        N/A    N/A  N/A        N/A
  * asym_packing     force     force      N/A    N/A  force      force
  * imbalanced       force     force      N/A    N/A  force      force
  * overloaded       force     force      N/A    N/A  force      avg_load
@@ -11881,101 +11821,3 @@ __init void init_sched_fair_class(void)
 #endif /* SMP */
 
 }
-
-/*
- * Helper functions to facilitate extracting info from tracepoints.
- */
-
-const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq)
-{
-#ifdef CONFIG_SMP
-       return cfs_rq ? &cfs_rq->avg : NULL;
-#else
-       return NULL;
-#endif
-}
-EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_avg);
-
-char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len)
-{
-       if (!cfs_rq) {
-               if (str)
-                       strlcpy(str, "(null)", len);
-               else
-                       return NULL;
-       }
-
-       cfs_rq_tg_path(cfs_rq, str, len);
-       return str;
-}
-EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_path);
-
-int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq)
-{
-       return cfs_rq ? cpu_of(rq_of(cfs_rq)) : -1;
-}
-EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_cpu);
-
-const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq)
-{
-#ifdef CONFIG_SMP
-       return rq ? &rq->avg_rt : NULL;
-#else
-       return NULL;
-#endif
-}
-EXPORT_SYMBOL_GPL(sched_trace_rq_avg_rt);
-
-const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq)
-{
-#ifdef CONFIG_SMP
-       return rq ? &rq->avg_dl : NULL;
-#else
-       return NULL;
-#endif
-}
-EXPORT_SYMBOL_GPL(sched_trace_rq_avg_dl);
-
-const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq)
-{
-#if defined(CONFIG_SMP) && defined(CONFIG_HAVE_SCHED_AVG_IRQ)
-       return rq ? &rq->avg_irq : NULL;
-#else
-       return NULL;
-#endif
-}
-EXPORT_SYMBOL_GPL(sched_trace_rq_avg_irq);
-
-int sched_trace_rq_cpu(struct rq *rq)
-{
-       return rq ? cpu_of(rq) : -1;
-}
-EXPORT_SYMBOL_GPL(sched_trace_rq_cpu);
-
-int sched_trace_rq_cpu_capacity(struct rq *rq)
-{
-       return rq ?
-#ifdef CONFIG_SMP
-               rq->cpu_capacity
-#else
-               SCHED_CAPACITY_SCALE
-#endif
-               : -1;
-}
-EXPORT_SYMBOL_GPL(sched_trace_rq_cpu_capacity);
-
-const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
-{
-#ifdef CONFIG_SMP
-       return rd ? rd->span : NULL;
-#else
-       return NULL;
-#endif
-}
-EXPORT_SYMBOL_GPL(sched_trace_rd_span);
-
-int sched_trace_rq_nr_running(struct rq *rq)
-{
-        return rq ? rq->nr_running : -1;
-}
-EXPORT_SYMBOL_GPL(sched_trace_rq_nr_running);
index ecb0d705287753f080d347bcd3ae1961a5292f61..328cccbee4441ab44b07dd60b78c7646d20d087f 100644 (file)
@@ -102,7 +102,7 @@ void __cpuidle default_idle_call(void)
                 * last -- this is very similar to the entry code.
                 */
                trace_hardirqs_on_prepare();
-               lockdep_hardirqs_on_prepare(_THIS_IP_);
+               lockdep_hardirqs_on_prepare();
                rcu_idle_enter();
                lockdep_hardirqs_on(_THIS_IP_);
 
@@ -327,7 +327,7 @@ static void do_idle(void)
         * RCU relies on this call to be done outside of an RCU read-side
         * critical section.
         */
-       flush_smp_call_function_from_idle();
+       flush_smp_call_function_queue();
        schedule_idle();
 
        if (unlikely(klp_patch_pending(current)))
index c336f5f481bca25c781991eea01f956fa60c320d..4ff2ed4f8fa157fc20866581a7b388329ae51157 100644 (file)
@@ -145,9 +145,9 @@ static inline u64 rq_clock_pelt(struct rq *rq)
 static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
 {
        if (unlikely(cfs_rq->throttle_count))
-               return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
+               return cfs_rq->throttled_clock_pelt - cfs_rq->throttled_clock_pelt_time;
 
-       return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
+       return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time;
 }
 #else
 static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
index a4fa3aadfcba6f06b04d9958959622caa70ff1e0..a337f3e3599733ce7ebe6d8576942f633edb1d41 100644 (file)
@@ -1060,14 +1060,17 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
        mutex_unlock(&group->avgs_lock);
 
        for (full = 0; full < 2; full++) {
-               unsigned long avg[3];
-               u64 total;
+               unsigned long avg[3] = { 0, };
+               u64 total = 0;
                int w;
 
-               for (w = 0; w < 3; w++)
-                       avg[w] = group->avg[res * 2 + full][w];
-               total = div_u64(group->total[PSI_AVGS][res * 2 + full],
-                               NSEC_PER_USEC);
+               /* CPU FULL is undefined at the system level */
+               if (!(group == &psi_system && res == PSI_CPU && full)) {
+                       for (w = 0; w < 3; w++)
+                               avg[w] = group->avg[res * 2 + full][w];
+                       total = div_u64(group->total[PSI_AVGS][res * 2 + full],
+                                       NSEC_PER_USEC);
+               }
 
                seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
                           full ? "full" : "some",
@@ -1117,7 +1120,8 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
        t->state = state;
        t->threshold = threshold_us * NSEC_PER_USEC;
        t->win.size = window_us * NSEC_PER_USEC;
-       window_reset(&t->win, 0, 0, 0);
+       window_reset(&t->win, sched_clock(),
+                       group->total[PSI_POLL][t->state], 0);
 
        t->event = 0;
        t->last_event_time = 0;
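
Seeding the window with the current clock and the group's current total matters
because the first poll otherwise compares against zero. An illustration with
assumed numbers:

        /* Old: window_reset(&t->win, 0, 0, 0) while the group already has 5 s of stall. */
        growth = total - win.start_value = 5 s - 0 = 5 s    /* >= threshold: spurious event */
        /* New: start_value is the current total, so measured growth begins at 0. */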
index a32c46889af89900de533c90011fd8c6c116fa10..7891c0f0e1ff732eff0aafd61057b528158a80c4 100644 (file)
@@ -871,6 +871,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
                int enqueue = 0;
                struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
                struct rq *rq = rq_of_rt_rq(rt_rq);
+               struct rq_flags rf;
                int skip;
 
                /*
@@ -885,7 +886,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
                if (skip)
                        continue;
 
-               raw_spin_rq_lock(rq);
+               rq_lock(rq, &rf);
                update_rq_clock(rq);
 
                if (rt_rq->rt_time) {
@@ -923,7 +924,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 
                if (enqueue)
                        sched_rt_rq_enqueue(rt_rq);
-               raw_spin_rq_unlock(rq);
+               rq_unlock(rq, &rf);
        }
 
        if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
index 8dccb34eb1908b07379284d9708c37274cca2cec..2ce18584dca3c3f73a5f8669edadc58405ff4af1 100644 (file)
@@ -603,8 +603,8 @@ struct cfs_rq {
        s64                     runtime_remaining;
 
        u64                     throttled_clock;
-       u64                     throttled_clock_task;
-       u64                     throttled_clock_task_time;
+       u64                     throttled_clock_pelt;
+       u64                     throttled_clock_pelt_time;
        int                     throttled;
        int                     throttle_count;
        struct list_head        throttled_list;
@@ -1827,12 +1827,7 @@ static inline void dirty_sched_domain_sysctl(int cpu)
 #endif
 
 extern int sched_update_scaling(void);
-
-extern void flush_smp_call_function_from_idle(void);
-
-#else /* !CONFIG_SMP: */
-static inline void flush_smp_call_function_from_idle(void) { }
-#endif
+#endif /* CONFIG_SMP */
 
 #include "stats.h"
 
@@ -2182,6 +2177,8 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
  *
  *   include/asm-generic/vmlinux.lds.h
  *
+ * *CAREFUL* they are laid out in *REVERSE* order!!!
+ *
  * Also enforce alignment on the instance, not the type, to guarantee layout.
  */
 #define DEFINE_SCHED_CLASS(name) \
@@ -2190,17 +2187,16 @@ const struct sched_class name##_sched_class \
        __section("__" #name "_sched_class")
 
 /* Defined in include/asm-generic/vmlinux.lds.h */
-extern struct sched_class __begin_sched_classes[];
-extern struct sched_class __end_sched_classes[];
-
-#define sched_class_highest (__end_sched_classes - 1)
-#define sched_class_lowest  (__begin_sched_classes - 1)
+extern struct sched_class __sched_class_highest[];
+extern struct sched_class __sched_class_lowest[];
 
 #define for_class_range(class, _from, _to) \
-       for (class = (_from); class != (_to); class--)
+       for (class = (_from); class < (_to); class++)
 
 #define for_each_class(class) \
-       for_class_range(class, sched_class_highest, sched_class_lowest)
+       for_class_range(class, __sched_class_highest, __sched_class_lowest)
+
+#define sched_class_above(_a, _b)      ((_a) < (_b))
 
 extern const struct sched_class stop_sched_class;
 extern const struct sched_class dl_sched_class;
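
These macros depend on the linker-script change this merge relies on: the
sched_class instances are now emitted in reverse priority order, so the
highest-priority class sits at the lowest address and plain pointer comparison
doubles as priority comparison, as the reordered BUG_ON()s in sched_init()
verify. Illustratively (addresses ascending, SMP layout):

        __sched_class_highest -> stop_sched_class
                                 dl_sched_class
                                 rt_sched_class
                                 fair_sched_class
                                 idle_sched_class
        __sched_class_lowest  -> (one past idle_sched_class)

        sched_class_above(&rt_sched_class, &fair_sched_class)  /* true: lower address */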
@@ -2309,6 +2305,7 @@ extern void resched_cpu(int cpu);
 
 extern struct rt_bandwidth def_rt_bandwidth;
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
 
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
@@ -2478,6 +2475,24 @@ unsigned long arch_scale_freq_capacity(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_DEBUG
+/*
+ * In double_lock_balance()/double_rq_lock(), we use raw_spin_rq_lock() to
+ * acquire the rq lock instead of rq_lock(). So at the end of these two functions
+ * we need to call double_rq_clock_clear_update() to clear RQCF_UPDATED of
+ * rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
+ */
+static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
+{
+       rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+       /* rq1 == rq2 for !CONFIG_SMP, so just clear RQCF_UPDATED once. */
+#ifdef CONFIG_SMP
+       rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
+}
+#else
+static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
+#endif
 
 #ifdef CONFIG_SMP
 
@@ -2543,14 +2558,15 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
        __acquires(busiest->lock)
        __acquires(this_rq->lock)
 {
-       if (__rq_lockp(this_rq) == __rq_lockp(busiest))
-               return 0;
-
-       if (likely(raw_spin_rq_trylock(busiest)))
+       if (__rq_lockp(this_rq) == __rq_lockp(busiest) ||
+           likely(raw_spin_rq_trylock(busiest))) {
+               double_rq_clock_clear_update(this_rq, busiest);
                return 0;
+       }
 
        if (rq_order_less(this_rq, busiest)) {
                raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
+               double_rq_clock_clear_update(this_rq, busiest);
                return 0;
        }
 
@@ -2644,6 +2660,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
        BUG_ON(rq1 != rq2);
        raw_spin_rq_lock(rq1);
        __acquire(rq2->lock);   /* Fake it out ;) */
+       double_rq_clock_clear_update(rq1, rq2);
 }
 
 /*
index 9620e323162c8b8c1ac6edb928d22d8cbf301285..2eb23dd0f28560c33b2dcda70f19f846332a08de 100644 (file)
@@ -7,3 +7,9 @@
 extern void sched_ttwu_pending(void *arg);
 
 extern void send_call_function_single_ipi(int cpu);
+
+#ifdef CONFIG_SMP
+extern void flush_smp_call_function_queue(void);
+#else
+static inline void flush_smp_call_function_queue(void) { }
+#endif
index b5ac87f6dbd41b7eee646c268860d997b9eaead1..e9852d1b4a5ec7114f0e5f68d1bb90beef0b8b0a 100644 (file)
@@ -200,6 +200,8 @@ static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
  *        the filter can be freed.
  * @cache: cache of arch/syscall mappings to actions
  * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
+ * @wait_killable_recv: Put the notifying process in a killable state once the
+ *                     notification is received by the userspace listener.
  * @prev: points to a previously installed, or inherited, filter
  * @prog: the BPF program to evaluate
  * @notif: the struct that holds all notification related information
@@ -220,6 +222,7 @@ struct seccomp_filter {
        refcount_t refs;
        refcount_t users;
        bool log;
+       bool wait_killable_recv;
        struct action_cache cache;
        struct seccomp_filter *prev;
        struct bpf_prog *prog;
@@ -893,6 +896,10 @@ static long seccomp_attach_filter(unsigned int flags,
        if (flags & SECCOMP_FILTER_FLAG_LOG)
                filter->log = true;
 
+       /* Set wait killable flag, if present. */
+       if (flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
+               filter->wait_killable_recv = true;
+
        /*
         * If there is an existing filter, make it the prev and don't drop its
         * task reference.
@@ -1080,6 +1087,12 @@ static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_kn
        complete(&addfd->completion);
 }
 
+static bool should_sleep_killable(struct seccomp_filter *match,
+                                 struct seccomp_knotif *n)
+{
+       return match->wait_killable_recv && n->state == SECCOMP_NOTIFY_SENT;
+}
+
 static int seccomp_do_user_notification(int this_syscall,
                                        struct seccomp_filter *match,
                                        const struct seccomp_data *sd)
@@ -1100,7 +1113,7 @@ static int seccomp_do_user_notification(int this_syscall,
        n.data = sd;
        n.id = seccomp_next_notify_id(match);
        init_completion(&n.ready);
-       list_add(&n.list, &match->notif->notifications);
+       list_add_tail(&n.list, &match->notif->notifications);
        INIT_LIST_HEAD(&n.addfd);
 
        up(&match->notif->request);
@@ -1110,11 +1123,25 @@ static int seccomp_do_user_notification(int this_syscall,
         * This is where we wait for a reply from userspace.
         */
        do {
+               bool wait_killable = should_sleep_killable(match, &n);
+
                mutex_unlock(&match->notify_lock);
-               err = wait_for_completion_interruptible(&n.ready);
+               if (wait_killable)
+                       err = wait_for_completion_killable(&n.ready);
+               else
+                       err = wait_for_completion_interruptible(&n.ready);
                mutex_lock(&match->notify_lock);
-               if (err != 0)
+
+               if (err != 0) {
+                       /*
+                        * Check to see if the notification got picked up and
+                        * whether we should switch to a killable wait.
+                        */
+                       if (!wait_killable && should_sleep_killable(match, &n))
+                               continue;
+
                        goto interrupted;
+               }
 
                addfd = list_first_entry_or_null(&n.addfd,
                                                 struct seccomp_kaddfd, list);
@@ -1484,6 +1511,9 @@ out:
                mutex_lock(&filter->notify_lock);
                knotif = find_notification(filter, unotif.id);
                if (knotif) {
+                       /* Reset the process to make sure it's not stuck */
+                       if (should_sleep_killable(filter, knotif))
+                               complete(&knotif->ready);
                        knotif->state = SECCOMP_NOTIFY_INIT;
                        up(&filter->notif->request);
                }
@@ -1829,6 +1859,14 @@ static long seccomp_set_mode_filter(unsigned int flags,
            ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
                return -EINVAL;
 
+       /*
+        * The SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV flag doesn't make sense
+        * without the SECCOMP_FILTER_FLAG_NEW_LISTENER flag.
+        */
+       if ((flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) &&
+           ((flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) == 0))
+               return -EINVAL;
+
        /* Prepare the new filter before holding any locks. */
        prepared = seccomp_prepare_user_filter(filter);
        if (IS_ERR(prepared))
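For context, a minimal userspace sketch of the flag combination validated above: because of that check, SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV is only accepted together with SECCOMP_FILTER_FLAG_NEW_LISTENER. install_notify_filter() and the prog argument are hypothetical, and the constants are assumed to come from the UAPI <linux/seccomp.h> of this series:

#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Returns the notification listener fd on success, -1 on error. */
static int install_notify_filter(struct sock_fprog *prog)
{
        return syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER,
                       SECCOMP_FILTER_FLAG_NEW_LISTENER |
                       SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV, prog);
}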
index 30cd1ca43bcd5be968a4abd5f8090e3ce9e06f99..e43bc2a692f5e3efe1466ec1e6a9476356edb99c 100644 (file)
@@ -1805,7 +1805,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey)
 }
 #endif
 
-int force_sig_perf(void __user *addr, u32 type, u64 sig_data)
+int send_sig_perf(void __user *addr, u32 type, u64 sig_data)
 {
        struct kernel_siginfo info;
 
@@ -1817,7 +1817,18 @@ int force_sig_perf(void __user *addr, u32 type, u64 sig_data)
        info.si_perf_data = sig_data;
        info.si_perf_type = type;
 
-       return force_sig_info(&info);
+       /*
+        * Signals generated by perf events should not terminate the whole
+        * process if SIGTRAP is blocked; however, delivering the signal
+        * asynchronously is better than not delivering it at all. Tell user
+        * space when the signal was delivered asynchronously, so it can
+        * clearly be distinguished from normal synchronous signals.
+        */
+       info.si_perf_flags = sigismember(&current->blocked, info.si_signo) ?
+                                    TRAP_PERF_FLAG_ASYNC :
+                                    0;
+
+       return send_sig_info(info.si_signo, &info, current);
 }
 
 /**
@@ -3432,6 +3443,7 @@ void copy_siginfo_to_external32(struct compat_siginfo *to,
                to->si_addr = ptr_to_compat(from->si_addr);
                to->si_perf_data = from->si_perf_data;
                to->si_perf_type = from->si_perf_type;
+               to->si_perf_flags = from->si_perf_flags;
                break;
        case SIL_CHLD:
                to->si_pid = from->si_pid;
@@ -3509,6 +3521,7 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to,
                to->si_addr = compat_ptr(from->si_addr);
                to->si_perf_data = from->si_perf_data;
                to->si_perf_type = from->si_perf_type;
+               to->si_perf_flags = from->si_perf_flags;
                break;
        case SIL_CHLD:
                to->si_pid    = from->si_pid;
@@ -4722,6 +4735,7 @@ static inline void siginfo_buildtime_checks(void)
        CHECK_OFFSET(si_pkey);
        CHECK_OFFSET(si_perf_data);
        CHECK_OFFSET(si_perf_type);
+       CHECK_OFFSET(si_perf_flags);
 
        /* sigpoll */
        CHECK_OFFSET(si_band);
index 65a630f62363c2a04588e0f7bcf328548b820ca1..dd215f43942644946c6053013640a06dec7a6811 100644 (file)
@@ -96,7 +96,7 @@ static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
 
-static void flush_smp_call_function_queue(bool warn_cpu_offline);
+static void __flush_smp_call_function_queue(bool warn_cpu_offline);
 
 int smpcfd_prepare_cpu(unsigned int cpu)
 {
@@ -141,7 +141,7 @@ int smpcfd_dying_cpu(unsigned int cpu)
         * ensure that the outgoing CPU doesn't go offline with work
         * still pending.
         */
-       flush_smp_call_function_queue(false);
+       __flush_smp_call_function_queue(false);
        irq_work_run();
        return 0;
 }
@@ -183,7 +183,9 @@ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
 static DEFINE_PER_CPU(void *, cur_csd_info);
 static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local);
 
-#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+static ulong csd_lock_timeout = 5000;  /* CSD lock timeout in milliseconds. */
+module_param(csd_lock_timeout, ulong, 0444);
+
 static atomic_t csd_bug_count = ATOMIC_INIT(0);
 static u64 cfd_seq;
 
@@ -329,6 +331,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
        u64 ts2, ts_delta;
        call_single_data_t *cpu_cur_csd;
        unsigned int flags = READ_ONCE(csd->node.u_flags);
+       unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;
 
        if (!(flags & CSD_FLAG_LOCK)) {
                if (!unlikely(*bug_id))
@@ -341,7 +344,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
 
        ts2 = sched_clock();
        ts_delta = ts2 - *ts1;
-       if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+       if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
                return false;
 
        firsttime = !*bug_id;
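Because kernel/smp.c is built in, the new csd_lock_timeout parameter is set on the kernel command line rather than at modprobe time. Hedged examples of the two interesting settings (the zero behavior follows from the csd_lock_timeout_ns == 0 test above):

        smp.csd_lock_timeout=10000      complain after 10 s of waiting instead of the default 5 s
        smp.csd_lock_timeout=0          disable CSD lock wait complaints entirely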
@@ -541,11 +544,11 @@ void generic_smp_call_function_single_interrupt(void)
 {
        cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU,
                      smp_processor_id(), CFD_SEQ_GOTIPI);
-       flush_smp_call_function_queue(true);
+       __flush_smp_call_function_queue(true);
 }
 
 /**
- * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
+ * __flush_smp_call_function_queue - Flush pending smp-call-function callbacks
  *
  * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
  *                   offline CPU. Skip this check if set to 'false'.
@@ -558,7 +561,7 @@ void generic_smp_call_function_single_interrupt(void)
  * Loop through the call_single_queue and run all the queued callbacks.
  * Must be called with interrupts disabled.
  */
-static void flush_smp_call_function_queue(bool warn_cpu_offline)
+static void __flush_smp_call_function_queue(bool warn_cpu_offline)
 {
        call_single_data_t *csd, *csd_next;
        struct llist_node *entry, *prev;
@@ -681,8 +684,22 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
                      smp_processor_id(), CFD_SEQ_HDLEND);
 }
 
-void flush_smp_call_function_from_idle(void)
+
+/**
+ * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
+ *                                from task context (idle, migration thread)
+ *
+ * When TIF_POLLING_NRFLAG is supported and a CPU is idle with it set,
+ * remote CPUs can avoid sending an IPI and instead wake the idle CPU by
+ * setting TIF_NEED_RESCHED. The idle task on the woken-up CPU then has to
+ * handle queued SMP function calls before scheduling.
+ *
+ * The migration thread has to ensure that any pending wakeup has been
+ * handled before it migrates a task.
+ */
+void flush_smp_call_function_queue(void)
 {
+       unsigned int was_pending;
        unsigned long flags;
 
        if (llist_empty(this_cpu_ptr(&call_single_queue)))
@@ -691,9 +708,11 @@ void flush_smp_call_function_from_idle(void)
        cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
                      smp_processor_id(), CFD_SEQ_IDLE);
        local_irq_save(flags);
-       flush_smp_call_function_queue(true);
+       /* Get the already pending soft interrupts for RT enabled kernels */
+       was_pending = local_softirq_pending();
+       __flush_smp_call_function_queue(true);
        if (local_softirq_pending())
-               do_softirq();
+               do_softirq_post_smp_call_flush(was_pending);
 
        local_irq_restore(flags);
 }
index f6bc0bc8a2aab322c0f6e89f1d53a4302878e438..b9f54544e7499bb068a809436dbc1e1913f1a3ce 100644 (file)
@@ -392,6 +392,13 @@ int cpu_check_up_prepare(int cpu)
                 */
                return -EAGAIN;
 
+       case CPU_UP_PREPARE:
+               /*
+                * Timeout while waiting for the CPU to show up. Allow the
+                * caller to try again later.
+                */
+               return 0;
+
        default:
 
                /* Should not happen.  Famous last words. */
index fac801815554a6a2d37cbe37ca68b6a864fe1ed4..9f0aef8aa9ff815b2a2df8bd280e558050ab83d9 100644 (file)
@@ -294,6 +294,19 @@ static inline void invoke_softirq(void)
                wakeup_softirqd();
 }
 
+/*
+ * flush_smp_call_function_queue() can raise a soft interrupt in a function
+ * call. On RT kernels this is undesired and the only known functionality
+ * in the block layer which does this is disabled on RT. If soft interrupts
+ * get raised which haven't been raised before the flush, warn so it can be
+ * investigated.
+ */
+void do_softirq_post_smp_call_flush(unsigned int was_pending)
+{
+       if (WARN_ON_ONCE(was_pending != local_softirq_pending()))
+               invoke_softirq();
+}
+
 #else /* CONFIG_PREEMPT_RT */
 
 /*
index ddb5a7f48d69e665e5d381901f1bdc9c808f819c..c2c33d2202e9a16546e5cd6b49aeccdc116cd218 100644 (file)
@@ -70,59 +70,81 @@ late_initcall(stackleak_sysctls_init);
 #define skip_erasing() false
 #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
 
-asmlinkage void noinstr stackleak_erase(void)
+static __always_inline void __stackleak_erase(bool on_task_stack)
 {
-       /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
-       unsigned long kstack_ptr = current->lowest_stack;
-       unsigned long boundary = (unsigned long)end_of_stack(current);
-       unsigned int poison_count = 0;
-       const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
-
-       if (skip_erasing())
-               return;
-
-       /* Check that 'lowest_stack' value is sane */
-       if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
-               kstack_ptr = boundary;
+       const unsigned long task_stack_low = stackleak_task_low_bound(current);
+       const unsigned long task_stack_high = stackleak_task_high_bound(current);
+       unsigned long erase_low, erase_high;
 
-       /* Search for the poison value in the kernel stack */
-       while (kstack_ptr > boundary && poison_count <= depth) {
-               if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
-                       poison_count++;
-               else
-                       poison_count = 0;
-
-               kstack_ptr -= sizeof(unsigned long);
-       }
-
-       /*
-        * One 'long int' at the bottom of the thread stack is reserved and
-        * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
-        */
-       if (kstack_ptr == boundary)
-               kstack_ptr += sizeof(unsigned long);
+       erase_low = stackleak_find_top_of_poison(task_stack_low,
+                                                current->lowest_stack);
 
 #ifdef CONFIG_STACKLEAK_METRICS
-       current->prev_lowest_stack = kstack_ptr;
+       current->prev_lowest_stack = erase_low;
 #endif
 
        /*
-        * Now write the poison value to the kernel stack. Start from
-        * 'kstack_ptr' and move up till the new 'boundary'. We assume that
-        * the stack pointer doesn't change when we write poison.
+        * Write poison to the task's stack between 'erase_low' and
+        * 'erase_high'.
+        *
+        * If we're running on a different stack (e.g. an entry trampoline
+        * stack) we can erase everything below the pt_regs at the top of the
+        * task stack.
+        *
+        * If we're running on the task stack itself, we must not clobber any
+        * stack used by this function and its caller. We assume that this
+        * function has a fixed-size stack frame, and the current stack pointer
+        * doesn't change while we write poison.
         */
-       if (on_thread_stack())
-               boundary = current_stack_pointer;
+       if (on_task_stack)
+               erase_high = current_stack_pointer;
        else
-               boundary = current_top_of_stack();
+               erase_high = task_stack_high;
 
-       while (kstack_ptr < boundary) {
-               *(unsigned long *)kstack_ptr = STACKLEAK_POISON;
-               kstack_ptr += sizeof(unsigned long);
+       while (erase_low < erase_high) {
+               *(unsigned long *)erase_low = STACKLEAK_POISON;
+               erase_low += sizeof(unsigned long);
        }
 
        /* Reset the 'lowest_stack' value for the next syscall */
-       current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
+       current->lowest_stack = task_stack_high;
+}
+
+/*
+ * Erase and poison the portion of the task stack used since the last erase.
+ * Can be called from the task stack or an entry stack when the task stack is
+ * no longer in use.
+ */
+asmlinkage void noinstr stackleak_erase(void)
+{
+       if (skip_erasing())
+               return;
+
+       __stackleak_erase(on_thread_stack());
+}
+
+/*
+ * Erase and poison the portion of the task stack used since the last erase.
+ * Can only be called from the task stack.
+ */
+asmlinkage void noinstr stackleak_erase_on_task_stack(void)
+{
+       if (skip_erasing())
+               return;
+
+       __stackleak_erase(true);
+}
+
+/*
+ * Erase and poison the portion of the task stack used since the last erase.
+ * Can only be called from a stack other than the task stack.
+ */
+asmlinkage void noinstr stackleak_erase_off_task_stack(void)
+{
+       if (skip_erasing())
+               return;
+
+       __stackleak_erase(false);
 }
 
 void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
@@ -139,8 +161,7 @@ void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
        /* 'lowest_stack' should be aligned on the register width boundary */
        sp = ALIGN(sp, sizeof(unsigned long));
        if (sp < current->lowest_stack &&
-           sp >= (unsigned long)task_stack_page(current) +
-                                               sizeof(unsigned long)) {
+           sp >= stackleak_task_low_bound(current)) {
                current->lowest_stack = sp;
        }
 }
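This hunk relies on helpers introduced elsewhere in the same series. A hedged sketch of what the names imply, assumed to live in <linux/stackleak.h> (not shown in this diff): the low bound skips the one long reserved at the stack end, and the high bound stops below the pt_regs at the top of the task stack:

static __always_inline unsigned long
stackleak_task_low_bound(const struct task_struct *tsk)
{
        /* Skip the lowest long, reserved for CONFIG_SCHED_STACK_END_CHECK. */
        return (unsigned long)end_of_stack(tsk) + sizeof(unsigned long);
}

static __always_inline unsigned long
stackleak_task_high_bound(const struct task_struct *tsk)
{
        /* Erase everything below the task's pt_regs. */
        return (unsigned long)task_pt_regs(tsk);
}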
index cbc30271ea4dcbe5c234835838f2d4b8d62a8a4b..cedb17ba158a9bf57f3510fbfc8141adbd12b74b 100644 (file)
@@ -535,8 +535,6 @@ void stop_machine_park(int cpu)
        kthread_park(stopper->thread);
 }
 
-extern void sched_set_stop_task(int cpu, struct task_struct *stop);
-
 static void cpu_stop_create(unsigned int cpu)
 {
        sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
@@ -633,6 +631,27 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 }
 EXPORT_SYMBOL_GPL(stop_machine);
 
+#ifdef CONFIG_SCHED_SMT
+int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data)
+{
+       const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+
+       struct multi_stop_data msdata = {
+               .fn = fn,
+               .data = data,
+               .num_threads = cpumask_weight(smt_mask),
+               .active_cpus = smt_mask,
+       };
+
+       lockdep_assert_cpus_held();
+
+       /* Set the initial state and stop all online cpus. */
+       set_state(&msdata, MULTI_STOP_PREPARE);
+       return stop_cpus(smt_mask, multi_cpu_stop, &msdata);
+}
+EXPORT_SYMBOL_GPL(stop_core_cpuslocked);
+#endif
+
 /**
  * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
  * @fn: the function to run
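A hedged usage sketch for the new helper above: the _cpuslocked suffix means the caller must already hold the CPU hotplug read lock, and core_stop_fn is a hypothetical cpu_stop_fn_t callback:

static int run_on_whole_core(unsigned int cpu)
{
        int ret;

        cpus_read_lock();
        ret = stop_core_cpuslocked(cpu, core_stop_fn, NULL);
        cpus_read_unlock();
        return ret;
}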
index 374f83e952397ec90f322697f444124520c82945..b911fa6d81ab7a19bc3d40dfb796e361adbbcc8d 100644 (file)
 #ifndef SVE_GET_VL
 # define SVE_GET_VL()          (-EINVAL)
 #endif
+#ifndef SME_SET_VL
+# define SME_SET_VL(a)         (-EINVAL)
+#endif
+#ifndef SME_GET_VL
+# define SME_GET_VL()          (-EINVAL)
+#endif
 #ifndef PAC_RESET_KEYS
 # define PAC_RESET_KEYS(a, b)  (-EINVAL)
 #endif
@@ -2541,6 +2547,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
        case PR_SVE_GET_VL:
                error = SVE_GET_VL();
                break;
+       case PR_SME_SET_VL:
+               error = SME_SET_VL(arg2);
+               break;
+       case PR_SME_GET_VL:
+               error = SME_GET_VL();
+               break;
        case PR_GET_SPECULATION_CTRL:
                if (arg3 || arg4 || arg5)
                        return -EINVAL;
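A hedged userspace sketch of the new prctls, mirroring the existing PR_SVE_* pair; the constants are assumed to come from the UAPI <linux/prctl.h> of this series, and the vector length is given in bytes (32 bytes = 256 bits):

#include <linux/prctl.h>
#include <sys/prctl.h>

int main(void)
{
        long cur = prctl(PR_SME_GET_VL, 0, 0, 0, 0);  /* current SME VL in bytes */
        long ret = prctl(PR_SME_SET_VL, 32, 0, 0, 0); /* request a 256-bit VL */

        return (cur < 0 || ret < 0) ? 1 : 0;
}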
index 830aaf8ca08ee0cf3ca5f816fc347a0d4189ba3d..5b7b1a82ae6a4eb34644f488ad3bc976c682bfe0 100644 (file)
@@ -2288,17 +2288,6 @@ static struct ctl_table kern_table[] = {
                .extra1         = SYSCTL_ZERO,
                .extra2         = SYSCTL_ONE,
        },
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-       {
-               .procname       = "timer_migration",
-               .data           = &sysctl_timer_migration,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = timer_migration_handler,
-               .extra1         = SYSCTL_ZERO,
-               .extra2         = SYSCTL_ONE,
-       },
-#endif
 #ifdef CONFIG_BPF_SYSCALL
        {
                .procname       = "unprivileged_bpf_disabled",
index c59e1a49bc406c7a2cb0e05a36053dd3ad13a9ba..dff75bcde1514c82fc6580bcf407cd2ff7ae03cb 100644 (file)
@@ -12,12 +12,22 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
  * @notify: how to notify the targeted task
  *
  * Queue @work for task_work_run() below and notify the @task if @notify
- * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL works like signals, in that the
- * it will interrupt the targeted task and run the task_work. @TWA_RESUME
- * work is run only when the task exits the kernel and returns to user mode,
- * or before entering guest mode. Fails if the @task is exiting/exited and thus
- * it can't process this @work. Otherwise @work->func() will be called when the
- * @task goes through one of the aforementioned transitions, or exits.
+ * is @TWA_RESUME, @TWA_SIGNAL, or @TWA_SIGNAL_NO_IPI.
+ *
+ * @TWA_SIGNAL works like signals, in that it will interrupt the targeted
+ * task and run the task_work, regardless of whether the task is currently
+ * running in the kernel or userspace.
+ * @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a
+ * reschedule IPI to force the targeted task to reschedule and run task_work.
+ * This can be advantageous if there's no strict requirement that the
+ * task_work be run as soon as possible, just whenever the task enters the
+ * kernel anyway.
+ * @TWA_RESUME work is run only when the task exits the kernel and returns to
+ * user mode, or before entering guest mode.
+ *
+ * Fails if the @task is exiting/exited and thus it can't process this @work.
+ * Otherwise @work->func() will be called when the @task goes through one of
+ * the aforementioned transitions, or exits.
  *
  * If the targeted task is exiting, then an error is returned and the work item
  * is not queued. It's up to the caller to arrange for an alternative mechanism
@@ -53,6 +63,9 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
        case TWA_SIGNAL:
                set_notify_signal(task);
                break;
+       case TWA_SIGNAL_NO_IPI:
+               __set_notify_signal(task);
+               break;
        default:
                WARN_ON_ONCE(1);
                break;
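A hedged in-kernel sketch of the new mode: the work runs whenever the target task next enters the kernel, without paying for a reschedule IPI. my_callback is a hypothetical callback_head handler:

static void queue_lazy_work(struct task_struct *task,
                            struct callback_head *work)
{
        init_task_work(work, my_callback);
        if (task_work_add(task, work, TWA_SIGNAL_NO_IPI))
                pr_warn("target task is exiting; work not queued\n");
}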
index 003ccf338d2017c26441d2492be331fbad30d283..5d85014d59b5f829e34e257e1cd2b69ae6d5bef8 100644 (file)
@@ -690,7 +690,7 @@ static ssize_t unbind_device_store(struct device *dev,
 {
        char name[CS_NAME_LEN];
        ssize_t ret = sysfs_get_uname(buf, name, count);
-       struct clock_event_device *ce;
+       struct clock_event_device *ce = NULL, *iter;
 
        if (ret < 0)
                return ret;
@@ -698,9 +698,10 @@ static ssize_t unbind_device_store(struct device *dev,
        ret = -ENODEV;
        mutex_lock(&clockevents_mutex);
        raw_spin_lock_irq(&clockevents_lock);
-       list_for_each_entry(ce, &clockevent_devices, list) {
-               if (!strcmp(ce->name, name)) {
-                       ret = __clockevents_try_unbind(ce, dev->id);
+       list_for_each_entry(iter, &clockevent_devices, list) {
+               if (!strcmp(iter->name, name)) {
+                       ret = __clockevents_try_unbind(iter, dev->id);
+                       ce = iter;
                        break;
                }
        }
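This hunk applies the tree-wide rule that a list_for_each_entry() iterator is only meaningful inside the loop body; after a completed loop it points at a bogus entry computed from the list head, never NULL. The idiom as a hedged generic sketch (foo, match() and use() are hypothetical):

        struct foo *found = NULL, *iter;

        list_for_each_entry(iter, &foo_list, list) {
                if (match(iter)) {
                        found = iter;
                        break;
                }
        }
        if (found)
                use(found);     /* only 'found' is valid after the loop */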
index 95d7ca35bdf2ce92b0a9929fd20e4356599bb032..cee5da1e54c4121d771bf5fa19c07584e000bf49 100644 (file)
@@ -343,7 +343,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
        cpus_read_lock();
        preempt_disable();
        clocksource_verify_choose_cpus();
-       if (cpumask_weight(&cpus_chosen) == 0) {
+       if (cpumask_empty(&cpus_chosen)) {
                preempt_enable();
                cpus_read_unlock();
                pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
index b1b9b12899f5e43571e9cc39740f545cd6fad795..8464c5acc91338baee78b465933d59daf5740d5d 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/jiffies.h>
 #include <linux/ktime.h>
 #include <linux/kernel.h>
+#include <linux/math.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
@@ -199,15 +200,13 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 
        r = rate;
        if (r >= 4000000) {
-               r /= 1000000;
+               r = DIV_ROUND_CLOSEST(r, 1000000);
                r_unit = 'M';
+       } else if (r >= 4000) {
+               r = DIV_ROUND_CLOSEST(r, 1000);
+               r_unit = 'k';
        } else {
-               if (r >= 1000) {
-                       r /= 1000;
-                       r_unit = 'k';
-               } else {
-                       r_unit = ' ';
-               }
+               r_unit = ' ';
        }
 
        /* Calculate the ns resolution of this counter */
index d257721c68b8fc74d85601b713cded1bffded8d7..58a11f859ac79d7ea92d52a4e0d235201b0c65ac 100644 (file)
@@ -928,6 +928,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
        if (unlikely(expires == KTIME_MAX)) {
                if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
                        hrtimer_cancel(&ts->sched_timer);
+               else
+                       tick_program_event(KTIME_MAX, 1);
                return;
        }
 
@@ -1364,9 +1366,15 @@ static void tick_nohz_handler(struct clock_event_device *dev)
        tick_sched_do_timer(ts, now);
        tick_sched_handle(ts, regs);
 
-       /* No need to reprogram if we are running tickless  */
-       if (unlikely(ts->tick_stopped))
+       if (unlikely(ts->tick_stopped)) {
+               /*
+                * The clockevent device is not reprogrammed here, so put it
+                * into the ONESHOT_STOPPED state to avoid spurious interrupts
+                * on devices which might not be truly one-shot.
+                */
+               tick_program_event(KTIME_MAX, 1);
                return;
+       }
 
        hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
        tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
index 3b1398fbddaf8ed7fadd7ae11dbe703c917a17fe..8e4b3c32fcf9d95911a8f26e3171d93759cb5949 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/clocksource.h>
 #include <linux/jiffies.h>
 #include <linux/time.h>
+#include <linux/timex.h>
 #include <linux/tick.h>
 #include <linux/stop_machine.h>
 #include <linux/pvclock_gtod.h>
@@ -429,6 +430,14 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
        memcpy(base + 1, base, sizeof(*base));
 }
 
+static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
+{
+       u64 delta, cycles = tk_clock_read(tkr);
+
+       delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+       return timekeeping_delta_to_ns(tkr, delta);
+}
+
 static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 {
        struct tk_read_base *tkr;
@@ -439,12 +448,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
                seq = raw_read_seqcount_latch(&tkf->seq);
                tkr = tkf->base + (seq & 0x01);
                now = ktime_to_ns(tkr->base);
-
-               now += timekeeping_delta_to_ns(tkr,
-                               clocksource_delta(
-                                       tk_clock_read(tkr),
-                                       tkr->cycle_last,
-                                       tkr->mask));
+               now += fast_tk_get_delta_ns(tkr);
        } while (read_seqcount_latch_retry(&tkf->seq, seq));
 
        return now;
@@ -528,10 +532,27 @@ u64 notrace ktime_get_boot_fast_ns(void)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
 
-       return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+       return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
 }
 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
 
+/**
+ * ktime_get_tai_fast_ns - NMI-safe and fast access to the TAI clock.
+ *
+ * The same limitations as described for ktime_get_boot_fast_ns() apply. The
+ * mono time and the TAI offset are not read atomically, which may yield wrong
+ * readouts. However, an update of the TAI offset is a rare event, e.g. one
+ * caused by settime or adjtimex with an offset. The user of this function has
+ * to deal with the possibility of wrong timestamps in post-processing.
+ */
+u64 notrace ktime_get_tai_fast_ns(void)
+{
+       struct timekeeper *tk = &tk_core.timekeeper;
+
+       return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
+}
+EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
+
 static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
 {
        struct tk_read_base *tkr;
@@ -543,10 +564,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
                tkr = tkf->base + (seq & 0x01);
                basem = ktime_to_ns(tkr->base);
                baser = ktime_to_ns(tkr->base_real);
-
-               delta = timekeeping_delta_to_ns(tkr,
-                               clocksource_delta(tk_clock_read(tkr),
-                               tkr->cycle_last, tkr->mask));
+               delta = fast_tk_get_delta_ns(tkr);
        } while (read_seqcount_latch_retry(&tkf->seq, seq));
 
        if (mono)
@@ -2380,6 +2398,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc)
        return 0;
 }
 
+/**
+ * random_get_entropy_fallback - Returns the raw clock source value,
+ * used by random.c for platforms with no valid random_get_entropy().
+ */
+unsigned long random_get_entropy_fallback(void)
+{
+       struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
+       struct clocksource *clock = READ_ONCE(tkr->clock);
+
+       if (unlikely(timekeeping_suspended || !clock))
+               return 0;
+       return clock->read(clock);
+}
+EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
 
 /**
  * do_adjtimex() - Accessor function to NTP __do_adjtimex function
index 9dd2a39cb3b0058e4756ea129cd968195010c00e..717fcb9fb14aa8306234b57aa4e45c466a3a720c 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/slab.h>
 #include <linux/compat.h>
 #include <linux/random.h>
+#include <linux/sysctl.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -223,7 +224,7 @@ static void timer_update_keys(struct work_struct *work);
 static DECLARE_WORK(timer_update_work, timer_update_keys);
 
 #ifdef CONFIG_SMP
-unsigned int sysctl_timer_migration = 1;
+static unsigned int sysctl_timer_migration = 1;
 
 DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);
 
@@ -234,7 +235,42 @@ static void timers_update_migration(void)
        else
                static_branch_disable(&timers_migration_enabled);
 }
-#else
+
+#ifdef CONFIG_SYSCTL
+static int timer_migration_handler(struct ctl_table *table, int write,
+                           void *buffer, size_t *lenp, loff_t *ppos)
+{
+       int ret;
+
+       mutex_lock(&timer_keys_mutex);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (!ret && write)
+               timers_update_migration();
+       mutex_unlock(&timer_keys_mutex);
+       return ret;
+}
+
+static struct ctl_table timer_sysctl[] = {
+       {
+               .procname       = "timer_migration",
+               .data           = &sysctl_timer_migration,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = timer_migration_handler,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_ONE,
+       },
+       {}
+};
+
+static int __init timer_sysctl_init(void)
+{
+       register_sysctl("kernel", timer_sysctl);
+       return 0;
+}
+device_initcall(timer_sysctl_init);
+#endif /* CONFIG_SYSCTL */
+#else /* CONFIG_SMP */
 static inline void timers_update_migration(void) { }
 #endif /* !CONFIG_SMP */
 
@@ -251,19 +287,6 @@ void timers_update_nohz(void)
        schedule_work(&timer_update_work);
 }
 
-int timer_migration_handler(struct ctl_table *table, int write,
-                           void *buffer, size_t *lenp, loff_t *ppos)
-{
-       int ret;
-
-       mutex_lock(&timer_keys_mutex);
-       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-       if (!ret && write)
-               timers_update_migration();
-       mutex_unlock(&timer_keys_mutex);
-       return ret;
-}
-
 static inline bool is_timers_nohz_active(void)
 {
        return static_branch_unlikely(&timers_nohz_active);
@@ -502,7 +525,7 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
         *
         * Round up with level granularity to prevent this.
         */
-       expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
+       expires = (expires >> LVL_SHIFT(lvl)) + 1;
        *bucket_expiry = expires << LVL_SHIFT(lvl);
        return LVL_OFFS(lvl) + (expires & LVL_MASK);
 }
@@ -615,9 +638,39 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
 
 static const struct debug_obj_descr timer_debug_descr;
 
+struct timer_hint {
+       void    (*function)(struct timer_list *t);
+       long    offset;
+};
+
+#define TIMER_HINT(fn, container, timr, hintfn)                        \
+       {                                                       \
+               .function = fn,                                 \
+               .offset   = offsetof(container, hintfn) -       \
+                           offsetof(container, timr)           \
+       }
+
+static const struct timer_hint timer_hints[] = {
+       TIMER_HINT(delayed_work_timer_fn,
+                  struct delayed_work, timer, work.func),
+       TIMER_HINT(kthread_delayed_work_timer_fn,
+                  struct kthread_delayed_work, timer, work.func),
+};
+
 static void *timer_debug_hint(void *addr)
 {
-       return ((struct timer_list *) addr)->function;
+       struct timer_list *timer = addr;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(timer_hints); i++) {
+               if (timer_hints[i].function == timer->function) {
+                       void (**fn)(void) = addr + timer_hints[i].offset;
+
+                       return *fn;
+               }
+       }
+
+       return timer->function;
 }
 
 static bool timer_is_static_object(void *addr)
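The effect of the hint table, as a hedged example: for a delayed work item, the debugobjects hint now reports the user's handler instead of the generic timer trampoline. dwork and my_handler are hypothetical:

        static struct delayed_work dwork;

        INIT_DELAYED_WORK(&dwork, my_handler);
        /*
         * timer_debug_hint(&dwork.timer) matches delayed_work_timer_fn in
         * timer_hints[], follows the precomputed offset from 'timer' to
         * 'work.func', and returns my_handler instead of
         * delayed_work_timer_fn.
         */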
@@ -1780,8 +1833,6 @@ void update_process_times(int user_tick)
 {
        struct task_struct *p = current;
 
-       PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0);
-
        /* Note: this timer irq context must be accounted for as well. */
        account_process_tick(p, user_tick);
        run_local_timers();
@@ -1953,6 +2004,7 @@ int timers_prepare_cpu(unsigned int cpu)
                base = per_cpu_ptr(&timer_bases[b], cpu);
                base->clk = jiffies;
                base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+               base->next_expiry_recalc = false;
                base->timers_pending = false;
                base->is_idle = false;
        }
index 2c43e327a619f77b4e5124851f10f58d643f8f46..debbbb0832866892bdcedfb97100f0882d19ccd3 100644 (file)
@@ -144,6 +144,7 @@ config TRACING
        select BINARY_PRINTF
        select EVENT_TRACING
        select TRACE_CLOCK
+       select TASKS_RCU if PREEMPTION
 
 config GENERIC_TRACER
        bool
@@ -728,6 +729,7 @@ config FTRACE_MCOUNT_USE_OBJTOOL
        depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
        depends on !FTRACE_MCOUNT_USE_CC
        depends on FTRACE_MCOUNT_RECORD
+       select OBJTOOL
 
 config FTRACE_MCOUNT_USE_RECORDMCOUNT
        def_bool y
index 4d5629196d01dcb1592bb2ad483b56cdf8966f9d..10a32b0f2deb6d8e168a75b756837e5750bd13f4 100644 (file)
@@ -145,13 +145,14 @@ static void trace_note_time(struct blk_trace *bt)
        local_irq_restore(flags);
 }
 
-void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
-       const char *fmt, ...)
+void __blk_trace_note_message(struct blk_trace *bt,
+               struct cgroup_subsys_state *css, const char *fmt, ...)
 {
        int n;
        va_list args;
        unsigned long flags;
        char *buf;
+       u64 cgid = 0;
 
        if (unlikely(bt->trace_state != Blktrace_running &&
                     !blk_tracer_enabled))
@@ -170,17 +171,16 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
        n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
        va_end(args);
 
-       if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
-               blkcg = NULL;
 #ifdef CONFIG_BLK_CGROUP
-       trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
-                  blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
-#else
-       trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0);
+       if (css && (blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
+               cgid = cgroup_id(css->cgroup);
+       else
+               cgid = 1;
 #endif
+       trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, cgid);
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(__trace_note_message);
+EXPORT_SYMBOL_GPL(__blk_trace_note_message);
 
 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
                         pid_t pid)
@@ -411,7 +411,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
                return PTR_ERR(msg);
 
        bt = filp->private_data;
-       __trace_note_message(bt, NULL, "%s", msg);
+       __blk_trace_note_message(bt, NULL, "%s", msg);
        kfree(msg);
 
        return count;
@@ -783,6 +783,7 @@ void blk_trace_shutdown(struct request_queue *q)
 #ifdef CONFIG_BLK_CGROUP
 static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 {
+       struct cgroup_subsys_state *blkcg_css;
        struct blk_trace *bt;
 
        /* We don't use the 'bt' value here except as an optimization... */
@@ -790,9 +791,10 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
        if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
                return 0;
 
-       if (!bio->bi_blkg)
+       blkcg_css = bio_blkcg_css(bio);
+       if (!blkcg_css)
                return 0;
-       return cgroup_id(bio_blkcg(bio)->css.cgroup);
+       return cgroup_id(blkcg_css->cgroup);
 }
 #else
 static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
index 8f4fb328133abffc747f7f3794c82ca653a25e5d..3fd5284f648797fb44d7c45031404d76f9eb7a2c 100644 (file)
@@ -30,6 +30,24 @@ int ftrace_graph_active;
 /* Both enabled by default (can be cleared by function_graph tracer flags) */
 static bool fgraph_sleep_time = true;
 
+/*
+ * archs can override this function if they must do something
+ * to enable the hook for the graph tracer.
+ */
+int __weak ftrace_enable_ftrace_graph_caller(void)
+{
+       return 0;
+}
+
+/*
+ * archs can override this function if they must do something
+ * to disable the hook for the graph tracer.
+ */
+int __weak ftrace_disable_ftrace_graph_caller(void)
+{
+       return 0;
+}
+
 /**
  * ftrace_graph_stop - set to permanently disable function graph tracing
  *
@@ -404,9 +422,9 @@ free:
 
 static void
 ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
-                               unsigned int prev_state,
                                struct task_struct *prev,
-                               struct task_struct *next)
+                               struct task_struct *next,
+                               unsigned int prev_state)
 {
        unsigned long long timestamp;
        int index;
index 4f1d2f5e7263412f5c5467fef32621e05cb14450..af899b058c8d09b8eb14e2b5614817122f6bb9b9 100644 (file)
@@ -7420,9 +7420,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
 
 static void
 ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
-                                    unsigned int prev_state,
                                     struct task_struct *prev,
-                                    struct task_struct *next)
+                                    struct task_struct *next,
+                                    unsigned int prev_state)
 {
        struct trace_array *tr = data;
        struct trace_pid_list *pid_list;
index f4de111fa18ffa23a52273509bbee80027e52dbe..124f1897fd56942076454205b0e00e13cd89f0d0 100644 (file)
@@ -4289,17 +4289,11 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
                   entries,
                   total,
                   buf->cpu,
-#if defined(CONFIG_PREEMPT_NONE)
-                  "server",
-#elif defined(CONFIG_PREEMPT_VOLUNTARY)
-                  "desktop",
-#elif defined(CONFIG_PREEMPT)
-                  "preempt",
-#elif defined(CONFIG_PREEMPT_RT)
-                  "preempt_rt",
-#else
+                  preempt_model_none()      ? "server" :
+                  preempt_model_voluntary() ? "desktop" :
+                  preempt_model_full()      ? "preempt" :
+                  preempt_model_rt()        ? "preempt_rt" :
                   "unknown",
-#endif
                   /* These are reserved for later use */
                   0, 0, 0, 0);
 #ifdef CONFIG_SMP
index e11e167b78090a2b3aa2e621fc73e0335eb600a3..f97de82d1342ac3ab280be6b54e008c5621fbff1 100644 (file)
@@ -773,9 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
 
 static void
 event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
-                                       unsigned int prev_state,
                                        struct task_struct *prev,
-                                       struct task_struct *next)
+                                       struct task_struct *next,
+                                       unsigned int prev_state)
 {
        struct trace_array *tr = data;
        struct trace_pid_list *no_pid_list;
@@ -799,9 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
 
 static void
 event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
-                                        unsigned int prev_state,
                                         struct task_struct *prev,
-                                        struct task_struct *next)
+                                        struct task_struct *next,
+                                        unsigned int prev_state)
 {
        struct trace_array *tr = data;
        struct trace_pid_list *no_pid_list;
index e9ae1f33a7f03b04af4470094879744ac99906fa..afb92e2f0aeab5327a727d464e9150815050feba 100644 (file)
@@ -1168,9 +1168,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
  */
 static void
 trace_sched_switch_callback(void *data, bool preempt,
-                           unsigned int prev_state,
                            struct task_struct *p,
-                           struct task_struct *n)
+                           struct task_struct *n,
+                           unsigned int prev_state)
 {
        struct osnoise_variables *osn_var = this_cpu_osn_var();
 
index f4938040c2286e6be613ca34cd767bee5dcaaeae..95b58bd757ce400f1766e296088650850d57ae30 100644 (file)
@@ -46,7 +46,7 @@ void trace_hardirqs_on(void)
                this_cpu_write(tracing_irq_cpu, 0);
        }
 
-       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       lockdep_hardirqs_on_prepare();
        lockdep_hardirqs_on(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_on);
@@ -94,7 +94,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
                this_cpu_write(tracing_irq_cpu, 0);
        }
 
-       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       lockdep_hardirqs_on_prepare();
        lockdep_hardirqs_on(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
index 45796d8bd4b2dcee74bc131203c1c2f64bc219ee..c9ffdcfe622e14ae9e09168f8442333dddda8f85 100644 (file)
@@ -22,8 +22,8 @@ static DEFINE_MUTEX(sched_register_mutex);
 
 static void
 probe_sched_switch(void *ignore, bool preempt,
-                  unsigned int prev_state,
-                  struct task_struct *prev, struct task_struct *next)
+                  struct task_struct *prev, struct task_struct *next,
+                  unsigned int prev_state)
 {
        int flags;
 
index 46429f9a96fafd176cab549a5bdc49030601d844..330aee1c1a49e6804df4c4ce7389282c126f4922 100644 (file)
@@ -426,8 +426,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 
 static void notrace
 probe_wakeup_sched_switch(void *ignore, bool preempt,
-                         unsigned int prev_state,
-                         struct task_struct *prev, struct task_struct *next)
+                         struct task_struct *prev, struct task_struct *next,
+                         unsigned int prev_state)
 {
        struct trace_array_cpu *data;
        u64 T0, T1, delta;
index 087e06b4cdfdeb08dbe832a6b98a1f22e8d57483..6a843639814fbfeb0e48fc99481411fea4a71b2d 100644 (file)
@@ -737,3 +737,6 @@ config PLDMFW
 
 config ASN1_ENCODER
        tristate
+
+config POLYNOMIAL
+       tristate
index 075cd25363ac38ea7cc126dac79c9c0b40145bf8..a30d5279efda6fad5cd4165a00277554b802c7e3 100644 (file)
@@ -485,24 +485,25 @@ config FRAME_POINTER
          larger and slower, but it gives very useful debugging information
          in case of kernel bugs. (precise oopses/stacktraces/warnings)
 
+config OBJTOOL
+       bool
+
 config STACK_VALIDATION
        bool "Compile-time stack metadata validation"
-       depends on HAVE_STACK_VALIDATION
+       depends on HAVE_STACK_VALIDATION && UNWINDER_FRAME_POINTER
+       select OBJTOOL
        default n
        help
-         Add compile-time checks to validate stack metadata, including frame
-         pointers (if CONFIG_FRAME_POINTER is enabled).  This helps ensure
-         that runtime stack traces are more reliable.
-
-         This is also a prerequisite for generation of ORC unwind data, which
-         is needed for CONFIG_UNWINDER_ORC.
+         Validate frame pointer rules at compile-time.  This helps ensure that
+         runtime stack traces are more reliable.
 
          For more information, see
          tools/objtool/Documentation/stack-validation.txt.
 
-config VMLINUX_VALIDATION
+config NOINSTR_VALIDATION
        bool
-       depends on STACK_VALIDATION && DEBUG_ENTRY
+       depends on HAVE_NOINSTR_VALIDATION && DEBUG_ENTRY
+       select OBJTOOL
        default y
 
 config VMLINUX_MAP
@@ -1616,8 +1617,7 @@ config WARN_ALL_UNSEEDED_RANDOM
          so architecture maintainers really need to do what they can
          to get the CRNG seeded sooner after the system is booted.
          However, since users cannot do anything actionable to
-         address this, by default the kernel will issue only a single
-         warning for the first use of unseeded randomness.
+         address this, by default this option is disabled.
 
          Say Y here if you want to receive warnings for all uses of
          unseeded randomness.  This will be of use primarily for
@@ -2035,10 +2035,11 @@ config KCOV
        bool "Code coverage for fuzzing"
        depends on ARCH_HAS_KCOV
        depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
-       depends on !ARCH_WANTS_NO_INSTR || STACK_VALIDATION || \
+       depends on !ARCH_WANTS_NO_INSTR || HAVE_NOINSTR_HACK || \
                   GCC_VERSION >= 120000 || CLANG_VERSION >= 130000
        select DEBUG_FS
        select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
+       select OBJTOOL if HAVE_NOINSTR_HACK
        help
          KCOV exposes kernel code coverage information in a form suitable
          for coverage-guided fuzzing (randomized testing).
index de022445fbba563c7c629151ba3bb31879ecd1ae..47a693c458642fa1968b554366fa87b833f1859a 100644 (file)
@@ -187,7 +187,9 @@ config KCSAN_WEAK_MEMORY
        # We can either let objtool nop __tsan_func_{entry,exit}() and builtin
        # atomics instrumentation in .noinstr.text, or use a compiler that can
        # implement __no_kcsan to really remove all instrumentation.
-       depends on STACK_VALIDATION || CC_IS_GCC || CLANG_VERSION >= 140000
+       depends on !ARCH_WANTS_NO_INSTR || HAVE_NOINSTR_HACK || \
+                  CC_IS_GCC || CLANG_VERSION >= 140000
+       select OBJTOOL if HAVE_NOINSTR_HACK
        help
          Enable support for modeling a subset of weak memory, which allows
          detecting a subset of data races due to missing memory barriers.
index f3c57ed51838147370336a7494530a674bcf9792..c4fe15d38b60eae97e1b4c8b63f17a7dd0012499 100644 (file)
@@ -94,7 +94,7 @@ config UBSAN_UNREACHABLE
        bool "Perform checking for unreachable code"
        # objtool already handles unreachable checking and gets angry about
        # seeing UBSan instrumentation located in unreachable places.
-       depends on !STACK_VALIDATION
+       depends on !(OBJTOOL && (STACK_VALIDATION || UNWINDER_ORC || X86_SMAP))
        depends on $(cc-option,-fsanitize=unreachable)
        help
          This option enables -fsanitize=unreachable which checks for control
index 6b9ffc1bd1eed2c8660a3281456441545cb257b6..89fcae891361f000550b451d13825589ea92cc91 100644 (file)
@@ -263,6 +263,8 @@ obj-$(CONFIG_MEMREGION) += memregion.o
 obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
 obj-$(CONFIG_IRQ_POLL) += irq_poll.o
 
+obj-$(CONFIG_POLYNOMIAL) += polynomial.o
+
 # stackdepot.c should not be instrumented or call instrumented functions.
 # Prevent the compiler from calling builtins like memcmp() or bcmp() from this
 # file.
index 45a0584f65417e97e1664644b1a21195fd4b231c..c223a2575b7217f8f61ea422b5a93e642afa9505 100644 (file)
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -6,8 +6,7 @@
 
   CONFIG_BUG - emit BUG traps.  Nothing happens without this.
   CONFIG_GENERIC_BUG - enable this code.
-  CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to
-       the containing struct bug_entry for bug_addr and file.
+  CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit relative pointers for bug_addr and file
   CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG
 
   CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable
@@ -53,10 +52,10 @@ extern struct bug_entry __start___bug_table[], __stop___bug_table[];
 
 static inline unsigned long bug_addr(const struct bug_entry *bug)
 {
-#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-       return bug->bug_addr;
+#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+       return (unsigned long)&bug->bug_addr_disp + bug->bug_addr_disp;
 #else
-       return (unsigned long)bug + bug->bug_addr_disp;
+       return bug->bug_addr;
 #endif
 }
 
@@ -131,10 +130,10 @@ void bug_get_file_line(struct bug_entry *bug, const char **file,
                       unsigned int *line)
 {
 #ifdef CONFIG_DEBUG_BUGVERBOSE
-#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-       *file = bug->file;
+#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+       *file = (const char *)&bug->file_disp + bug->file_disp;
 #else
-       *file = (const char *)bug + bug->file_disp;
+       *file = bug->file;
 #endif
        *line = bug->line;
 #else
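For reference, a hedged sketch of the two struct bug_entry layouts these #ifdef branches select between; the shape is assumed from include/asm-generic/bug.h and is not part of this diff:

struct bug_entry {
#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
        signed int      bug_addr_disp;  /* displacement from its own address */
#ifdef CONFIG_DEBUG_BUGVERBOSE
        signed int      file_disp;
#endif
#else
        unsigned long   bug_addr;
#ifdef CONFIG_DEBUG_BUGVERBOSE
        const char      *file;
#endif
#endif
#ifdef CONFIG_DEBUG_BUGVERBOSE
        unsigned short  line;
#endif
        unsigned short  flags;
};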
index 6946f8e204e3950614c979f5e9732ac65546debc..337d797a714163460d2e89e55b6cce443d1b52db 100644 (file)
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Generic infrastructure for lifetime debugging of objects.
  *
- * Started by Thomas Gleixner
- *
  * Copyright (C) 2008, Thomas Gleixner <tglx@linutronix.de>
- *
- * For licencing details see kernel-base/COPYING
  */
 
 #define pr_fmt(fmt) "ODEBUG: " fmt
index 06811d866775c0c353a885f119879c7cc992db5d..53f6b9c6e9366200906c8e49a98d2e37161fa51b 100644 (file)
 *        Each profile array must have NET_DIM_PARAMS_NUM_PROFILES entries
  */
 #define NET_DIM_PARAMS_NUM_PROFILES 5
-#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
-#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
+#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128
 #define NET_DIM_DEF_PROFILE_CQE 1
 #define NET_DIM_DEF_PROFILE_EQE 1
 
 #define NET_DIM_RX_EQE_PROFILES { \
-       {1,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-       {8,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-       {64,  NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-       {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-       {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+       {.usec = 1,   .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+       {.usec = 8,   .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+       {.usec = 64,  .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+       {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+       {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}  \
 }
 
 #define NET_DIM_RX_CQE_PROFILES { \
-       {2,  256},             \
-       {8,  128},             \
-       {16, 64},              \
-       {32, 64},              \
-       {64, 64}               \
+       {.usec = 2,  .pkts = 256,},             \
+       {.usec = 8,  .pkts = 128,},             \
+       {.usec = 16, .pkts = 64,},              \
+       {.usec = 32, .pkts = 64,},              \
+       {.usec = 64, .pkts = 64,}               \
 }
 
 #define NET_DIM_TX_EQE_PROFILES { \
-       {1,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-       {8,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-       {32,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-       {64,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-       {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}   \
+       {.usec = 1,   .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
+       {.usec = 8,   .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
+       {.usec = 32,  .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
+       {.usec = 64,  .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
+       {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}   \
 }
 
 #define NET_DIM_TX_CQE_PROFILES { \
-       {5,  128},  \
-       {8,  64},  \
-       {16, 32},  \
-       {32, 32},  \
-       {64, 32}   \
+       {.usec = 5,  .pkts = 128,},  \
+       {.usec = 8,  .pkts = 64,},  \
+       {.usec = 16, .pkts = 32,},  \
+       {.usec = 32, .pkts = 32,},  \
+       {.usec = 64, .pkts = 32,}   \
 }
 
 static const struct dim_cq_moder
index 2f17b488d58e11b51996565dfe5e24adccdae3b1..2d5329a421058f959ea565a1ea2fed0104aa8968 100644 (file)
@@ -188,14 +188,18 @@ EXPORT_SYMBOL(irq_poll_init);
 static int irq_poll_cpu_dead(unsigned int cpu)
 {
        /*
-        * If a CPU goes away, splice its entries to the current CPU
-        * and trigger a run of the softirq
+        * If a CPU goes away, splice its entries to the current CPU and
+        * set the POLL softirq bit. The local_bh_disable()/enable() pair
+        * ensures that it is handled. Otherwise the current CPU could
+        * reach idle with the POLL softirq pending.
         */
+       local_bh_disable();
        local_irq_disable();
        list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
                         this_cpu_ptr(&blk_cpu_iopoll));
        __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
        local_irq_enable();
+       local_bh_enable();
 
        return 0;
 }
index af9302141bcf63983b8bac15468174f63ed0edc7..e5c5315da274194e5f8f35b5e9735551c030e5a8 100644 (file)
@@ -76,6 +76,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
        data = kzalloc(sizeof(*ref->data), gfp);
        if (!data) {
                free_percpu((void __percpu *)ref->percpu_count_ptr);
+               ref->percpu_count_ptr = 0;
                return -ENOMEM;
        }
 
diff --git a/lib/polynomial.c b/lib/polynomial.c
new file mode 100644 (file)
index 0000000..66d3834
--- /dev/null
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic polynomial calculation using integer coefficients.
+ *
+ * Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+ *
+ * Authors:
+ *   Maxim Kaurkin <maxim.kaurkin@baikalelectronics.ru>
+ *   Serge Semin <Sergey.Semin@baikalelectronics.ru>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/polynomial.h>
+
+/*
+ * Originally this was part of drivers/hwmon/bt1-pvt.c.
+ * There the following conversion is used and should serve as an example here:
+ *
+ * The original translation formulae of the temperature (in degrees of Celsius)
+ * to PVT data and vice versa are as follows:
+ *
+ * N = 1.8322e-8*(T^4) + 2.343e-5*(T^3) + 8.7018e-3*(T^2) + 3.9269*(T^1) +
+ *     1.7204e2
+ * T = -1.6743e-11*(N^4) + 8.1542e-8*(N^3) - 1.8201e-4*(N^2) +
+ *     3.1020e-1*(N^1) - 4.838e1
+ *
+ * where T = [-48.380, 147.438]C and N = [0, 1023].
+ *
+ * They must be altered accordingly to be suitable for integer arithmetic.
+ * The technique is called 'factor redistribution': the multiplications and
+ * divisions are arranged so that every intermediate result stays within the
+ * integer limits. In addition we need to translate the formulae to accept
+ * millidegrees of Celsius. Here is what they look like after the
+ * alterations:
+ *
+ * N = (18322e-20*(T^4) + 2343e-13*(T^3) + 87018e-9*(T^2) + 39269e-3*T +
+ *     17204e2) / 1e4
+ * T = -16743e-12*(N^4) + 81542e-9*(N^3) - 182010e-6*(N^2) + 310200e-3*N -
+ *     48380
+ * where T = [-48380, 147438] mC and N = [0, 1023].
+ *
+ * static const struct polynomial poly_temp_to_N = {
+ *         .total_divider = 10000,
+ *         .terms = {
+ *                 {4, 18322, 10000, 10000},
+ *                 {3, 2343, 10000, 10},
+ *                 {2, 87018, 10000, 10},
+ *                 {1, 39269, 1000, 1},
+ *                 {0, 1720400, 1, 1}
+ *         }
+ * };
+ *
+ * static const struct polynomial poly_N_to_temp = {
+ *         .total_divider = 1,
+ *         .terms = {
+ *                 {4, -16743, 1000, 1},
+ *                 {3, 81542, 1000, 1},
+ *                 {2, -182010, 1000, 1},
+ *                 {1, 310200, 1000, 1},
+ *                 {0, -48380, 1, 1}
+ *         }
+ * };
+ */
+
+/**
+ * polynomial_calc - calculate a polynomial using integer arithmetic
+ *
+ * @poly: pointer to the descriptor of the polynomial
+ * @data: input value of the polynomial
+ *
+ * Calculate the result of a polynomial using only integer arithmetic. For
+ * this to work without too much loss of precision, the coefficients have to
+ * be altered. This is called factor redistribution.
+ *
+ * Returns the result of the polynomial calculation.
+ */
+long polynomial_calc(const struct polynomial *poly, long data)
+{
+       const struct polynomial_term *term = poly->terms;
+       long total_divider = poly->total_divider ?: 1;
+       long tmp, ret = 0;
+       int deg;
+
+       /*
+        * Here is the polynomial calculation function, which performs the
+        * redistributed terms calculations. It's pretty straightforward.
+        * We walk over each degree term up to the free one, and perform
+        * the redistributed multiplication of the term coefficient, its
+        * divider (as for the rational fraction representation), data
+        * power and the rational fraction divider leftover. Then all of
+        * this is collected in a total sum variable, whose value is
+        * normalized by the total divider before being returned.
+        */
+       do {
+               tmp = term->coef;
+               for (deg = 0; deg < term->deg; ++deg)
+                       tmp = mult_frac(tmp, data, term->divider);
+               ret += tmp / term->divider_leftover;
+       } while ((term++)->deg);
+
+       return ret / total_divider;
+}
+EXPORT_SYMBOL_GPL(polynomial_calc);
+
+MODULE_DESCRIPTION("Generic polynomial calculations");
+MODULE_LICENSE("GPL");
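
A minimal usage sketch of the new helper, reusing the poly_N_to_temp table quoted in the comment above (struct polynomial comes from <linux/polynomial.h>; the wrapper function is hypothetical):

    static const struct polynomial poly_N_to_temp = {
            .total_divider = 1,
            .terms = {
                    {4, -16743, 1000, 1},
                    {3, 81542, 1000, 1},
                    {2, -182010, 1000, 1},
                    {1, 310200, 1000, 1},
                    {0, -48380, 1, 1}
            }
    };

    /* Convert a raw PVT reading N = [0, 1023] to millidegrees Celsius. */
    static long pvt_to_mC(long N)
    {
            return polynomial_calc(&poly_N_to_temp, N);
    }
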
index 976632003ec6589f926e804ed2fba1e8b48c1a4c..d5d9029362cbb3e6d44e98c41ce16c4f01360429 100644 (file)
@@ -245,25 +245,13 @@ static struct prandom_test2 {
        {  407983964U, 921U,  728767059U },
 };
 
-static u32 __extract_hwseed(void)
-{
-       unsigned int val = 0;
-
-       (void)(arch_get_random_seed_int(&val) ||
-              arch_get_random_int(&val));
-
-       return val;
-}
-
-static void prandom_seed_early(struct rnd_state *state, u32 seed,
-                              bool mix_with_hwseed)
+static void prandom_state_selftest_seed(struct rnd_state *state, u32 seed)
 {
 #define LCG(x)  ((x) * 69069U) /* super-duper LCG */
-#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
-       state->s1 = __seed(HWSEED() ^ LCG(seed),        2U);
-       state->s2 = __seed(HWSEED() ^ LCG(state->s1),   8U);
-       state->s3 = __seed(HWSEED() ^ LCG(state->s2),  16U);
-       state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
+       state->s1 = __seed(LCG(seed),        2U);
+       state->s2 = __seed(LCG(state->s1),   8U);
+       state->s3 = __seed(LCG(state->s2),  16U);
+       state->s4 = __seed(LCG(state->s3), 128U);
 }
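
For context, the thresholds 2, 8, 16 and 128 above are the minimum state-word values the Tausworthe generator requires, which __seed() enforces. Its definition, recalled from <linux/prandom.h> and not part of this hunk, is roughly:

    /* Recalled from <linux/prandom.h>, not shown in this diff: */
    static inline u32 __seed(u32 x, u32 m)
    {
            return (x < m) ? x + m : x;
    }
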
 
 static int __init prandom_state_selftest(void)
@@ -274,7 +262,7 @@ static int __init prandom_state_selftest(void)
        for (i = 0; i < ARRAY_SIZE(test1); i++) {
                struct rnd_state state;
 
-               prandom_seed_early(&state, test1[i].seed, false);
+               prandom_state_selftest_seed(&state, test1[i].seed);
                prandom_warmup(&state);
 
                if (test1[i].result != prandom_u32_state(&state))
@@ -289,7 +277,7 @@ static int __init prandom_state_selftest(void)
        for (i = 0; i < ARRAY_SIZE(test2); i++) {
                struct rnd_state state;
 
-               prandom_seed_early(&state, test2[i].seed, false);
+               prandom_state_selftest_seed(&state, test2[i].seed);
                prandom_warmup(&state);
 
                for (j = 0; j < test2[i].iteration - 1; j++)
@@ -310,324 +298,3 @@ static int __init prandom_state_selftest(void)
 }
 core_initcall(prandom_state_selftest);
 #endif
-
-/*
- * The prandom_u32() implementation is now completely separate from the
- * prandom_state() functions, which are retained (for now) for compatibility.
- *
- * Because of (ab)use in the networking code for choosing random TCP/UDP port
- * numbers, which open DoS possibilities if guessable, we want something
- * stronger than a standard PRNG.  But the performance requirements of
- * the network code do not allow robust crypto for this application.
- *
- * So this is a homebrew Junior Spaceman implementation, based on the
- * lowest-latency trustworthy crypto primitive available, SipHash.
- * (The authors of SipHash have not been consulted about this abuse of
- * their work.)
- *
- * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to
- * one word of output.  This abbreviated version uses 2 rounds per word
- * of output.
- */
-
-struct siprand_state {
-       unsigned long v0;
-       unsigned long v1;
-       unsigned long v2;
-       unsigned long v3;
-};
-
-static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
-DEFINE_PER_CPU(unsigned long, net_rand_noise);
-EXPORT_PER_CPU_SYMBOL(net_rand_noise);
-
-/*
- * This is the core CPRNG function.  As "pseudorandom", this is not used
- * for truly valuable things, just intended to be a PITA to guess.
- * For maximum speed, we do just two SipHash rounds per word.  This is
- * the same rate as 4 rounds per 64 bits that SipHash normally uses,
- * so hopefully it's reasonably secure.
- *
- * There are two changes from the official SipHash finalization:
- * - We omit some constants XORed with v2 in the SipHash spec as irrelevant;
- *   they are there only to make the output rounds distinct from the input
- *   rounds, and this application has no input rounds.
- * - Rather than returning v0^v1^v2^v3, return v1+v3.
- *   If you look at the SipHash round, the last operation on v3 is
- *   "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time.
- *   Likewise "v1 ^= v2".  (The rotate of v2 makes a difference, but
- *   it still cancels out half of the bits in v2 for no benefit.)
- *   Second, since the last combining operation was xor, continue the
- *   pattern of alternating xor/add for a tiny bit of extra non-linearity.
- */
-static inline u32 siprand_u32(struct siprand_state *s)
-{
-       unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3;
-       unsigned long n = raw_cpu_read(net_rand_noise);
-
-       v3 ^= n;
-       PRND_SIPROUND(v0, v1, v2, v3);
-       PRND_SIPROUND(v0, v1, v2, v3);
-       v0 ^= n;
-       s->v0 = v0;  s->v1 = v1;  s->v2 = v2;  s->v3 = v3;
-       return v1 + v3;
-}
-
-
-/**
- *     prandom_u32 - pseudo random number generator
- *
- *     A 32 bit pseudo-random number is generated using a fast
- *     algorithm suitable for simulation. This algorithm is NOT
- *     considered safe for cryptographic use.
- */
-u32 prandom_u32(void)
-{
-       struct siprand_state *state = get_cpu_ptr(&net_rand_state);
-       u32 res = siprand_u32(state);
-
-       put_cpu_ptr(&net_rand_state);
-       return res;
-}
-EXPORT_SYMBOL(prandom_u32);
-
-/**
- *     prandom_bytes - get the requested number of pseudo-random bytes
- *     @buf: where to copy the pseudo-random bytes to
- *     @bytes: the requested number of bytes
- */
-void prandom_bytes(void *buf, size_t bytes)
-{
-       struct siprand_state *state = get_cpu_ptr(&net_rand_state);
-       u8 *ptr = buf;
-
-       while (bytes >= sizeof(u32)) {
-               put_unaligned(siprand_u32(state), (u32 *)ptr);
-               ptr += sizeof(u32);
-               bytes -= sizeof(u32);
-       }
-
-       if (bytes > 0) {
-               u32 rem = siprand_u32(state);
-
-               do {
-                       *ptr++ = (u8)rem;
-                       rem >>= BITS_PER_BYTE;
-               } while (--bytes > 0);
-       }
-       put_cpu_ptr(&net_rand_state);
-}
-EXPORT_SYMBOL(prandom_bytes);
-
-/**
- *     prandom_seed - add entropy to pseudo random number generator
- *     @entropy: entropy value
- *
- *     Add some additional seed material to the prandom pool.
- *     The "entropy" is actually our IP address (the only caller is
- *     the network code), not for unpredictability, but to ensure that
- *     different machines are initialized differently.
- */
-void prandom_seed(u32 entropy)
-{
-       int i;
-
-       add_device_randomness(&entropy, sizeof(entropy));
-
-       for_each_possible_cpu(i) {
-               struct siprand_state *state = per_cpu_ptr(&net_rand_state, i);
-               unsigned long v0 = state->v0, v1 = state->v1;
-               unsigned long v2 = state->v2, v3 = state->v3;
-
-               do {
-                       v3 ^= entropy;
-                       PRND_SIPROUND(v0, v1, v2, v3);
-                       PRND_SIPROUND(v0, v1, v2, v3);
-                       v0 ^= entropy;
-               } while (unlikely(!v0 || !v1 || !v2 || !v3));
-
-               WRITE_ONCE(state->v0, v0);
-               WRITE_ONCE(state->v1, v1);
-               WRITE_ONCE(state->v2, v2);
-               WRITE_ONCE(state->v3, v3);
-       }
-}
-EXPORT_SYMBOL(prandom_seed);
-
-/*
- *     Generate some initially weak seeding values to allow
- *     the prandom_u32() engine to be started.
- */
-static int __init prandom_init_early(void)
-{
-       int i;
-       unsigned long v0, v1, v2, v3;
-
-       if (!arch_get_random_long(&v0))
-               v0 = jiffies;
-       if (!arch_get_random_long(&v1))
-               v1 = random_get_entropy();
-       v2 = v0 ^ PRND_K0;
-       v3 = v1 ^ PRND_K1;
-
-       for_each_possible_cpu(i) {
-               struct siprand_state *state;
-
-               v3 ^= i;
-               PRND_SIPROUND(v0, v1, v2, v3);
-               PRND_SIPROUND(v0, v1, v2, v3);
-               v0 ^= i;
-
-               state = per_cpu_ptr(&net_rand_state, i);
-               state->v0 = v0;  state->v1 = v1;
-               state->v2 = v2;  state->v3 = v3;
-       }
-
-       return 0;
-}
-core_initcall(prandom_init_early);
-
-
-/* Stronger reseeding when available, and periodically thereafter. */
-static void prandom_reseed(struct timer_list *unused);
-
-static DEFINE_TIMER(seed_timer, prandom_reseed);
-
-static void prandom_reseed(struct timer_list *unused)
-{
-       unsigned long expires;
-       int i;
-
-       /*
-        * Reinitialize each CPU's PRNG with 128 bits of key.
-        * No locking on the CPUs, but then somewhat random results are,
-        * well, expected.
-        */
-       for_each_possible_cpu(i) {
-               struct siprand_state *state;
-               unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0;
-               unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1;
-#if BITS_PER_LONG == 32
-               int j;
-
-               /*
-                * On 32-bit machines, hash in two extra words to
-                * approximate 128-bit key length.  Not that the hash
-                * has that much security, but this prevents a trivial
-                * 64-bit brute force.
-                */
-               for (j = 0; j < 2; j++) {
-                       unsigned long m = get_random_long();
-
-                       v3 ^= m;
-                       PRND_SIPROUND(v0, v1, v2, v3);
-                       PRND_SIPROUND(v0, v1, v2, v3);
-                       v0 ^= m;
-               }
-#endif
-               /*
-                * Probably impossible in practice, but there is a
-                * theoretical risk that a race between this reseeding
-                * and the target CPU writing its state back could
-                * create the all-zero SipHash fixed point.
-                *
-                * To ensure that never happens, ensure the state
-                * we write contains no zero words.
-                */
-               state = per_cpu_ptr(&net_rand_state, i);
-               WRITE_ONCE(state->v0, v0 ? v0 : -1ul);
-               WRITE_ONCE(state->v1, v1 ? v1 : -1ul);
-               WRITE_ONCE(state->v2, v2 ? v2 : -1ul);
-               WRITE_ONCE(state->v3, v3 ? v3 : -1ul);
-       }
-
-       /* reseed every ~60 seconds, in [40 .. 80) interval with slack */
-       expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ));
-       mod_timer(&seed_timer, expires);
-}
-
-/*
- * The random ready callback can be called from almost any interrupt.
- * To avoid worrying about whether it's safe to delay that interrupt
- * long enough to seed all CPUs, just schedule an immediate timer event.
- */
-static int prandom_timer_start(struct notifier_block *nb,
-                              unsigned long action, void *data)
-{
-       mod_timer(&seed_timer, jiffies);
-       return 0;
-}
-
-#ifdef CONFIG_RANDOM32_SELFTEST
-/* Principle: True 32-bit random numbers will all have 16 differing bits on
- * average. For each 32-bit number, there are 601M numbers differing by 16
- * bits, and 89% of the numbers differ by at least 12 bits. Note that more
- * than 16 differing bits also implies a correlation with inverted bits. Thus
- * we take 1024 random numbers and compare each of them to the other ones,
- * counting the deviation of correlated bits to 16. Constants report 32,
- * counters 32-log2(TEST_SIZE), and pure randoms, around 6 or lower. With the
- * u32 total, TEST_SIZE may be as large as 4096 samples.
- */
-#define TEST_SIZE 1024
-static int __init prandom32_state_selftest(void)
-{
-       unsigned int x, y, bits, samples;
-       u32 xor, flip;
-       u32 total;
-       u32 *data;
-
-       data = kmalloc(sizeof(*data) * TEST_SIZE, GFP_KERNEL);
-       if (!data)
-               return 0;
-
-       for (samples = 0; samples < TEST_SIZE; samples++)
-               data[samples] = prandom_u32();
-
-       flip = total = 0;
-       for (x = 0; x < samples; x++) {
-               for (y = 0; y < samples; y++) {
-                       if (x == y)
-                               continue;
-                       xor = data[x] ^ data[y];
-                       flip |= xor;
-                       bits = hweight32(xor);
-                       total += (bits - 16) * (bits - 16);
-               }
-       }
-
-       /* We'll return the average deviation as 2*sqrt(corr/samples), which
-        * is also sqrt(4*corr/samples) which provides a better resolution.
-        */
-       bits = int_sqrt(total / (samples * (samples - 1)) * 4);
-       if (bits > 6)
-               pr_warn("prandom32: self test failed (at least %u bits"
-                       " correlated, fixed_mask=%#x fixed_value=%#x\n",
-                       bits, ~flip, data[0] & ~flip);
-       else
-               pr_info("prandom32: self test passed (less than %u bits"
-                       " correlated)\n",
-                       bits+1);
-       kfree(data);
-       return 0;
-}
-core_initcall(prandom32_state_selftest);
-#endif /*  CONFIG_RANDOM32_SELFTEST */
-
-/*
- * Start periodic full reseeding as soon as strong
- * random numbers are available.
- */
-static int __init prandom_init_late(void)
-{
-       static struct notifier_block random_ready = {
-               .notifier_call = prandom_timer_start
-       };
-       int ret = register_random_ready_notifier(&random_ready);
-
-       if (ret == -EALREADY) {
-               prandom_timer_start(&random_ready, 0, NULL);
-               ret = 0;
-       }
-       return ret;
-}
-late_initcall(prandom_init_late);
index 72b9068ab57bffbb308e1c449218aa205f5a8ffd..71d315a6ad623c99c78d0dfc99e39e198c67b1d6 100644 (file)
 #include <asm/word-at-a-time.h>
 #endif
 
-#define SIPROUND \
-       do { \
-       v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
-       v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
-       v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
-       v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
-       } while (0)
+#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3)
 
 #define PREAMBLE(len) \
-       u64 v0 = 0x736f6d6570736575ULL; \
-       u64 v1 = 0x646f72616e646f6dULL; \
-       u64 v2 = 0x6c7967656e657261ULL; \
-       u64 v3 = 0x7465646279746573ULL; \
+       u64 v0 = SIPHASH_CONST_0; \
+       u64 v1 = SIPHASH_CONST_1; \
+       u64 v2 = SIPHASH_CONST_2; \
+       u64 v3 = SIPHASH_CONST_3; \
        u64 b = ((u64)(len)) << 56; \
        v3 ^= key->key[1]; \
        v2 ^= key->key[0]; \
@@ -389,19 +383,13 @@ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
 }
 EXPORT_SYMBOL(hsiphash_4u32);
 #else
-#define HSIPROUND \
-       do { \
-       v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
-       v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
-       v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
-       v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
-       } while (0)
+#define HSIPROUND HSIPHASH_PERMUTATION(v0, v1, v2, v3)
 
 #define HPREAMBLE(len) \
-       u32 v0 = 0; \
-       u32 v1 = 0; \
-       u32 v2 = 0x6c796765U; \
-       u32 v3 = 0x74656462U; \
+       u32 v0 = HSIPHASH_CONST_0; \
+       u32 v1 = HSIPHASH_CONST_1; \
+       u32 v2 = HSIPHASH_CONST_2; \
+       u32 v3 = HSIPHASH_CONST_3; \
        u32 b = ((u32)(len)) << 24; \
        v3 ^= key->key[1]; \
        v2 ^= key->key[0]; \
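
Going by the SIPROUND/HSIPROUND bodies deleted above, the shared 64-bit macro that replaces them should be equivalent to the sketch below; the canonical definition lives in <linux/siphash.h> and is not part of this hunk:

    /* Reconstructed from the removed SIPROUND lines above; illustrative. */
    #define SIPHASH_PERMUTATION(a, b, c, d) ( \
            (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \
            (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \
            (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \
            (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32))
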
index 40d26a07a13319a9e09f07abcd9202723a0d28a3..fb77f7bfd126b5fd7498ec6e979d6eefd9f07b5a 100644 (file)
@@ -750,61 +750,38 @@ static int __init debug_boot_weak_hash_enable(char *str)
 }
 early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable);
 
-static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
-static siphash_key_t ptr_key __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(filled_random_ptr_key);
 
 static void enable_ptr_key_workfn(struct work_struct *work)
 {
-       get_random_bytes(&ptr_key, sizeof(ptr_key));
-       /* Needs to run from preemptible context */
-       static_branch_disable(&not_filled_random_ptr_key);
+       static_branch_enable(&filled_random_ptr_key);
 }
 
-static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
-
-static int fill_random_ptr_key(struct notifier_block *nb,
-                              unsigned long action, void *data)
-{
-       /* This may be in an interrupt handler. */
-       queue_work(system_unbound_wq, &enable_ptr_key_work);
-       return 0;
-}
-
-static struct notifier_block random_ready = {
-       .notifier_call = fill_random_ptr_key
-};
-
-static int __init initialize_ptr_random(void)
-{
-       int key_size = sizeof(ptr_key);
-       int ret;
-
-       /* Use hw RNG if available. */
-       if (get_random_bytes_arch(&ptr_key, key_size) == key_size) {
-               static_branch_disable(&not_filled_random_ptr_key);
-               return 0;
-       }
-
-       ret = register_random_ready_notifier(&random_ready);
-       if (!ret) {
-               return 0;
-       } else if (ret == -EALREADY) {
-               /* This is in preemptible context */
-               enable_ptr_key_workfn(&enable_ptr_key_work);
-               return 0;
-       }
-
-       return ret;
-}
-early_initcall(initialize_ptr_random);
-
 /* Maps a pointer to a 32 bit unique identifier. */
 static inline int __ptr_to_hashval(const void *ptr, unsigned long *hashval_out)
 {
+       static siphash_key_t ptr_key __read_mostly;
        unsigned long hashval;
 
-       if (static_branch_unlikely(&not_filled_random_ptr_key))
-               return -EAGAIN;
+       if (!static_branch_likely(&filled_random_ptr_key)) {
+               static bool filled = false;
+               static DEFINE_SPINLOCK(filling);
+               static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
+               unsigned long flags;
+
+               if (!system_unbound_wq ||
+                   (!rng_is_initialized() && !rng_has_arch_random()) ||
+                   !spin_trylock_irqsave(&filling, flags))
+                       return -EAGAIN;
+
+               if (!filled) {
+                       get_random_bytes(&ptr_key, sizeof(ptr_key));
+                       queue_work(system_unbound_wq, &enable_ptr_key_work);
+                       filled = true;
+               }
+               spin_unlock_irqrestore(&filling, flags);
+       }
 
 #ifdef CONFIG_64BIT
        hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key);
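
Distilled, the new __ptr_to_hashval() combines a static-key fast path with a trylock-guarded one-shot initialization that stays safe in atomic context. A generic stand-alone sketch of the pattern (not the kernel code):

    static DEFINE_STATIC_KEY_FALSE(state_ready);

    static int get_hashed(unsigned long in, unsigned long *out)
    {
            static bool filled;
            static DEFINE_SPINLOCK(filling);
            unsigned long flags;

            if (!static_branch_likely(&state_ready)) {
                    /* trylock only: callers may be in atomic context */
                    if (!spin_trylock_irqsave(&filling, flags))
                            return -EAGAIN;
                    if (!filled) {
                            /* initialize key material here; flip state_ready
                             * later from preemptible context, e.g. via a
                             * workqueue, as the hunk above does */
                            filled = true;
                    }
                    spin_unlock_irqrestore(&filling, flags);
            }
            *out = in;      /* stand-in for the real siphash step */
            return 0;
    }
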
index 7176af65b103a4500fda68dc62028fb37794d3a0..ff60bd7d74e0729b81f80ecfad4e9296e1e08079 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include <linux/blkdev.h>
 #include <linux/wait.h>
 #include <linux/rbtree.h>
 #include <linux/kthread.h>
@@ -390,7 +391,6 @@ static void cgwb_release_workfn(struct work_struct *work)
 {
        struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
                                                release_work);
-       struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
        struct backing_dev_info *bdi = wb->bdi;
 
        mutex_lock(&wb->bdi->cgwb_release_mutex);
@@ -401,7 +401,7 @@ static void cgwb_release_workfn(struct work_struct *work)
        mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
        /* triggers blkg destruction if no online users left */
-       blkcg_unpin_online(blkcg);
+       blkcg_unpin_online(wb->blkcg_css);
 
        fprop_local_destroy_percpu(&wb->memcg_completions);
 
@@ -446,7 +446,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
 {
        struct mem_cgroup *memcg;
        struct cgroup_subsys_state *blkcg_css;
-       struct blkcg *blkcg;
        struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
        struct bdi_writeback *wb;
        unsigned long flags;
@@ -454,9 +453,8 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
        memcg = mem_cgroup_from_css(memcg_css);
        blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
-       blkcg = css_to_blkcg(blkcg_css);
        memcg_cgwb_list = &memcg->cgwb_list;
-       blkcg_cgwb_list = &blkcg->cgwb_list;
+       blkcg_cgwb_list = blkcg_get_cgwb_list(blkcg_css);
 
        /* look up again under lock and discard on blkcg mismatch */
        spin_lock_irqsave(&cgwb_lock, flags);
@@ -511,7 +509,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
                        list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
                        list_add(&wb->memcg_node, memcg_cgwb_list);
                        list_add(&wb->blkcg_node, blkcg_cgwb_list);
-                       blkcg_pin_online(blkcg);
+                       blkcg_pin_online(blkcg_css);
                        css_get(memcg_css);
                        css_get(blkcg_css);
                }
@@ -724,18 +722,19 @@ void wb_memcg_offline(struct mem_cgroup *memcg)
 
 /**
  * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
- * @blkcg: blkcg being offlined
+ * @css: blkcg being offlined
  *
  * Also prevents creation of any new wb's associated with @css.
  */
-void wb_blkcg_offline(struct blkcg *blkcg)
+void wb_blkcg_offline(struct cgroup_subsys_state *css)
 {
        struct bdi_writeback *wb, *next;
+       struct list_head *list = blkcg_get_cgwb_list(css);
 
        spin_lock_irq(&cgwb_lock);
-       list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
+       list_for_each_entry_safe(wb, next, list, blkcg_node)
                cgwb_kill(wb);
-       blkcg->cgwb_list.next = NULL;   /* prevent new wb's */
+       list->next = NULL;      /* prevent new wb's */
        spin_unlock_irq(&cgwb_lock);
 }
 
index f598a037eb04f75799cb7adba885c96d1a2e1ec3..501bc150792c8b8856cdea8e0d4414553d898979 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1648,6 +1648,35 @@ out:
 }
 EXPORT_SYMBOL(fault_in_writeable);
 
+/**
+ * fault_in_subpage_writeable - fault in an address range for writing
+ * @uaddr: start of address range
+ * @size: size of address range
+ *
+ * Fault in a user address range for writing while checking for permissions at
+ * sub-page granularity (e.g. arm64 MTE). This function should be used when
+ * the caller cannot guarantee forward progress of a copy_to_user() loop.
+ *
+ * Returns the number of bytes not faulted in (like copy_to_user() and
+ * copy_from_user()).
+ */
+size_t fault_in_subpage_writeable(char __user *uaddr, size_t size)
+{
+       size_t faulted_in;
+
+       /*
+        * Attempt faulting in at page granularity first for page table
+        * permission checking. The arch-specific probe_subpage_writeable()
+        * functions may not check for this.
+        */
+       faulted_in = size - fault_in_writeable(uaddr, size);
+       if (faulted_in)
+               faulted_in -= probe_subpage_writeable(uaddr, faulted_in);
+
+       return size - faulted_in;
+}
+EXPORT_SYMBOL(fault_in_subpage_writeable);
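
A hedged sketch of the copy loop the kernel-doc alludes to; the caller below is hypothetical:

    /* Illustrative: retry a user copy, probing at sub-page granularity so
     * that a fault invisible to page-granular fault-in (e.g. an arm64 MTE
     * tag mismatch) cannot livelock the loop.
     */
    static int copy_out(char __user *uaddr, const void *src, size_t size)
    {
            while (copy_to_user(uaddr, src, size)) {
                    /* nonzero return: some bytes cannot be made writable */
                    if (fault_in_subpage_writeable(uaddr, size))
                            return -EFAULT;
            }
            return 0;
    }
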
+
 /*
  * fault_in_safe_writeable - fault in an address range for writing
  * @uaddr: start of address range
index c468fee595ffa49952952da40d7a2d70703ac0cc..910a138e9859e9a6c02e7bf234c5431c8e3cebb5 100644 (file)
@@ -2495,11 +2495,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        struct address_space *mapping = NULL;
        int extra_pins, ret;
        pgoff_t end;
+       bool is_hzp;
 
-       VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
        VM_BUG_ON_PAGE(!PageLocked(head), head);
        VM_BUG_ON_PAGE(!PageCompound(head), head);
 
+       is_hzp = is_huge_zero_page(head);
+       VM_WARN_ON_ONCE_PAGE(is_hzp, head);
+       if (is_hzp)
+               return -EBUSY;
+
        if (PageWriteback(head))
                return -EBUSY;
 
index 9b2b5f56f4aeffa42f2fa2d85c46901768128234..11a954763be9cd04d16707e0817b9e2b24ad1d58 100644 (file)
@@ -621,6 +621,16 @@ static bool __init kfence_init_pool_early(void)
         * fails for the first page, and therefore expect addr==__kfence_pool in
         * most failure cases.
         */
+       for (char *p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) {
+               struct slab *slab = virt_to_slab(p);
+
+               if (!slab)
+                       continue;
+#ifdef CONFIG_MEMCG
+               slab->memcg_data = 0;
+#endif
+               __folio_clear_slab(slab_folio(slab));
+       }
        memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
        __kfence_pool = NULL;
        return false;
index 27760c19bad754d6f3281f50885d824bdf6c184a..d4a4adcca01f3f77721fedd241a80de55e1b91ee 100644 (file)
@@ -1274,7 +1274,7 @@ try_again:
        }
 out:
        if (ret == -EIO)
-               dump_page(p, "hwpoison: unhandlable page");
+               pr_err("Memory failure: %#lx: unhandlable page.\n", page_to_pfn(p));
 
        return ret;
 }
@@ -1860,19 +1860,6 @@ try_again:
        }
 
        if (PageTransHuge(hpage)) {
-               /*
-                * Bail out before SetPageHasHWPoisoned() if hpage is
-                * huge_zero_page, although PG_has_hwpoisoned is not
-                * checked in set_huge_zero_page().
-                *
-                * TODO: Handle memory failure of huge_zero_page thoroughly.
-                */
-               if (is_huge_zero_page(hpage)) {
-                       action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
-                       res = -EBUSY;
-                       goto unlock_mutex;
-               }
-
                /*
                 * The flag must be set after the refcount is bumped
                 * otherwise it may race with THP split.
index 303d3290b938667699e0ab61cbd8e6ac81c286eb..0b93fac76851102272c697500f6ceca14888753d 100644 (file)
@@ -947,7 +947,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
                return -EINTR;
        vma = vma_lookup(mm, addr);
        if (!vma) {
-               ret = EFAULT;
+               ret = -EFAULT;
                goto out;
        }
 
index 89fbf3cae30f7cf58a0a1b83970ed8b17d627507..3fbdab6a940e783020057596e42af8a4f836ba92 100644 (file)
@@ -360,7 +360,6 @@ int swap_readpage(struct page *page, bool synchronous)
         * attempt to access it in the page fault retry time check.
         */
        if (synchronous) {
-               bio->bi_opf |= REQ_POLLED;
                get_task_struct(current);
                bio->bi_private = current;
        }
@@ -372,8 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
                if (!READ_ONCE(bio->bi_private))
                        break;
 
-               if (!bio_poll(bio, NULL, 0))
-                       blk_io_schedule();
+               blk_io_schedule();
        }
        __set_current_state(TASK_RUNNING);
        bio_put(bio);
index 4a60cdb64262af49fbc086c6fef0f33947786b8b..26bf74a6b2fe6ae67b42ce931eb264a0fca648a3 100644 (file)
  * ->readpage() which may be less efficient.
  */
 
+#include <linux/blkdev.h>
 #include <linux/kernel.h>
 #include <linux/dax.h>
 #include <linux/gfp.h>
index 63c61f8b261188c34d26b276e5e81cd1a07eb878..981a6e85c88e773b9b1a11a27b5265b17819928e 100644 (file)
@@ -6,6 +6,7 @@
  *  Swap reorganised 29.12.95, Stephen Tweedie
  */
 
+#include <linux/blkdev.h>
 #include <linux/mm.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
@@ -179,7 +180,7 @@ static int discard_swap(struct swap_info_struct *si)
        nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
        if (nr_blocks) {
                err = blkdev_issue_discard(si->bdev, start_block,
-                               nr_blocks, GFP_KERNEL, 0);
+                               nr_blocks, GFP_KERNEL);
                if (err)
                        return err;
                cond_resched();
@@ -190,7 +191,7 @@ static int discard_swap(struct swap_info_struct *si)
                nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
 
                err = blkdev_issue_discard(si->bdev, start_block,
-                               nr_blocks, GFP_KERNEL, 0);
+                               nr_blocks, GFP_KERNEL);
                if (err)
                        break;
 
@@ -254,7 +255,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
                start_block <<= PAGE_SHIFT - 9;
                nr_blocks <<= PAGE_SHIFT - 9;
                if (blkdev_issue_discard(si->bdev, start_block,
-                                       nr_blocks, GFP_NOIO, 0))
+                                       nr_blocks, GFP_NOIO))
                        break;
 
                se = next_se(se);
@@ -2466,7 +2467,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        if (p->flags & SWP_CONTINUED)
                free_swap_count_continuations(p);
 
-       if (!p->bdev || !blk_queue_nonrot(bdev_get_queue(p->bdev)))
+       if (!p->bdev || !bdev_nonrot(p->bdev))
                atomic_dec(&nr_rotate_swap);
 
        mutex_lock(&swapon_mutex);
@@ -2761,7 +2762,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
                 * write only restriction.  Hence zoned block devices are not
                 * suitable for swapping.  Disallow them here.
                 */
-               if (blk_queue_is_zoned(p->bdev->bd_disk->queue))
+               if (bdev_is_zoned(p->bdev))
                        return -EINVAL;
                p->flags |= SWP_BLKDEV;
        } else if (S_ISREG(inode->i_mode)) {
@@ -2957,20 +2958,6 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
        return nr_extents;
 }
 
-/*
- * Helper to sys_swapon determining if a given swap
- * backing device queue supports DISCARD operations.
- */
-static bool swap_discardable(struct swap_info_struct *si)
-{
-       struct request_queue *q = bdev_get_queue(si->bdev);
-
-       if (!blk_queue_discard(q))
-               return false;
-
-       return true;
-}
-
 SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 {
        struct swap_info_struct *p;
@@ -3065,13 +3052,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                goto bad_swap_unlock_inode;
        }
 
-       if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue))
+       if (p->bdev && bdev_stable_writes(p->bdev))
                p->flags |= SWP_STABLE_WRITES;
 
        if (p->bdev && p->bdev->bd_disk->fops->rw_page)
                p->flags |= SWP_SYNCHRONOUS_IO;
 
-       if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+       if (p->bdev && bdev_nonrot(p->bdev)) {
                int cpu;
                unsigned long ci, nr_cluster;
 
@@ -3132,7 +3119,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                                         sizeof(long),
                                         GFP_KERNEL);
 
-       if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
+       if ((swap_flags & SWAP_FLAG_DISCARD) &&
+           p->bdev && bdev_max_discard_sectors(p->bdev)) {
                /*
                 * When discard is enabled for swap with no particular
                 * policy flagged, we set all swap discard flags here in
index 2c235d5c2364ce92e03d0757c294e2bf728154a2..baeacc735b83f98cab991cbdc1fdecd9d213fcd9 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/thread_info.h>
+#include <linux/vmalloc.h>
 #include <linux/atomic.h>
 #include <linux/jump_label.h>
 #include <asm/sections.h>
@@ -157,91 +158,47 @@ static inline void check_bogus_address(const unsigned long ptr, unsigned long n,
                usercopy_abort("null address", NULL, to_user, ptr, n);
 }
 
-/* Checks for allocs that are marked in some way as spanning multiple pages. */
-static inline void check_page_span(const void *ptr, unsigned long n,
-                                  struct page *page, bool to_user)
+static inline void check_heap_object(const void *ptr, unsigned long n,
+                                    bool to_user)
 {
-#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
-       const void *end = ptr + n - 1;
-       struct page *endpage;
-       bool is_reserved, is_cma;
+       struct folio *folio;
 
-       /*
-        * Sometimes the kernel data regions are not marked Reserved (see
-        * check below). And sometimes [_sdata,_edata) does not cover
-        * rodata and/or bss, so check each range explicitly.
-        */
+       if (is_kmap_addr(ptr)) {
+               unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1);
 
-       /* Allow reads of kernel rodata region (if not marked as Reserved). */
-       if (ptr >= (const void *)__start_rodata &&
-           end <= (const void *)__end_rodata) {
-               if (!to_user)
-                       usercopy_abort("rodata", NULL, to_user, 0, n);
+               if ((unsigned long)ptr + n - 1 > page_end)
+                       usercopy_abort("kmap", NULL, to_user,
+                                       offset_in_page(ptr), n);
                return;
        }
 
-       /* Allow kernel data region (if not marked as Reserved). */
-       if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
-               return;
+       if (is_vmalloc_addr(ptr)) {
+               struct vm_struct *area = find_vm_area(ptr);
+               unsigned long offset;
 
-       /* Allow kernel bss region (if not marked as Reserved). */
-       if (ptr >= (const void *)__bss_start &&
-           end <= (const void *)__bss_stop)
-               return;
-
-       /* Is the object wholly within one base page? */
-       if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
-                  ((unsigned long)end & (unsigned long)PAGE_MASK)))
-               return;
+               if (!area) {
+                       usercopy_abort("vmalloc", "no area", to_user, 0, n);
+                       return;
+               }
 
-       /* Allow if fully inside the same compound (__GFP_COMP) page. */
-       endpage = virt_to_head_page(end);
-       if (likely(endpage == page))
+               offset = ptr - area->addr;
+               if (offset + n > get_vm_area_size(area))
+                       usercopy_abort("vmalloc", NULL, to_user, offset, n);
                return;
-
-       /*
-        * Reject if range is entirely either Reserved (i.e. special or
-        * device memory), or CMA. Otherwise, reject since the object spans
-        * several independently allocated pages.
-        */
-       is_reserved = PageReserved(page);
-       is_cma = is_migrate_cma_page(page);
-       if (!is_reserved && !is_cma)
-               usercopy_abort("spans multiple pages", NULL, to_user, 0, n);
-
-       for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
-               page = virt_to_head_page(ptr);
-               if (is_reserved && !PageReserved(page))
-                       usercopy_abort("spans Reserved and non-Reserved pages",
-                                      NULL, to_user, 0, n);
-               if (is_cma && !is_migrate_cma_page(page))
-                       usercopy_abort("spans CMA and non-CMA pages", NULL,
-                                      to_user, 0, n);
        }
-#endif
-}
-
-static inline void check_heap_object(const void *ptr, unsigned long n,
-                                    bool to_user)
-{
-       struct folio *folio;
 
        if (!virt_addr_valid(ptr))
                return;
 
-       /*
-        * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
-        * highmem page or fallback to virt_to_page(). The following
-        * is effectively a highmem-aware virt_to_slab().
-        */
-       folio = page_folio(kmap_to_page((void *)ptr));
+       folio = virt_to_folio(ptr);
 
        if (folio_test_slab(folio)) {
                /* Check slab allocator for flags and size. */
                __check_heap_object(ptr, n, folio_slab(folio), to_user);
-       } else {
-               /* Verify object does not incorrectly span multiple pages. */
-               check_page_span(ptr, n, folio_page(folio, 0), to_user);
+       } else if (folio_test_large(folio)) {
+               unsigned long offset = ptr - folio_address(folio);
+               if (offset + n > folio_size(folio))
+                       usercopy_abort("page alloc", NULL, to_user, offset, n);
        }
 }
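
Concretely, the new vmalloc branch rejects copies that overrun the backing area; a hypothetical copy that would now be caught:

    /* Illustrative only:
     *
     *   char *p = vmalloc(PAGE_SIZE);
     *   copy_to_user(ubuf, p + PAGE_SIZE - 16, 64);
     *
     * 48 of the 64 bytes lie past the usable area (get_vm_area_size()
     * excludes the guard page), so check_heap_object() now calls
     * usercopy_abort("vmalloc", NULL, true, PAGE_SIZE - 16, 64).
     */
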
 
index 3492a9e81aa3a28d73c6008ac601a32c1011aa40..ac63e5ca8b2110e99046aa1fe234c73f6bcfeb62 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -343,6 +343,38 @@ unsigned long randomize_stack_top(unsigned long stack_top)
 #endif
 }
 
+/**
+ * randomize_page - Generate a random, page aligned address
+ * @start:     The smallest acceptable address the caller will take.
+ * @range:     The size of the area, starting at @start, within which the
+ *             random address must fall.
+ *
+ * If @start + @range would overflow, @range is capped.
+ *
+ * NOTE: Historical use of randomize_range, which this replaces, presumed that
+ * @start was already page aligned.  We now align it regardless.
+ *
+ * Return: A page aligned address within [start, start + range).  On error,
+ * @start is returned.
+ */
+unsigned long randomize_page(unsigned long start, unsigned long range)
+{
+       if (!PAGE_ALIGNED(start)) {
+               range -= PAGE_ALIGN(start) - start;
+               start = PAGE_ALIGN(start);
+       }
+
+       if (start > ULONG_MAX - range)
+               range = ULONG_MAX - start;
+
+       range >>= PAGE_SHIFT;
+
+       if (range == 0)
+               return start;
+
+       return start + (get_random_long() % range << PAGE_SHIFT);
+}
+
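
A worked example of the arithmetic above, with made-up inputs and 4 KiB pages:

    /* randomize_page(0x100800, 0x200000):
     *   start is not page aligned:  range -= 0x101000 - 0x100800 (0x800)
     *                               -> start = 0x101000, range = 0x1ff800
     *   no overflow, so:            range >>= 12  -> 0x1ff pages
     *   result: 0x101000 + ((get_random_long() % 0x1ff) << 12),
     *   i.e. a page-aligned address in [0x101000, 0x300000).
     */
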
 #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
index 0899a729a23f474c313e2a9e6f98f1ba875a6ada..c120c7c6d25fc13fe8e2a3724ffda4539b72d5c3 100644 (file)
@@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
                goto free_skb;
        }
 
+       /* GRO might have added fragments to the fragment list instead of
+        * frags[]. skb_split does not handle the fragment list, so the skb
+        * must be linearized to avoid incorrect length information after
+        * all batman-adv fragments were created and submitted to the
+        * hard-interface.
+        */
+       if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
+               ret = -ENOMEM;
+               goto free_skb;
+       }
+
        /* Create one header to be copied to all fragments */
        frag_header.packet_type = BATADV_UNICAST_FRAG;
        frag_header.version = BATADV_COMPAT_VERSION;
index b4782a6c1025d6cf907d6290ecd8ee584e454926..45c2dd2e15905fef2695391ec80414c1be5bbf90 100644 (file)
@@ -2555,10 +2555,10 @@ int hci_register_dev(struct hci_dev *hdev)
         */
        switch (hdev->dev_type) {
        case HCI_PRIMARY:
-               id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
+               id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
                break;
        case HCI_AMP:
-               id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL);
+               id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
                break;
        default:
                return -EINVAL;
@@ -2567,7 +2567,7 @@ int hci_register_dev(struct hci_dev *hdev)
        if (id < 0)
                return id;
 
-       sprintf(hdev->name, "hci%d", id);
+       snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
        hdev->id = id;
 
        BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
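
For context on why the ID bound and the snprintf() conversion arrive together (the definitions below are recalled from include/net/bluetooth/hci_core.h and are not part of this diff):

    /* Recalled, not shown in this hunk:
     *   char name[8];             // in struct hci_dev: "hci9999" + NUL fits
     *   #define HCI_MAX_ID 10000  // keeps ids below five digits
     */
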
index 196417859c4a97168c2f87b8af476b0794c3b2b4..68b3e850bcb9dba2121f22f17e810ed19c28482f 100644 (file)
@@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb)
        dev_sw_netstats_rx_add(brdev, skb->len);
 
        vg = br_vlan_group_rcu(br);
+
+       /* Reset the offload_fwd_mark because there could be a stacked
+        * bridge above, and it should not think this bridge is doing
+        * that bridge's work forwarding out its ports.
+        */
+       br_switchdev_frame_unmark(skb);
+
        /* Bridge is just like any other port.  Make sure the
         * packet is allowed except in promisc mode when someone
         * may be running packet capture.
index 83eb97c94e834053037955460aec14a01a08e4dc..9d82bb42e958f4709b1e734377984a940d8016bb 100644 (file)
@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req)
        target_init(&req->r_t);
 }
 
-/*
- * This is ugly, but it allows us to reuse linger registration and ping
- * requests, keeping the structure of the code around send_linger{_ping}()
- * reasonable.  Setting up a min_nr=2 mempool for each linger request
- * and dealing with copying ops (this blasts req only, watch op remains
- * intact) isn't any better.
- */
-static void request_reinit(struct ceph_osd_request *req)
-{
-       struct ceph_osd_client *osdc = req->r_osdc;
-       bool mempool = req->r_mempool;
-       unsigned int num_ops = req->r_num_ops;
-       u64 snapid = req->r_snapid;
-       struct ceph_snap_context *snapc = req->r_snapc;
-       bool linger = req->r_linger;
-       struct ceph_msg *request_msg = req->r_request;
-       struct ceph_msg *reply_msg = req->r_reply;
-
-       dout("%s req %p\n", __func__, req);
-       WARN_ON(kref_read(&req->r_kref) != 1);
-       request_release_checks(req);
-
-       WARN_ON(kref_read(&request_msg->kref) != 1);
-       WARN_ON(kref_read(&reply_msg->kref) != 1);
-       target_destroy(&req->r_t);
-
-       request_init(req);
-       req->r_osdc = osdc;
-       req->r_mempool = mempool;
-       req->r_num_ops = num_ops;
-       req->r_snapid = snapid;
-       req->r_snapc = snapc;
-       req->r_linger = linger;
-       req->r_request = request_msg;
-       req->r_reply = reply_msg;
-}
-
 struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
                                               struct ceph_snap_context *snapc,
                                               unsigned int num_ops,
@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init);
  * @watch_opcode: CEPH_OSD_WATCH_OP_*
  */
 static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
-                                 u64 cookie, u8 watch_opcode)
+                                 u8 watch_opcode, u64 cookie, u32 gen)
 {
        struct ceph_osd_req_op *op;
 
        op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
        op->watch.cookie = cookie;
        op->watch.op = watch_opcode;
-       op->watch.gen = 0;
+       op->watch.gen = gen;
+}
+
+/*
+ * prot_ver, timeout and notify payload (may be empty) should already be
+ * encoded in @request_pl
+ */
+static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+                                  u64 cookie, struct ceph_pagelist *request_pl)
+{
+       struct ceph_osd_req_op *op;
+
+       op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+       op->notify.cookie = cookie;
+
+       ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
+       op->indata_len = request_pl->length;
 }
 
 /*
@@ -2731,10 +2710,13 @@ static void linger_release(struct kref *kref)
        WARN_ON(!list_empty(&lreq->pending_lworks));
        WARN_ON(lreq->osd);
 
-       if (lreq->reg_req)
-               ceph_osdc_put_request(lreq->reg_req);
-       if (lreq->ping_req)
-               ceph_osdc_put_request(lreq->ping_req);
+       if (lreq->request_pl)
+               ceph_pagelist_release(lreq->request_pl);
+       if (lreq->notify_id_pages)
+               ceph_release_page_vector(lreq->notify_id_pages, 1);
+
+       ceph_osdc_put_request(lreq->reg_req);
+       ceph_osdc_put_request(lreq->ping_req);
        target_destroy(&lreq->t);
        kfree(lreq);
 }
@@ -3003,6 +2985,12 @@ static void linger_commit_cb(struct ceph_osd_request *req)
        struct ceph_osd_linger_request *lreq = req->r_priv;
 
        mutex_lock(&lreq->lock);
+       if (req != lreq->reg_req) {
+               dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+                    __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+               goto out;
+       }
+
        dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
             lreq->linger_id, req->r_result);
        linger_reg_commit_complete(lreq, req->r_result);
@@ -3026,6 +3014,7 @@ static void linger_commit_cb(struct ceph_osd_request *req)
                }
        }
 
+out:
        mutex_unlock(&lreq->lock);
        linger_put(lreq);
 }
@@ -3048,6 +3037,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
        struct ceph_osd_linger_request *lreq = req->r_priv;
 
        mutex_lock(&lreq->lock);
+       if (req != lreq->reg_req) {
+               dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+                    __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+               goto out;
+       }
+
        dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
             lreq, lreq->linger_id, req->r_result, lreq->last_error);
        if (req->r_result < 0) {
@@ -3057,46 +3052,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
                }
        }
 
+out:
        mutex_unlock(&lreq->lock);
        linger_put(lreq);
 }
 
 static void send_linger(struct ceph_osd_linger_request *lreq)
 {
-       struct ceph_osd_request *req = lreq->reg_req;
-       struct ceph_osd_req_op *op = &req->r_ops[0];
+       struct ceph_osd_client *osdc = lreq->osdc;
+       struct ceph_osd_request *req;
+       int ret;
 
-       verify_osdc_wrlocked(req->r_osdc);
+       verify_osdc_wrlocked(osdc);
+       mutex_lock(&lreq->lock);
        dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
 
-       if (req->r_osd)
-               cancel_linger_request(req);
+       if (lreq->reg_req) {
+               if (lreq->reg_req->r_osd)
+                       cancel_linger_request(lreq->reg_req);
+               ceph_osdc_put_request(lreq->reg_req);
+       }
+
+       req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+       BUG_ON(!req);
 
-       request_reinit(req);
        target_copy(&req->r_t, &lreq->t);
        req->r_mtime = lreq->mtime;
 
-       mutex_lock(&lreq->lock);
        if (lreq->is_watch && lreq->committed) {
-               WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
-                       op->watch.cookie != lreq->linger_id);
-               op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
-               op->watch.gen = ++lreq->register_gen;
+               osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
+                                     lreq->linger_id, ++lreq->register_gen);
                dout("lreq %p reconnect register_gen %u\n", lreq,
-                    op->watch.gen);
+                    req->r_ops[0].watch.gen);
                req->r_callback = linger_reconnect_cb;
        } else {
-               if (!lreq->is_watch)
+               if (lreq->is_watch) {
+                       osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
+                                             lreq->linger_id, 0);
+               } else {
                        lreq->notify_id = 0;
-               else
-                       WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
+
+                       refcount_inc(&lreq->request_pl->refcnt);
+                       osd_req_op_notify_init(req, 0, lreq->linger_id,
+                                              lreq->request_pl);
+                       ceph_osd_data_pages_init(
+                           osd_req_op_data(req, 0, notify, response_data),
+                           lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
+               }
                dout("lreq %p register\n", lreq);
                req->r_callback = linger_commit_cb;
        }
-       mutex_unlock(&lreq->lock);
+
+       ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+       BUG_ON(ret);
 
        req->r_priv = linger_get(lreq);
        req->r_linger = true;
+       lreq->reg_req = req;
+       mutex_unlock(&lreq->lock);
 
        submit_request(req, true);
 }
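
Taken together, the hunks above replace the request_reinit() dance with a fresh request per (re)send; schematically:

    /* Condensed from the hunks above, not literal code:
     *
     *   if (lreq->reg_req) {                  // request from the last cycle
     *           if (lreq->reg_req->r_osd)
     *                   cancel_linger_request(lreq->reg_req);
     *           ceph_osdc_put_request(lreq->reg_req);
     *   }
     *   req = ceph_osdc_alloc_request(...);   // always a fresh request
     *   ...init watch or notify op, alloc messages...
     *   lreq->reg_req = req;                  // callbacks compare against
     *                                         // this and ignore stale replies
     *   submit_request(req, true);
     */
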
@@ -3106,6 +3119,12 @@ static void linger_ping_cb(struct ceph_osd_request *req)
        struct ceph_osd_linger_request *lreq = req->r_priv;
 
        mutex_lock(&lreq->lock);
+       if (req != lreq->ping_req) {
+               dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+                    __func__, lreq, lreq->linger_id, req, lreq->ping_req);
+               goto out;
+       }
+
        dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
             __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
             lreq->last_error);
@@ -3121,6 +3140,7 @@ static void linger_ping_cb(struct ceph_osd_request *req)
                     lreq->register_gen, req->r_ops[0].watch.gen);
        }
 
+out:
        mutex_unlock(&lreq->lock);
        linger_put(lreq);
 }
@@ -3128,8 +3148,8 @@ static void linger_ping_cb(struct ceph_osd_request *req)
 static void send_linger_ping(struct ceph_osd_linger_request *lreq)
 {
        struct ceph_osd_client *osdc = lreq->osdc;
-       struct ceph_osd_request *req = lreq->ping_req;
-       struct ceph_osd_req_op *op = &req->r_ops[0];
+       struct ceph_osd_request *req;
+       int ret;
 
        if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
                dout("%s PAUSERD\n", __func__);
@@ -3141,19 +3161,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
             __func__, lreq, lreq->linger_id, lreq->ping_sent,
             lreq->register_gen);
 
-       if (req->r_osd)
-               cancel_linger_request(req);
+       if (lreq->ping_req) {
+               if (lreq->ping_req->r_osd)
+                       cancel_linger_request(lreq->ping_req);
+               ceph_osdc_put_request(lreq->ping_req);
+       }
 
-       request_reinit(req);
-       target_copy(&req->r_t, &lreq->t);
+       req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+       BUG_ON(!req);
 
-       WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
-               op->watch.cookie != lreq->linger_id ||
-               op->watch.op != CEPH_OSD_WATCH_OP_PING);
-       op->watch.gen = lreq->register_gen;
+       target_copy(&req->r_t, &lreq->t);
+       osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
+                             lreq->register_gen);
        req->r_callback = linger_ping_cb;
+
+       ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+       BUG_ON(ret);
+
        req->r_priv = linger_get(lreq);
        req->r_linger = true;
+       lreq->ping_req = req;
 
        ceph_osdc_get_request(req);
        account_request(req);
@@ -3169,12 +3196,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
 
        down_write(&osdc->lock);
        linger_register(lreq);
-       if (lreq->is_watch) {
-               lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
-               lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
-       } else {
-               lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
-       }
 
        calc_target(osdc, &lreq->t, false);
        osd = lookup_create_osd(osdc, lreq->t.osd, true);
@@ -3206,9 +3227,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
  */
 static void __linger_cancel(struct ceph_osd_linger_request *lreq)
 {
-       if (lreq->is_watch && lreq->ping_req->r_osd)
+       if (lreq->ping_req && lreq->ping_req->r_osd)
                cancel_linger_request(lreq->ping_req);
-       if (lreq->reg_req->r_osd)
+       if (lreq->reg_req && lreq->reg_req->r_osd)
                cancel_linger_request(lreq->reg_req);
        cancel_linger_map_check(lreq);
        unlink_linger(lreq->osd, lreq);
@@ -4570,8 +4591,13 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 EXPORT_SYMBOL(ceph_osdc_start_request);
 
 /*
- * Unregister a registered request.  The request is not completed:
- * ->r_result isn't set and __complete_request() isn't called.
+ * Unregister request.  If @req was registered, it isn't completed:
+ * r_result isn't set and __complete_request() isn't invoked.
+ *
+ * If @req wasn't registered, this call may have raced with
+ * handle_reply(), in which case r_result would already be set and
+ * __complete_request() would be getting invoked, possibly even
+ * concurrently with this call.
  */
 void ceph_osdc_cancel_request(struct ceph_osd_request *req)
 {
@@ -4657,43 +4683,6 @@ again:
 }
 EXPORT_SYMBOL(ceph_osdc_sync);
 
-static struct ceph_osd_request *
-alloc_linger_request(struct ceph_osd_linger_request *lreq)
-{
-       struct ceph_osd_request *req;
-
-       req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
-       if (!req)
-               return NULL;
-
-       ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
-       ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
-       return req;
-}
-
-static struct ceph_osd_request *
-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
-{
-       struct ceph_osd_request *req;
-
-       req = alloc_linger_request(lreq);
-       if (!req)
-               return NULL;
-
-       /*
-        * Pass 0 for cookie because we don't know it yet, it will be
-        * filled in by linger_submit().
-        */
-       osd_req_op_watch_init(req, 0, 0, watch_opcode);
-
-       if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
-               ceph_osdc_put_request(req);
-               return NULL;
-       }
-
-       return req;
-}
-
 /*
  * Returns a handle, caller owns a ref.
  */
@@ -4723,18 +4712,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
        lreq->t.flags = CEPH_OSD_FLAG_WRITE;
        ktime_get_real_ts64(&lreq->mtime);
 
-       lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
-       if (!lreq->reg_req) {
-               ret = -ENOMEM;
-               goto err_put_lreq;
-       }
-
-       lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
-       if (!lreq->ping_req) {
-               ret = -ENOMEM;
-               goto err_put_lreq;
-       }
-
        linger_submit(lreq);
        ret = linger_reg_commit_wait(lreq);
        if (ret) {
@@ -4772,8 +4749,8 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
        ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
        req->r_flags = CEPH_OSD_FLAG_WRITE;
        ktime_get_real_ts64(&req->r_mtime);
-       osd_req_op_watch_init(req, 0, lreq->linger_id,
-                             CEPH_OSD_WATCH_OP_UNWATCH);
+       osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
+                             lreq->linger_id, 0);
 
        ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
        if (ret)
@@ -4859,35 +4836,6 @@ out_put_req:
 }
 EXPORT_SYMBOL(ceph_osdc_notify_ack);
 
-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
-                                 u64 cookie, u32 prot_ver, u32 timeout,
-                                 void *payload, u32 payload_len)
-{
-       struct ceph_osd_req_op *op;
-       struct ceph_pagelist *pl;
-       int ret;
-
-       op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
-       op->notify.cookie = cookie;
-
-       pl = ceph_pagelist_alloc(GFP_NOIO);
-       if (!pl)
-               return -ENOMEM;
-
-       ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
-       ret |= ceph_pagelist_encode_32(pl, timeout);
-       ret |= ceph_pagelist_encode_32(pl, payload_len);
-       ret |= ceph_pagelist_append(pl, payload, payload_len);
-       if (ret) {
-               ceph_pagelist_release(pl);
-               return -ENOMEM;
-       }
-
-       ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
-       op->indata_len = pl->length;
-       return 0;
-}
-
 /*
  * @timeout: in seconds
  *
@@ -4906,7 +4854,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
                     size_t *preply_len)
 {
        struct ceph_osd_linger_request *lreq;
-       struct page **pages;
        int ret;
 
        WARN_ON(!timeout);
@@ -4919,41 +4866,35 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
        if (!lreq)
                return -ENOMEM;
 
-       lreq->preply_pages = preply_pages;
-       lreq->preply_len = preply_len;
-
-       ceph_oid_copy(&lreq->t.base_oid, oid);
-       ceph_oloc_copy(&lreq->t.base_oloc, oloc);
-       lreq->t.flags = CEPH_OSD_FLAG_READ;
-
-       lreq->reg_req = alloc_linger_request(lreq);
-       if (!lreq->reg_req) {
+       lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
+       if (!lreq->request_pl) {
                ret = -ENOMEM;
                goto out_put_lreq;
        }
 
-       /*
-        * Pass 0 for cookie because we don't know it yet, it will be
-        * filled in by linger_submit().
-        */
-       ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
-                                    payload, payload_len);
-       if (ret)
+       ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
+       ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
+       ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
+       ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
+       if (ret) {
+               ret = -ENOMEM;
                goto out_put_lreq;
+       }
 
        /* for notify_id */
-       pages = ceph_alloc_page_vector(1, GFP_NOIO);
-       if (IS_ERR(pages)) {
-               ret = PTR_ERR(pages);
+       lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
+       if (IS_ERR(lreq->notify_id_pages)) {
+               ret = PTR_ERR(lreq->notify_id_pages);
+               lreq->notify_id_pages = NULL;
                goto out_put_lreq;
        }
-       ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
-                                                response_data),
-                                pages, PAGE_SIZE, 0, false, true);
 
-       ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
-       if (ret)
-               goto out_put_lreq;
+       lreq->preply_pages = preply_pages;
+       lreq->preply_len = preply_len;
+
+       ceph_oid_copy(&lreq->t.base_oid, oid);
+       ceph_oloc_copy(&lreq->t.base_oloc, oloc);
+       lreq->t.flags = CEPH_OSD_FLAG_READ;
 
        linger_submit(lreq);
        ret = linger_reg_commit_wait(lreq);
index 1461c2d9dec8099a9a2d43a704b4c6cb0375f480..191ec76d4c3b3a72bd216b1027087234790adf07 100644 (file)
@@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
        const struct net_device *last_dev;
        struct net_device_path_ctx ctx = {
                .dev    = dev,
-               .daddr  = daddr,
        };
        struct net_device_path *path;
        int ret = 0;
 
+       memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
        stack->num_paths = 0;
        while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
                last_dev = ctx.dev;
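
Storing the address by memcpy() into an embedded array, instead of aliasing the caller's buffer, gives the path walk a private copy that later resolution steps are free to rewrite; the reason for the copy is an inference here, as the hunk itself only shows the ownership change. A short sketch with illustrative types:

#include <string.h>

struct path_ctx_demo {
	unsigned char daddr[6];	/* owned, mutable copy */
};

static void ctx_init(struct path_ctx_demo *ctx, const unsigned char *daddr)
{
	/* copy rather than alias: the walk may modify ctx->daddr */
	memcpy(ctx->daddr, daddr, sizeof(ctx->daddr));
}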
@@ -3527,7 +3527,6 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
                dev_queue_xmit_nit(skb, dev);
 
        len = skb->len;
-       PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies);
        trace_net_dev_start_xmit(skb, dev);
        rc = netdev_start_xmit(skb, dev, txq, more);
        trace_net_dev_xmit(skb, rc, dev, len);
@@ -4168,7 +4167,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
                        if (!skb)
                                goto out;
 
-                       PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
                        HARD_TX_LOCK(dev, txq, cpu);
 
                        if (!netif_xmit_stopped(txq)) {
@@ -4234,7 +4232,6 @@ int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
 
        skb_set_queue_mapping(skb, queue_id);
        txq = skb_get_tx_queue(dev, skb);
-       PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
 
        local_bh_disable();
 
index 30b523fa4ad2e9be30bdefdc61f70f989c345bbf..c90c74de90d5abd40460e1ca39e20903f533dccc 100644 (file)
@@ -3897,7 +3897,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
        unsigned int delta_len = 0;
        struct sk_buff *tail = NULL;
        struct sk_buff *nskb, *tmp;
-       int err;
+       int len_diff, err;
 
        skb_push(skb, -skb_network_offset(skb) + offset);
 
@@ -3937,9 +3937,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
                skb_push(nskb, -skb_network_offset(nskb) + offset);
 
                skb_release_head_state(nskb);
+               len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
                __copy_skb_header(nskb, skb);
 
                skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+               nskb->transport_header += len_diff;
                skb_copy_from_linear_data_offset(skb, -tnl_hlen,
                                                 nskb->data - tnl_hlen,
                                                 offset + tnl_hlen);
index ae662567a6cb6a440c79a9805a2cd6d146ac5a29..0ea29270d7e53730d14ec43654be8f956f891552 100644 (file)
@@ -1030,9 +1030,15 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v4_ctl_sk);
 }
 
+static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
+{
+       inet_twsk_purge(&dccp_hashinfo, AF_INET);
+}
+
 static struct pernet_operations dccp_v4_ops = {
        .init   = dccp_v4_init_net,
        .exit   = dccp_v4_exit_net,
+       .exit_batch = dccp_v4_exit_batch,
        .id     = &dccp_v4_pernet_id,
        .size   = sizeof(struct dccp_v4_pernet),
 };
index eab3bd1ee9a0a0064c04ff97fd8363e60daa0079..fa663518fa0e465458b7486ad0cd0672425f08b0 100644 (file)
@@ -1115,9 +1115,15 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v6_ctl_sk);
 }
 
+static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
+{
+       inet_twsk_purge(&dccp_hashinfo, AF_INET6);
+}
+
 static struct pernet_operations dccp_v6_ops = {
        .init   = dccp_v6_init_net,
        .exit   = dccp_v6_exit_net,
+       .exit_batch = dccp_v6_exit_batch,
        .id     = &dccp_v6_pernet_id,
        .size   = sizeof(struct dccp_v6_pernet),
 };
index 0ee7d4c0c95545542d850cd2061cc3cddde38306..a09ba642b5e76abdbfd0d844d12be9572c001abc 100644 (file)
@@ -854,7 +854,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
        memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
 
        if (dn_db->router) {
-               struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+               struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
                dn_dn2eth(msg->neighbor, dn->addr);
        }
 
@@ -902,7 +902,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 {
        int n;
        struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-       struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+       struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
        struct sk_buff *skb;
        size_t size;
        unsigned char *ptr;
index 94b306f6d5511b8fcd615c690c6e0dbf20a44f14..fbd98ac853ea0554f7ffbd003d676be4bedfdeee 100644 (file)
@@ -426,7 +426,8 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
                        if (!dn_db->router) {
                                dn_db->router = neigh_clone(neigh);
                        } else {
-                               if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
+                               if (msg->priority > container_of(dn_db->router,
+                                                                struct dn_neigh, n)->priority)
                                        neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
                        }
                }
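
These decnet conversions replace casts that silently relied on the embedded struct neighbour being the first member of struct dn_neigh; container_of() recovers the enclosing object regardless of member position. A standalone sketch of the idiom (simplified macro, illustrative layout):

#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct neigh_demo {
	int refcnt;
};

struct outer_demo {
	int priority;		/* member order no longer matters */
	struct neigh_demo n;	/* embedded base object */
};

static int outer_priority(struct neigh_demo *np)
{
	/* walk back from the embedded member to the container */
	return container_of(np, struct outer_demo, n)->priority;
}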
index 7e85f2a1ae2541b093d58e1a4f9c3b224ab5476d..d1d78a463a06bf091a799001edbfa530a34ff7df 100644 (file)
@@ -1120,7 +1120,7 @@ source_ok:
                /* Ok then, we assume it's directly connected and move on */
 select_source:
                if (neigh)
-                       gateway = ((struct dn_neigh *)neigh)->addr;
+                       gateway = container_of(neigh, struct dn_neigh, n)->addr;
                if (gateway == 0)
                        gateway = fld.daddr;
                if (fld.saddr == 0) {
@@ -1429,7 +1429,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
                /* Use the default router if there is one */
                neigh = neigh_clone(dn_db->router);
                if (neigh) {
-                       gateway = ((struct dn_neigh *)neigh)->addr;
+                       gateway = container_of(neigh, struct dn_neigh, n)->addr;
                        goto make_route;
                }
 
index cdc56ba11f52b2eaa813d61addde078831813733..bdccb613285dbf69b6f9ec7ef625df87aeedd3c3 100644 (file)
@@ -451,6 +451,7 @@ out_rollback_unoffload:
        switchdev_bridge_port_unoffload(brport_dev, dp,
                                        &dsa_slave_switchdev_notifier,
                                        &dsa_slave_switchdev_blocking_notifier);
+       dsa_flush_workqueue();
 out_rollback_unbridge:
        dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
 out_rollback:
index 53a6b14dc50a5bd6a2d940b4ed20943826c71b2b..3d6d33ac20cc29b56a1d8ce4d8930c238025f290 100644 (file)
@@ -536,10 +536,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
                return ret;
        }
 
-       if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
-               prandom_seed((__force u32) ifa->ifa_local);
+       if (!(ifa->ifa_flags & IFA_F_SECONDARY))
                ifap = last_primary;
-       }
 
        rcu_assign_pointer(ifa->ifa_next, *ifap);
        rcu_assign_pointer(*ifap, ifa);
index 9e0bbd02656013e6e8be5765a7b86fc16e6bf831..0ec501845cb3bb51082f8091b4e0ebb32f83bf33 100644 (file)
@@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
        spin_unlock(lock);
 
        /* Disassociate with bind bucket. */
-       bhead = &hashinfo->bhash[tw->tw_bslot];
+       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+                       hashinfo->bhash_size)];
 
        spin_lock(&bhead->lock);
        inet_twsk_bind_unhash(tw, hashinfo);
@@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
           Note that any socket with inet->num != 0 MUST be bound in
           binding cache, even if it is closed.
         */
-       /* Cache inet_bhashfn(), because 'struct net' might be no longer
-        * available later in inet_twsk_kill().
-        */
-       tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
-                                   hashinfo->bhash_size);
-       bhead = &hashinfo->bhash[tw->tw_bslot];
+       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
+                       hashinfo->bhash_size)];
        spin_lock(&bhead->lock);
        tw->tw_tb = icsk->icsk_bind_hash;
        WARN_ON(!icsk->icsk_bind_hash);
@@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
        }
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+{
+       struct inet_timewait_sock *tw;
+       struct sock *sk;
+       struct hlist_nulls_node *node;
+       unsigned int slot;
+
+       for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
+               struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+restart_rcu:
+               cond_resched();
+               rcu_read_lock();
+restart:
+               sk_nulls_for_each_rcu(sk, node, &head->chain) {
+                       if (sk->sk_state != TCP_TIME_WAIT)
+                               continue;
+                       tw = inet_twsk(sk);
+                       if ((tw->tw_family != family) ||
+                               refcount_read(&twsk_net(tw)->ns.count))
+                               continue;
+
+                       if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+                               continue;
+
+                       if (unlikely((tw->tw_family != family) ||
+                                    refcount_read(&twsk_net(tw)->ns.count))) {
+                               inet_twsk_put(tw);
+                               goto restart;
+                       }
+
+                       rcu_read_unlock();
+                       local_bh_disable();
+                       inet_twsk_deschedule_put(tw);
+                       local_bh_enable();
+                       goto restart_rcu;
+               }
+               /* If the nulls value we got at the end of this lookup is
+                * not the expected one, we must restart the lookup.
+                * We probably met an item that was moved to another chain.
+                */
+               if (get_nulls_value(node) != slot)
+                       goto restart;
+               rcu_read_unlock();
+       }
+}
+EXPORT_SYMBOL_GPL(inet_twsk_purge);
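
inet_twsk_purge() follows the usual RCU lookup discipline: filter cheaply, take a reference only if the count is still non-zero, then re-check the filter, because the slot may have been freed and recycled between the first read and the reference grab. A simplified userspace analogue of that acquire-and-revalidate step (C11 atomics, hypothetical object type):

#include <stdatomic.h>
#include <stdbool.h>

struct obj_demo {
	atomic_int refcnt;
	int family;
};

/* take a reference only while the object is still live */
static bool get_not_zero(struct obj_demo *o)
{
	int c = atomic_load(&o->refcnt);

	while (c != 0)
		if (atomic_compare_exchange_weak(&o->refcnt, &c, c + 1))
			return true;
	return false;
}

static bool try_acquire(struct obj_demo *o, int family)
{
	if (o->family != family)	/* cheap pre-filter */
		return false;
	if (!get_not_zero(o))		/* object may already be dying */
		return false;
	if (o->family != family) {	/* re-check: slot may be recycled */
		atomic_fetch_sub(&o->refcnt, 1);
		return false;
	}
	return true;
}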
index 3ee947557b88358e31afce995b3f157b0c41c0f8..aa9a11b20d18e9a11dd36199217ff670227a92f9 100644 (file)
@@ -305,6 +305,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
        struct net *net = sock_net(sk);
        if (sk->sk_family == AF_INET) {
                struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+               u32 tb_id = RT_TABLE_LOCAL;
                int chk_addr_ret;
 
                if (addr_len < sizeof(*addr))
@@ -318,7 +319,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
                         sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
 
-               chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+               tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
+               chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
 
                if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
                                                 addr->sin_addr.s_addr,
@@ -355,6 +357,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                                return -ENODEV;
                        }
                }
+
+               if (!dev && sk->sk_bound_dev_if) {
+                       dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+                       if (!dev) {
+                               rcu_read_unlock();
+                               return -ENODEV;
+                       }
+               }
                has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
                                                    scoped);
                rcu_read_unlock();
index 98c6f3429593150af72cdd6cb25efc5792fe23ef..ed01063d8f3033561ad77e4f646ddbc9e8f93354 100644 (file)
@@ -1726,6 +1726,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        struct in_device *in_dev = __in_dev_get_rcu(dev);
        unsigned int flags = RTCF_MULTICAST;
        struct rtable *rth;
+       bool no_policy;
        u32 itag = 0;
        int err;
 
@@ -1736,8 +1737,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        if (our)
                flags |= RTCF_LOCAL;
 
+       no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+       if (no_policy)
+               IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
        rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
-                          IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+                          no_policy, false);
        if (!rth)
                return -ENOBUFS;
 
@@ -1753,6 +1758,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 #endif
        RT_CACHE_STAT_INC(in_slow_mc);
 
+       skb_dst_drop(skb);
        skb_dst_set(skb, &rth->dst);
        return 0;
 }
@@ -1795,7 +1801,7 @@ static int __mkroute_input(struct sk_buff *skb,
        struct rtable *rth;
        int err;
        struct in_device *out_dev;
-       bool do_cache;
+       bool do_cache, no_policy;
        u32 itag = 0;
 
        /* get a working reference to the output device */
@@ -1840,6 +1846,10 @@ static int __mkroute_input(struct sk_buff *skb,
                }
        }
 
+       no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+       if (no_policy)
+               IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
        fnhe = find_exception(nhc, daddr);
        if (do_cache) {
                if (fnhe)
@@ -1852,8 +1862,7 @@ static int __mkroute_input(struct sk_buff *skb,
                }
        }
 
-       rth = rt_dst_alloc(out_dev->dev, 0, res->type,
-                          IN_DEV_ORCONF(in_dev, NOPOLICY),
+       rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy,
                           IN_DEV_ORCONF(out_dev, NOXFRM));
        if (!rth) {
                err = -ENOBUFS;
@@ -2228,6 +2237,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        struct rtable   *rth;
        struct flowi4   fl4;
        bool do_cache = true;
+       bool no_policy;
 
        /* IP on this device is disabled. */
 
@@ -2346,6 +2356,10 @@ brd_input:
        RT_CACHE_STAT_INC(in_brd);
 
 local_input:
+       no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+       if (no_policy)
+               IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
        do_cache &= res->fi && !itag;
        if (do_cache) {
                struct fib_nh_common *nhc = FIB_RES_NHC(*res);
@@ -2360,7 +2374,7 @@ local_input:
 
        rth = rt_dst_alloc(ip_rt_get_dev(net, res),
                           flags | RTCF_LOCAL, res->type,
-                          IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+                          no_policy, false);
        if (!rth)
                goto e_nobufs;
 
index cf18fbcbf123a864608a9603bfe215def9e4b70e..bb7ef45408e1aee88ee043b48d07dcecc4089711 100644 (file)
@@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
        if (sk->sk_state == TCP_LISTEN)
                goto out;
 
-       if (tp->recvmsg_inq)
+       if (tp->recvmsg_inq) {
                *cmsg_flags = TCP_CMSG_INQ;
+               msg->msg_get_inq = 1;
+       }
        timeo = sock_rcvtimeo(sk, nonblock);
 
        /* Urgent data needs to be handled specially. */
@@ -2559,7 +2561,7 @@ recv_sndq:
 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
                int flags, int *addr_len)
 {
-       int cmsg_flags = 0, ret, inq;
+       int cmsg_flags = 0, ret;
        struct scm_timestamping_internal tss;
 
        if (unlikely(flags & MSG_ERRQUEUE))
@@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
        release_sock(sk);
        sk_defer_free_flush(sk);
 
-       if (cmsg_flags && ret >= 0) {
+       if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
                if (cmsg_flags & TCP_CMSG_TS)
                        tcp_recv_timestamp(msg, sk, &tss);
-               if (cmsg_flags & TCP_CMSG_INQ) {
-                       inq = tcp_inq_hint(sk);
-                       put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+               if (msg->msg_get_inq) {
+                       msg->msg_inq = tcp_inq_hint(sk);
+                       if (cmsg_flags & TCP_CMSG_INQ)
+                               put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
+                                        sizeof(msg->msg_inq), &msg->msg_inq);
                }
        }
        return ret;
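
The reworked path computes the in-queue hint whenever the caller asked for it via msg_get_inq and only emits the cmsg when TCP_INQ is enabled on the socket. For context, a hedged userspace sketch of the consumer side of that cmsg (assumes a connected TCP socket and a libc that exposes TCP_INQ):

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

static ssize_t recv_with_inq(int fd, void *buf, size_t len, int *inq)
{
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm;
	int one = 1;
	ssize_t n;

	/* normally done once at setup; kept here to stay self-contained */
	setsockopt(fd, IPPROTO_TCP, TCP_INQ, &one, sizeof(one));

	n = recvmsg(fd, &msg, 0);
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
		if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_INQ)
			memcpy(inq, CMSG_DATA(cm), sizeof(*inq));
	return n;
}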
index f9cec624068dfa1d218357d7e88c89459d7d54f4..457f5b5d5d4a95c06eca82db1dbe7822cb4d040c 100644 (file)
@@ -3173,6 +3173,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
        struct net *net;
 
+       inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
        list_for_each_entry(net, net_exit_list, exit_list)
                tcp_fastopen_ctx_destroy(net);
 }
index b2250417658850da3516855f6350c66f2a3230db..e7c68fa12fae0343730113f7d718b336ce6c8819 100644 (file)
@@ -3972,8 +3972,6 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 
        addrconf_join_solict(dev, &ifp->addr);
 
-       prandom_seed((__force u32) ifp->addr.s6_addr32[3]);
-
        read_lock_bh(&idev->lock);
        spin_lock(&ifp->lock);
        if (ifp->state == INET6_IFADDR_STATE_DEAD)
index 13678d3908fac9990e5b0c0df87fa4cca685baaf..faaddaf43c90b96e7a2bc9fbad7941ae5ada1b3c 100644 (file)
@@ -2207,9 +2207,15 @@ static void __net_exit tcpv6_net_exit(struct net *net)
        inet_ctl_sock_destroy(net->ipv6.tcp_sk);
 }
 
+static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
+{
+       inet_twsk_purge(&tcp_hashinfo, AF_INET6);
+}
+
 static struct pernet_operations tcpv6_net_ops = {
        .init       = tcpv6_net_init,
        .exit       = tcpv6_net_exit,
+       .exit_batch = tcpv6_net_exit_batch,
 };
 
 int __init tcpv6_init(void)
index fd51db3be91c4064b3f7b5a6f8297e2a9c0843de..339d95df19d324349f7f2791dd431631ad3493a5 100644 (file)
@@ -2826,8 +2826,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
        void *ext_hdrs[SADB_EXT_MAX];
        int err;
 
-       pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
-                       BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+       err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+                             BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+       if (err)
+               return err;
 
        memset(ext_hdrs, 0, sizeof(ext_hdrs));
        err = parse_exthdrs(skb, hdr, ext_hdrs);
@@ -2898,7 +2900,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
                        break;
                if (!aalg->pfkey_supported)
                        continue;
-               if (aalg_tmpl_set(t, aalg))
+               if (aalg_tmpl_set(t, aalg) && aalg->available)
                        sz += sizeof(struct sadb_comb);
        }
        return sz + sizeof(struct sadb_prop);
@@ -2916,7 +2918,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
                if (!ealg->pfkey_supported)
                        continue;
 
-               if (!(ealg_tmpl_set(t, ealg)))
+               if (!(ealg_tmpl_set(t, ealg) && ealg->available))
                        continue;
 
                for (k = 1; ; k++) {
@@ -2927,7 +2929,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
                        if (!aalg->pfkey_supported)
                                continue;
 
-                       if (aalg_tmpl_set(t, aalg))
+                       if (aalg_tmpl_set(t, aalg) && aalg->available)
                                sz += sizeof(struct sadb_comb);
                }
        }
index 1b30c724ca8d1b56a659b20d20f48bfcb676ac2b..dc8aec1a5d3dd6e78bef08094301a90aa7369bab 100644 (file)
@@ -3657,6 +3657,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
                                cbss->transmitted_bss->bssid);
                bss_conf->bssid_indicator = cbss->max_bssid_indicator;
                bss_conf->bssid_index = cbss->bssid_index;
+       } else {
+               bss_conf->nontransmitted = false;
+               memset(bss_conf->transmitter_bssid, 0,
+                      sizeof(bss_conf->transmitter_bssid));
+               bss_conf->bssid_indicator = 0;
+               bss_conf->bssid_index = 0;
        }
 
        /*
index beb6b92eb7804759dddcc3cef2efb68a4338dc9d..88d797fa82ff64c0d1aad14a45bf6ff05ea1c619 100644 (file)
@@ -1405,8 +1405,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
                goto dont_reorder;
 
        /* not part of a BA session */
-       if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
-           ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
+       if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
                goto dont_reorder;
 
        /* new, potentially un-ordered, ampdu frame - process it */
index 325383646f5c0061f049603aa4a965ac40651f7d..b548cec86c9d87f0c063255461d93c891a09f476 100644 (file)
@@ -107,7 +107,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                        ptr += 2;
                }
                if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
-                       mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+                       mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
                        mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
                        ptr += 2;
                }
@@ -221,7 +221,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 
                        if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
                                mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
-                               mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+                               mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
                                ptr += 2;
                        }
 
@@ -1240,7 +1240,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
                WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
 }
 
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
 {
        struct csum_pseudo_header header;
        __wsum csum;
@@ -1256,15 +1256,25 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
        header.csum = 0;
 
        csum = csum_partial(&header, sizeof(header), sum);
-       return (__force u16)csum_fold(csum);
+       return csum_fold(csum);
 }
 
-static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext)
 {
        return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
                                 ~csum_unfold(mpext->csum));
 }
 
+static void put_len_csum(u16 len, __sum16 csum, void *data)
+{
+       __sum16 *sumptr = data + 2;
+       __be16 *ptr = data;
+
+       put_unaligned_be16(len, ptr);
+
+       put_unaligned(csum, sumptr);
+}
+
 void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                         struct mptcp_out_options *opts)
 {
@@ -1340,8 +1350,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                        put_unaligned_be32(mpext->subflow_seq, ptr);
                        ptr += 1;
                        if (opts->csum_reqd) {
-                               put_unaligned_be32(mpext->data_len << 16 |
-                                                  mptcp_make_csum(mpext), ptr);
+                               put_len_csum(mpext->data_len,
+                                            mptcp_make_csum(mpext),
+                                            ptr);
                        } else {
                                put_unaligned_be32(mpext->data_len << 16 |
                                                   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
@@ -1392,11 +1403,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                        goto mp_capable_done;
 
                if (opts->csum_reqd) {
-                       put_unaligned_be32(opts->data_len << 16 |
-                                          __mptcp_make_csum(opts->data_seq,
-                                                            opts->subflow_seq,
-                                                            opts->data_len,
-                                                            ~csum_unfold(opts->csum)), ptr);
+                       put_len_csum(opts->data_len,
+                                    __mptcp_make_csum(opts->data_seq,
+                                                      opts->subflow_seq,
+                                                      opts->data_len,
+                                                      ~csum_unfold(opts->csum)),
+                                    ptr);
                } else {
                        put_unaligned_be32(opts->data_len << 16 |
                                           TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
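
put_len_csum() exists because the DSS checksum is a __sum16 whose bytes are already in wire order: folding it into a host u32 and storing that with put_unaligned_be32() swapped its bytes on little-endian machines. A standalone demo of the difference (illustrative values only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char wire[2] = { 0x12, 0x34 };	/* checksum as seen on the wire */
	unsigned char old_way[4], new_way[4];
	uint16_t csum = 0, len = 8;
	uint32_t word;

	memcpy(&csum, wire, 2);			/* __sum16-style: no byte swap */

	/* old: treat the checksum as a host integer, store big-endian */
	word = ((uint32_t)len << 16) | csum;
	old_way[0] = word >> 24; old_way[1] = word >> 16;
	old_way[2] = word >> 8;  old_way[3] = word;

	/* new (put_len_csum): length big-endian, checksum bytes verbatim */
	new_way[0] = len >> 8; new_way[1] = len & 0xff;
	memcpy(new_way + 2, &csum, 2);

	/* on little-endian hosts old_way ends 34 12, new_way ends 12 34 */
	printf("old: %02x %02x  new: %02x %02x\n",
	       old_way[2], old_way[3], new_way[2], new_way[3]);
	return 0;
}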
index 01809eef29b4bb50f25bb33238a331e942c4e646..aa51b100e03353d0dc2b8f170662bfbe9ad51370 100644 (file)
@@ -178,14 +178,13 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
        struct mptcp_pm_data *pm = &msk->pm;
        bool update_subflows;
 
-       update_subflows = (ssk->sk_state == TCP_CLOSE) &&
-                         (subflow->request_join || subflow->mp_join);
+       update_subflows = subflow->request_join || subflow->mp_join;
        if (!READ_ONCE(pm->work_pending) && !update_subflows)
                return;
 
        spin_lock_bh(&pm->lock);
        if (update_subflows)
-               pm->subflows--;
+               __mptcp_pm_close_subflow(msk);
 
        /* Even if this subflow is not really established, tell the PM to try
         * to pick the next ones, if possible.
index 3c1a3036550f8410ba31d4d2c960d27c29c90186..5655a63aa6a8b216e77fca208e99fb42b33ad666 100644 (file)
@@ -443,7 +443,8 @@ struct mptcp_subflow_context {
                can_ack : 1,        /* only after processing the remote a key */
                disposable : 1,     /* ctx can be free at ulp release time */
                stale : 1,          /* unable to snd/rcv data, do not use for xmit */
-               local_id_valid : 1; /* local_id is correctly initialized */
+               local_id_valid : 1, /* local_id is correctly initialized */
+               valid_csum_seen : 1;        /* at least one csum validated */
        enum mptcp_data_avail data_avail;
        u32     remote_nonce;
        u64     thmac;
@@ -723,7 +724,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk);
 void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
 
 void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
 
 void __init mptcp_pm_init(void);
 void mptcp_pm_data_init(struct mptcp_sock *msk);
@@ -833,6 +834,20 @@ unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk);
 
+/* called under PM lock */
+static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+       if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk))
+               WRITE_ONCE(msk->pm.accept_subflow, true);
+}
+
+static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+       spin_lock_bh(&msk->pm.lock);
+       __mptcp_pm_close_subflow(msk);
+       spin_unlock_bh(&msk->pm.lock);
+}
+
 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
 
index aba260f547daa1a10f2f5d32294b54b0a1f2e10b..be76ada89d9692278f0474828108489f3afd80fd 100644 (file)
@@ -888,7 +888,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        u32 offset, seq, delta;
-       u16 csum;
+       __sum16 csum;
        int len;
 
        if (!csum_reqd)
@@ -955,11 +955,14 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
                                 subflow->map_data_csum);
        if (unlikely(csum)) {
                MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
-               subflow->send_mp_fail = 1;
-               MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+               if (subflow->mp_join || subflow->valid_csum_seen) {
+                       subflow->send_mp_fail = 1;
+                       MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+               }
                return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
        }
 
+       subflow->valid_csum_seen = 1;
        return MAPPING_OK;
 }
 
@@ -1141,6 +1144,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
        }
 }
 
+static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
+{
+       struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+       if (subflow->mp_join)
+               return false;
+       else if (READ_ONCE(msk->csum_enabled))
+               return !subflow->valid_csum_seen;
+       else
+               return !subflow->fully_established;
+}
+
 static bool subflow_check_data_avail(struct sock *ssk)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1218,7 +1233,7 @@ fallback:
                return true;
        }
 
-       if (subflow->mp_join || subflow->fully_established) {
+       if (!subflow_can_fallback(subflow)) {
                /* fatal protocol error, close the socket.
                 * subflow_error_report() will introduce the appropriate barriers
                 */
@@ -1422,20 +1437,20 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
        struct sockaddr_storage addr;
        int remote_id = remote->id;
        int local_id = loc->id;
+       int err = -ENOTCONN;
        struct socket *sf;
        struct sock *ssk;
        u32 remote_token;
        int addrlen;
        int ifindex;
        u8 flags;
-       int err;
 
        if (!mptcp_is_fully_established(sk))
-               return -ENOTCONN;
+               goto err_out;
 
        err = mptcp_subflow_create_socket(sk, &sf);
        if (err)
-               return err;
+               goto err_out;
 
        ssk = sf->sk;
        subflow = mptcp_subflow_ctx(ssk);
@@ -1492,6 +1507,12 @@ failed_unlink:
 failed:
        subflow->disposable = 1;
        sock_release(sf);
+
+err_out:
+       /* we account subflows before creation, and these failures will not
+        * be caught by sk_state_change()
+        */
+       mptcp_pm_close_subflow(msk);
        return err;
 }
 
index 3db256da919bad534a5f598b6befda933edab15e..f2def06d10709a4fc7a55729e7fc147ad5bc5e97 100644 (file)
@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
 
 static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 {
-       tcp->state = TCP_CONNTRACK_ESTABLISHED;
        tcp->seen[0].td_maxwin = 0;
        tcp->seen[1].td_maxwin = 0;
 }
 
-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct nf_conn *ct)
 {
        struct net *net = nf_ct_net(ct);
        int l4num = nf_ct_protonum(ct);
@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
        if (l4num == IPPROTO_TCP) {
                struct nf_tcp_net *tn = nf_tcp_pernet(net);
 
-               timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+               flow_offload_fixup_tcp(&ct->proto.tcp);
+
+               timeout = tn->timeouts[ct->proto.tcp.state];
                timeout -= tn->offload_timeout;
        } else if (l4num == IPPROTO_UDP) {
                struct nf_udp_net *tn = nf_udp_pernet(net);
@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
                WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
 }
 
-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
-{
-       if (nf_ct_protonum(ct) == IPPROTO_TCP)
-               flow_offload_fixup_tcp(&ct->proto.tcp);
-}
-
-static void flow_offload_fixup_ct(struct nf_conn *ct)
-{
-       flow_offload_fixup_ct_state(ct);
-       flow_offload_fixup_ct_timeout(ct);
-}
-
 static void flow_offload_route_release(struct flow_offload *flow)
 {
        nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
        u32 timeout;
 
        timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
-       if (READ_ONCE(flow->timeout) != timeout)
+       if (timeout - READ_ONCE(flow->timeout) > HZ)
                WRITE_ONCE(flow->timeout, timeout);
+       else
+               return;
 
        if (likely(!nf_flowtable_hw_offload(flow_table)))
                return;
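
flow_offload_refresh() now skips both the timeout store and the hardware refresh unless the deadline actually advanced by more than HZ, bounding the work to roughly once per second per flow. A sketch of the wrap-safe comparison (illustrative, not kernel code):

#include <stdint.h>
#include <stdbool.h>

static bool maybe_refresh(uint32_t *stored, uint32_t now,
			  uint32_t lifetime, uint32_t hz)
{
	uint32_t timeout = now + lifetime;

	/* unsigned subtraction stays correct across jiffies wraparound */
	if (timeout - *stored <= hz)
		return false;		/* refreshed recently: nothing to do */
	*stored = timeout;
	return true;			/* caller may now kick hw offload */
}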
@@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
                               nf_flow_offload_rhash_params);
-
-       clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
-
-       if (nf_flow_has_expired(flow))
-               flow_offload_fixup_ct(flow->ct);
-       else
-               flow_offload_fixup_ct_timeout(flow->ct);
-
        flow_offload_free(flow);
 }
 
 void flow_offload_teardown(struct flow_offload *flow)
 {
+       clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
        set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
-       flow_offload_fixup_ct_state(flow->ct);
+       flow_offload_fixup_ct(flow->ct);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);
 
@@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
        return err;
 }
 
-static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
-{
-       struct dst_entry *dst;
-
-       if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
-           tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
-               dst = tuple->dst_cache;
-               if (!dst_check(dst, tuple->dst_cookie))
-                       return true;
-       }
-
-       return false;
-}
-
-static bool nf_flow_has_stale_dst(struct flow_offload *flow)
-{
-       return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
-              flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
-}
-
 static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
                                    struct flow_offload *flow, void *data)
 {
        if (nf_flow_has_expired(flow) ||
-           nf_ct_is_dying(flow->ct) ||
-           nf_flow_has_stale_dst(flow))
-               set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+           nf_ct_is_dying(flow->ct))
+               flow_offload_teardown(flow);
 
        if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
                if (test_bit(NF_FLOW_HW, &flow->flags)) {
index 32c0eb1b482122a7eca4171c17473a052bf4d4fb..b350fe9d00b0b921e36c98446a28a277bdb0dc76 100644 (file)
@@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
        return true;
 }
 
+static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
+{
+       if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
+           tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
+               return true;
+
+       return dst_check(tuple->dst_cache, tuple->dst_cookie);
+}
+
 static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
                                      const struct nf_hook_state *state,
                                      struct dst_entry *dst)
@@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
        if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
                return NF_ACCEPT;
 
+       if (!nf_flow_dst_check(&tuplehash->tuple)) {
+               flow_offload_teardown(flow);
+               return NF_ACCEPT;
+       }
+
        if (skb_try_make_writable(skb, thoff + hdrsize))
                return NF_DROP;
 
@@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
        if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
                return NF_ACCEPT;
 
+       if (!nf_flow_dst_check(&tuplehash->tuple)) {
+               flow_offload_teardown(flow);
+               return NF_ACCEPT;
+       }
+
        if (skb_try_make_writable(skb, thoff + hdrsize))
                return NF_DROP;
 
index 16c3a39689f475d3b5b7d9a8b838494ff04b1bd6..a096b9fbbbdfffdbec2742087a82a243c323fa09 100644 (file)
@@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
 static bool nft_expr_reduce(struct nft_regs_track *track,
                            const struct nft_expr *expr)
 {
-       if (!expr->ops->reduce) {
-               pr_warn_once("missing reduce for expression %s ",
-                            expr->ops->type->name);
-               return false;
-       }
-
-       if (nft_reduce_is_readonly(expr))
-               return false;
-
-       return expr->ops->reduce(track, expr);
+       return false;
 }
 
 static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
index 900d48c810a12654e011f3693252c0b04f493d5b..6f0b07fe648d04e8b3ac3a6560ea037759796237 100644 (file)
@@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route,
        route->tuple[dir].xmit_type     = nft_xmit_type(dst_cache);
 }
 
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+       if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+           dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+               return false;
+
+       return true;
+}
+
 static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
                                     const struct dst_entry *dst_cache,
                                     const struct nf_conn *ct,
@@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
        struct neighbour *n;
        u8 nud_state;
 
+       if (!nft_is_valid_ether_device(dev))
+               goto out;
+
        n = dst_neigh_lookup(dst_cache, daddr);
        if (!n)
                return -1;
@@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
        if (!(nud_state & NUD_VALID))
                return -1;
 
+out:
        return dev_fill_forward_path(dev, ha, stack);
 }
 
@@ -78,15 +91,6 @@ struct nft_forward_info {
        enum flow_offload_xmit_type xmit_type;
 };
 
-static bool nft_is_valid_ether_device(const struct net_device *dev)
-{
-       if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
-           dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
-               return false;
-
-       return true;
-}
-
 static void nft_dev_path_info(const struct net_device_path_stack *stack,
                              struct nft_forward_info *info,
                              unsigned char *ha, struct nf_flowtable *flowtable)
@@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
                                info->indev = NULL;
                                break;
                        }
-                       info->outdev = path->dev;
+                       if (!info->outdev)
+                               info->outdev = path->dev;
                        info->encap[info->num_encaps].id = path->encap.id;
                        info->encap[info->num_encaps].proto = path->encap.proto;
                        info->num_encaps++;
@@ -293,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
        case IPPROTO_TCP:
                tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
                                          sizeof(_tcph), &_tcph);
-               if (unlikely(!tcph || tcph->fin || tcph->rst))
+               if (unlikely(!tcph || tcph->fin || tcph->rst ||
+                            !nf_conntrack_tcp_established(ct)))
                        goto out;
                break;
        case IPPROTO_UDP:
index 05a3795eac8e9a7c8343460d9a41e0755a64c36e..73e9c0a9c187674cced15dbec079734489c3329f 100644 (file)
@@ -1975,7 +1975,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                copied = len;
        }
 
-       skb_reset_transport_header(data_skb);
        err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
 
        if (msg->msg_name) {
index 6055dc9a82aa0de28211ddc85b2291fb59bd7483..aa5e712adf0782f69827290911e6d9a8a9efe67f 100644 (file)
@@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev,
 
                skb_frag = nci_skb_alloc(ndev,
                                         (NCI_DATA_HDR_SIZE + frag_len),
-                                        GFP_KERNEL);
+                                        GFP_ATOMIC);
                if (skb_frag == NULL) {
                        rc = -ENOMEM;
                        goto free_exit;
index 19703a649b5a685c5e56294a928e5f7ff46d3eb8..78c4b6addf15aa79add61b4bd9e920c3a7e1e7de 100644 (file)
@@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
 
        i = 0;
        skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
-                           NCI_DATA_HDR_SIZE, GFP_KERNEL);
+                           NCI_DATA_HDR_SIZE, GFP_ATOMIC);
        if (!skb)
                return -ENOMEM;
 
@@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
                if (i < data_len) {
                        skb = nci_skb_alloc(ndev,
                                            conn_info->max_pkt_payload_len +
-                                           NCI_DATA_HDR_SIZE, GFP_KERNEL);
+                                           NCI_DATA_HDR_SIZE, GFP_ATOMIC);
                        if (!skb)
                                return -ENOMEM;
 
index 2f638f8b7b1e7e0a4b12ccccbe2c52013c3ea972..73ee2771093d60253d3872cdd5379fac9ba8197e 100644 (file)
@@ -487,11 +487,11 @@ struct rds_tcp_net {
 /* All module specific customizations to the RDS-TCP socket should be done in
  * rds_tcp_tune() and applied after socket creation.
  */
-void rds_tcp_tune(struct socket *sock)
+bool rds_tcp_tune(struct socket *sock)
 {
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
-       struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+       struct rds_tcp_net *rtn;
 
        tcp_sock_set_nodelay(sock->sk);
        lock_sock(sk);
@@ -499,10 +499,15 @@ void rds_tcp_tune(struct socket *sock)
         * a process which created this net namespace terminated.
         */
        if (!sk->sk_net_refcnt) {
+               if (!maybe_get_net(net)) {
+                       release_sock(sk);
+                       return false;
+               }
                sk->sk_net_refcnt = 1;
-               get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+               netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
                sock_inuse_add(net, 1);
        }
+       rtn = net_generic(net, rds_tcp_netid);
        if (rtn->sndbuf_size > 0) {
                sk->sk_sndbuf = rtn->sndbuf_size;
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
@@ -512,6 +517,7 @@ void rds_tcp_tune(struct socket *sock)
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
        }
        release_sock(sk);
+       return true;
 }
 
 static void rds_tcp_accept_worker(struct work_struct *work)
index dc8d745d68575f019ca96c706efc77125552a5d2..f8b5930d7b34369e6e7febde2b5cebd3dbc9bf62 100644 (file)
@@ -49,7 +49,7 @@ struct rds_tcp_statistics {
 };
 
 /* tcp.c */
-void rds_tcp_tune(struct socket *sock);
+bool rds_tcp_tune(struct socket *sock);
 void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
 void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
 void rds_tcp_restore_callbacks(struct socket *sock,
index 5461d77fff4f43995a97ace920fb8ddd2005adb6..f0c477c5d1db4e355afc370b563652bad4b52905 100644 (file)
@@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
        if (ret < 0)
                goto out;
 
-       rds_tcp_tune(sock);
+       if (!rds_tcp_tune(sock)) {
+               ret = -EINVAL;
+               goto out;
+       }
 
        if (isv6) {
                sin6.sin6_family = AF_INET6;
index 09cadd556d1e188fde086f356718ab149d567632..7edf2e69d3fed61bc4ae410cd9be33532262f940 100644 (file)
@@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock)
        __module_get(new_sock->ops->owner);
 
        rds_tcp_keepalive(new_sock);
-       rds_tcp_tune(new_sock);
+       if (!rds_tcp_tune(new_sock)) {
+               ret = -EINVAL;
+               goto out;
+       }
 
        inet = inet_sk(new_sock->sk);
 
index 31fcd279c17767a57f6190f623f3cef25f33cf99..211c757bfc3c44e713155585f37d458a6f569f14 100644 (file)
@@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
        struct nlattr *pattr;
        struct tcf_pedit *p;
        int ret = 0, err;
-       int ksize;
+       int i, ksize;
        u32 index;
 
        if (!nla) {
@@ -228,6 +228,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                p->tcfp_nkeys = parm->nkeys;
        }
        memcpy(p->tcfp_keys, parm->keys, ksize);
+       p->tcfp_off_max_hint = 0;
+       for (i = 0; i < p->tcfp_nkeys; ++i) {
+               u32 cur = p->tcfp_keys[i].off;
+
+               /* sanitize the shift value for any later use */
+               p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
+                                             p->tcfp_keys[i].shift);
+
+               /* The AT option can read a single byte, so we can bound the actual
+                * value with uchar max.
+                */
+               cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+
+               /* Each key touches 4 bytes starting from the computed offset */
+               p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+       }
 
        p->tcfp_flags = parm->flags;
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
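
The new loop computes, once at configuration time, the highest byte a pedit action can ever touch: each key writes 4 bytes at its offset, and the AT option can add at most one packet byte's worth, i.e. (0xff & offmask) >> shift. A standalone restatement of the bound (hypothetical key type):

#include <stdint.h>

struct pedit_key_demo {
	uint32_t off;		/* static offset */
	uint32_t offmask;	/* AT: mask applied to the packet byte */
	uint32_t shift;		/* AT: shift applied after masking */
};

static uint32_t off_max_hint(const struct pedit_key_demo *k, int nkeys)
{
	uint32_t hint = 0;

	for (int i = 0; i < nkeys; i++) {
		uint32_t shift = k[i].shift > 31 ? 31 : k[i].shift;
		uint32_t cur = k[i].off;

		/* the AT byte is at most 0xff; bound its contribution */
		cur += (0xffu & k[i].offmask) >> shift;
		/* each key touches 4 bytes from the computed offset */
		if (cur + 4 > hint)
			hint = cur + 4;
	}
	return hint;
}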
@@ -308,13 +324,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a,
                         struct tcf_result *res)
 {
        struct tcf_pedit *p = to_pedit(a);
+       u32 max_offset;
        int i;
 
-       if (skb_unclone(skb, GFP_ATOMIC))
-               return p->tcf_action;
-
        spin_lock(&p->tcf_lock);
 
+       max_offset = (skb_transport_header_was_set(skb) ?
+                     skb_transport_offset(skb) :
+                     skb_network_offset(skb)) +
+                    p->tcfp_off_max_hint;
+       if (skb_ensure_writable(skb, min(skb->len, max_offset)))
+               goto unlock;
+
        tcf_lastuse_update(&p->tcf_tm);
 
        if (p->tcfp_nkeys > 0) {
@@ -403,6 +424,7 @@ bad:
        p->tcf_qstats.overlimits++;
 done:
        bstats_update(&p->tcf_bstats, skb);
+unlock:
        spin_unlock(&p->tcf_lock);
        return p->tcf_action;
 }
index 51e8eb2933ff47210172cbc3d607be0f32f61a3b..338b9ef806e8202c7b41a3294cf5e8ce72e4f9f8 100644 (file)
@@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
                                }
                                break;
                        }
+                       if (!timeo)
+                               return -EAGAIN;
                        if (signal_pending(current)) {
                                read_done = sock_intr_errno(timeo);
                                break;
                        }
-                       if (!timeo)
-                               return -EAGAIN;
                }
 
                if (!smc_rx_data_available(conn)) {
index 6887840682bb7eee52cd59c02fb01dd94c2899a8..bb6a1a12fbde1824f90abbb8152e7d604c90eb9c 100644 (file)
@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags)
 struct socket *sock_from_file(struct file *file)
 {
        if (file->f_op == &socket_file_ops)
-               return file->private_data;      /* set in sock_map_fd */
+               return file->private_data;      /* set in sock_alloc_file */
 
        return NULL;
 }
@@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
 }
 EXPORT_SYMBOL(sock_create_kern);
 
-int __sys_socket(int family, int type, int protocol)
+static struct socket *__sys_socket_create(int family, int type, int protocol)
 {
-       int retval;
        struct socket *sock;
-       int flags;
+       int retval;
 
        /* Check the SOCK_* constants for consistency.  */
        BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
@@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol)
        BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
        BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
 
-       flags = type & ~SOCK_TYPE_MASK;
-       if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
-               return -EINVAL;
+       if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+               return ERR_PTR(-EINVAL);
        type &= SOCK_TYPE_MASK;
 
+       retval = sock_create(family, type, protocol, &sock);
+       if (retval < 0)
+               return ERR_PTR(retval);
+
+       return sock;
+}
+
+struct file *__sys_socket_file(int family, int type, int protocol)
+{
+       struct socket *sock;
+       struct file *file;
+       int flags;
+
+       sock = __sys_socket_create(family, type, protocol);
+       if (IS_ERR(sock))
+               return ERR_CAST(sock);
+
+       flags = type & ~SOCK_TYPE_MASK;
        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
 
-       retval = sock_create(family, type, protocol, &sock);
-       if (retval < 0)
-               return retval;
+       file = sock_alloc_file(sock, flags, NULL);
+       if (IS_ERR(file))
+               sock_release(sock);
+
+       return file;
+}
+
+int __sys_socket(int family, int type, int protocol)
+{
+       struct socket *sock;
+       int flags;
+
+       sock = __sys_socket_create(family, type, protocol);
+       if (IS_ERR(sock))
+               return PTR_ERR(sock);
+
+       flags = type & ~SOCK_TYPE_MASK;
+       if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+               flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
 
        return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
 }
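
Both callers of the new __sys_socket_create() rely on the kernel's ERR_PTR convention, which encodes an errno inside the pointer value itself, so one helper can serve the fd-based and file-based paths. A self-contained sketch of that convention; this is a simplified re-implementation for illustration, not include/linux/err.h:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* The top MAX_ERRNO addresses are reserved for error codes. */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *create_thing(int ok)
{
	static int thing = 42;

	return ok ? (void *)&thing : ERR_PTR(-EINVAL);
}

int main(void)
{
	void *p = create_thing(0);

	if (IS_ERR(p))
		printf("error: %ld\n", PTR_ERR(p));	/* -22 (EINVAL) */
	return 0;
}
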
index 8ca1d809b78d935b27ee83e22ec7f89ec0fa3aea..f549e4c05defcbc3bfc536aaac3d1e1c23580eaa 100644 (file)
@@ -97,7 +97,8 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
                 * timeout, which would result in reconnections being
                 * done without the correct namespace:
                 */
-               .flags          = RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL |
+               .flags          = RPC_CLNT_CREATE_NOPING |
+                                 RPC_CLNT_CREATE_CONNECTED |
                                  RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
        };
        struct rpc_clnt *clnt;
index 22c28cf43ebae3a1acc20d0004fed2ad32280bd1..e2c6eca0271b364c05c4136a9e9ea4eb1e23602b 100644 (file)
@@ -76,6 +76,7 @@ static int    rpc_encode_header(struct rpc_task *task,
 static int     rpc_decode_header(struct rpc_task *task,
                                  struct xdr_stream *xdr);
 static int     rpc_ping(struct rpc_clnt *clnt);
+static int     rpc_ping_noreply(struct rpc_clnt *clnt);
 static void    rpc_check_timeout(struct rpc_task *task);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
@@ -479,9 +480,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
 
        if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
                int err = rpc_ping(clnt);
-               if ((args->flags & RPC_CLNT_CREATE_IGNORE_NULL_UNAVAIL) &&
-                   err == -EOPNOTSUPP)
-                       err = 0;
+               if (err != 0) {
+                       rpc_shutdown_client(clnt);
+                       return ERR_PTR(err);
+               }
+       } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) {
+               int err = rpc_ping_noreply(clnt);
                if (err != 0) {
                        rpc_shutdown_client(clnt);
                        return ERR_PTR(err);
@@ -2712,6 +2716,10 @@ static const struct rpc_procinfo rpcproc_null = {
        .p_decode = rpcproc_decode_null,
 };
 
+static const struct rpc_procinfo rpcproc_null_noreply = {
+       .p_encode = rpcproc_encode_null,
+};
+
 static void
 rpc_null_call_prepare(struct rpc_task *task, void *data)
 {
@@ -2765,6 +2773,28 @@ static int rpc_ping(struct rpc_clnt *clnt)
        return status;
 }
 
+static int rpc_ping_noreply(struct rpc_clnt *clnt)
+{
+       struct rpc_message msg = {
+               .rpc_proc = &rpcproc_null_noreply,
+       };
+       struct rpc_task_setup task_setup_data = {
+               .rpc_client = clnt,
+               .rpc_message = &msg,
+               .callback_ops = &rpc_null_ops,
+               .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
+       };
+       struct rpc_task *task;
+       int status;
+
+       task = rpc_run_task(&task_setup_data);
+       if (IS_ERR(task))
+               return PTR_ERR(task);
+       status = task->tk_status;
+       rpc_put_task(task);
+       return status;
+}
+
 struct rpc_cb_add_xprt_calldata {
        struct rpc_xprt_switch *xps;
        struct rpc_xprt *xprt;
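
As the gssproxy hunk above shows, the new RPC_CLNT_CREATE_CONNECTED flag is meant to pair with RPC_CLNT_CREATE_NOPING. A kernel-context sketch (in-tree code only; program, address and auth setup elided) of a caller opting into the connect-only probe:

/* Skip the NULL-call ping, but still fail client creation early if the
 * transport cannot connect: rpc_ping_noreply() sends a NULL request
 * without expecting a reply. */
static struct rpc_clnt *create_probed_client(struct rpc_create_args *args)
{
	args->flags = RPC_CLNT_CREATE_NOPING |
		      RPC_CLNT_CREATE_CONNECTED |
		      RPC_CLNT_CREATE_NO_IDLE_TIMEOUT;
	return rpc_create(args);	/* ERR_PTR() if the probe fails */
}
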
index af875ad4a822d8a79ec79c409e7d3311a9fc7487..3919fe2c58c5c22926a3f4fe31b1438590cc505c 100644 (file)
@@ -1347,7 +1347,10 @@ static int tls_device_down(struct net_device *netdev)
 
                /* Device contexts for RX and TX will be freed on sk_destruct
                 * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+                * Now release the ref taken above.
                 */
+               if (refcount_dec_and_test(&ctx->refcount))
+                       tls_device_free_ctx(ctx);
        }
 
        up_write(&device_offload_lock);
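
The added release is the usual last-reference pattern. A userspace sketch with a C11 atomic standing in for refcount_t; the semantics shown (free on the transition to zero) are the point, not the exact kernel primitives:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx { atomic_int refcount; };

/* Returns true when the count hits zero, like refcount_dec_and_test(). */
static int dec_and_test(atomic_int *r)
{
	return atomic_fetch_sub(r, 1) == 1;
}

int main(void)
{
	struct ctx *c = malloc(sizeof(*c));

	atomic_init(&c->refcount, 2);	/* e.g. socket + netdev holders */
	if (dec_and_test(&c->refcount))
		free(c);		/* not taken: one ref remains */
	if (dec_and_test(&c->refcount))
		free(c);		/* last ref dropped: freed here */
	printf("done\n");
	return 0;
}
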
index e71a312faa1e2ef856c027e1f55f4b79a34246ae..36367e7e3e0a9c5dbdafc1aea2c9d385b8934934 100644 (file)
@@ -1808,11 +1808,9 @@ static int maybe_init_creds(struct scm_cookie *scm,
 static bool unix_skb_scm_eq(struct sk_buff *skb,
                            struct scm_cookie *scm)
 {
-       const struct unix_skb_parms *u = &UNIXCB(skb);
-
-       return u->pid == scm->pid &&
-              uid_eq(u->uid, scm->creds.uid) &&
-              gid_eq(u->gid, scm->creds.gid) &&
+       return UNIXCB(skb).pid == scm->pid &&
+              uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
+              gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
               unix_secdata_eq(scm, skb);
 }
 
index 21e808fcb676c29e2952c3e177a265671e22c4fa..1a3551b6d18bb6ceec45c75ade76e1b7a51d9277 100644 (file)
@@ -3173,6 +3173,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
        } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
                chandef->width =
                        nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
+               if (chandef->chan->band == NL80211_BAND_S1GHZ) {
+                       /* Reject user input whose channel width doesn't match the channel. */
+                       if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) {
+                               NL_SET_ERR_MSG_ATTR(extack,
+                                                   attrs[NL80211_ATTR_CHANNEL_WIDTH],
+                                                   "bad channel width");
+                               return -EINVAL;
+                       }
+               }
                if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
                        chandef->center_freq1 =
                                nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
@@ -11657,18 +11666,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
        struct cfg80211_bitrate_mask mask;
        struct cfg80211_registered_device *rdev = info->user_ptr[0];
        struct net_device *dev = info->user_ptr[1];
+       struct wireless_dev *wdev = dev->ieee80211_ptr;
        int err;
 
        if (!rdev->ops->set_bitrate_mask)
                return -EOPNOTSUPP;
 
+       wdev_lock(wdev);
        err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
                                            NL80211_ATTR_TX_RATES, &mask,
                                            dev, true);
        if (err)
-               return err;
+               goto out;
 
-       return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+       err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+out:
+       wdev_unlock(wdev);
+       return err;
 }
 
 static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
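
The bitrate-mask fix is the standard single-unlock-label shape: take the lock, then funnel every exit, including the parse failure, through one label. A generic pthread sketch of the control flow (not the kernel's wdev_lock()):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int parse(int good) { return good ? 0 : -EINVAL; }

/* Both the parse step and the apply step run under the lock, and every
 * path, success or failure, releases it exactly once. */
static int set_mask_locked(int good)
{
	int err;

	pthread_mutex_lock(&lock);
	err = parse(good);
	if (err)
		goto out;
	/* ... apply the bitrate mask while still holding the lock ... */
out:
	pthread_mutex_unlock(&lock);
	return err;
}

int main(void)
{
	printf("%d %d\n", set_mask_locked(1), set_mask_locked(0));
	return 0;
}
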
index 4a6d8643291064d4dd41c6e886477821b18e8982..6d82bd9eaf8c7375f36d2fc5ada36e307d38de43 100644 (file)
@@ -1829,7 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
                if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) {
                        struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data;
 
-                       return s1gop->primary_ch;
+                       return s1gop->oper_ch;
                }
        } else {
                tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen);
index 00bd0ecff5a1bbc11378c9c80218a21359e08f50..f1876ea61fdce29d15be13c9635b0d6cf7c90587 100644 (file)
@@ -3744,7 +3744,7 @@ static int stale_bundle(struct dst_entry *dst)
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
 {
        while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
-               dst->dev = dev_net(dev)->loopback_dev;
+               dst->dev = blackhole_netdev;
                dev_hold(dst->dev);
                dev_put(dev);
        }
index 8859fc1935428ed902dda098f0cdd9e5d337507a..3e404e51ec6489225ccb1f4e429f4a09a8dc84d9 100644 (file)
@@ -22,9 +22,9 @@
 #include <unistd.h>
 
 #ifndef landlock_create_ruleset
-static inline int landlock_create_ruleset(
-               const struct landlock_ruleset_attr *const attr,
-               const size_t size, const __u32 flags)
+static inline int
+landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
+                       const size_t size, const __u32 flags)
 {
        return syscall(__NR_landlock_create_ruleset, attr, size, flags);
 }
@@ -32,17 +32,18 @@ static inline int landlock_create_ruleset(
 
 #ifndef landlock_add_rule
 static inline int landlock_add_rule(const int ruleset_fd,
-               const enum landlock_rule_type rule_type,
-               const void *const rule_attr, const __u32 flags)
+                                   const enum landlock_rule_type rule_type,
+                                   const void *const rule_attr,
+                                   const __u32 flags)
 {
-       return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type,
-                       rule_attr, flags);
+       return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
+                      flags);
 }
 #endif
 
 #ifndef landlock_restrict_self
 static inline int landlock_restrict_self(const int ruleset_fd,
-               const __u32 flags)
+                                        const __u32 flags)
 {
        return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
 }
@@ -70,14 +71,17 @@ static int parse_path(char *env_path, const char ***const path_list)
        return num_paths;
 }
 
+/* clang-format off */
+
 #define ACCESS_FILE ( \
        LANDLOCK_ACCESS_FS_EXECUTE | \
        LANDLOCK_ACCESS_FS_WRITE_FILE | \
        LANDLOCK_ACCESS_FS_READ_FILE)
 
-static int populate_ruleset(
-               const char *const env_var, const int ruleset_fd,
-               const __u64 allowed_access)
+/* clang-format on */
+
+static int populate_ruleset(const char *const env_var, const int ruleset_fd,
+                           const __u64 allowed_access)
 {
        int num_paths, i, ret = 1;
        char *env_path_name;
@@ -107,12 +111,10 @@ static int populate_ruleset(
        for (i = 0; i < num_paths; i++) {
                struct stat statbuf;
 
-               path_beneath.parent_fd = open(path_list[i], O_PATH |
-                               O_CLOEXEC);
+               path_beneath.parent_fd = open(path_list[i], O_PATH | O_CLOEXEC);
                if (path_beneath.parent_fd < 0) {
                        fprintf(stderr, "Failed to open \"%s\": %s\n",
-                                       path_list[i],
-                                       strerror(errno));
+                               path_list[i], strerror(errno));
                        goto out_free_name;
                }
                if (fstat(path_beneath.parent_fd, &statbuf)) {
@@ -123,9 +125,10 @@ static int populate_ruleset(
                if (!S_ISDIR(statbuf.st_mode))
                        path_beneath.allowed_access &= ACCESS_FILE;
                if (landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                                       &path_beneath, 0)) {
-                       fprintf(stderr, "Failed to update the ruleset with \"%s\": %s\n",
-                                       path_list[i], strerror(errno));
+                                     &path_beneath, 0)) {
+                       fprintf(stderr,
+                               "Failed to update the ruleset with \"%s\": %s\n",
+                               path_list[i], strerror(errno));
                        close(path_beneath.parent_fd);
                        goto out_free_name;
                }
@@ -139,6 +142,8 @@ out_free_name:
        return ret;
 }
 
+/* clang-format off */
+
 #define ACCESS_FS_ROUGHLY_READ ( \
        LANDLOCK_ACCESS_FS_EXECUTE | \
        LANDLOCK_ACCESS_FS_READ_FILE | \
@@ -154,64 +159,89 @@ out_free_name:
        LANDLOCK_ACCESS_FS_MAKE_SOCK | \
        LANDLOCK_ACCESS_FS_MAKE_FIFO | \
        LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
-       LANDLOCK_ACCESS_FS_MAKE_SYM)
+       LANDLOCK_ACCESS_FS_MAKE_SYM | \
+       LANDLOCK_ACCESS_FS_REFER)
+
+#define ACCESS_ABI_2 ( \
+       LANDLOCK_ACCESS_FS_REFER)
+
+/* clang-format on */
 
 int main(const int argc, char *const argv[], char *const *const envp)
 {
        const char *cmd_path;
        char *const *cmd_argv;
-       int ruleset_fd;
+       int ruleset_fd, abi;
+       __u64 access_fs_ro = ACCESS_FS_ROUGHLY_READ,
+             access_fs_rw = ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE;
        struct landlock_ruleset_attr ruleset_attr = {
-               .handled_access_fs = ACCESS_FS_ROUGHLY_READ |
-                       ACCESS_FS_ROUGHLY_WRITE,
+               .handled_access_fs = access_fs_rw,
        };
 
        if (argc < 2) {
-               fprintf(stderr, "usage: %s=\"...\" %s=\"...\" %s <cmd> [args]...\n\n",
-                               ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
-               fprintf(stderr, "Launch a command in a restricted environment.\n\n");
+               fprintf(stderr,
+                       "usage: %s=\"...\" %s=\"...\" %s <cmd> [args]...\n\n",
+                       ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
+               fprintf(stderr,
+                       "Launch a command in a restricted environment.\n\n");
                fprintf(stderr, "Environment variables containing paths, "
                                "each separated by a colon:\n");
-               fprintf(stderr, "* %s: list of paths allowed to be used in a read-only way.\n",
-                               ENV_FS_RO_NAME);
-               fprintf(stderr, "* %s: list of paths allowed to be used in a read-write way.\n",
-                               ENV_FS_RW_NAME);
-               fprintf(stderr, "\nexample:\n"
-                               "%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" "
-                               "%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" "
-                               "%s bash -i\n",
-                               ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
+               fprintf(stderr,
+                       "* %s: list of paths allowed to be used in a read-only way.\n",
+                       ENV_FS_RO_NAME);
+               fprintf(stderr,
+                       "* %s: list of paths allowed to be used in a read-write way.\n",
+                       ENV_FS_RW_NAME);
+               fprintf(stderr,
+                       "\nexample:\n"
+                       "%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" "
+                       "%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" "
+                       "%s bash -i\n",
+                       ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
                return 1;
        }
 
-       ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
-       if (ruleset_fd < 0) {
+       abi = landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
+       if (abi < 0) {
                const int err = errno;
 
-               perror("Failed to create a ruleset");
+               perror("Failed to check Landlock compatibility");
                switch (err) {
                case ENOSYS:
-                       fprintf(stderr, "Hint: Landlock is not supported by the current kernel. "
-                                       "To support it, build the kernel with "
-                                       "CONFIG_SECURITY_LANDLOCK=y and prepend "
-                                       "\"landlock,\" to the content of CONFIG_LSM.\n");
+                       fprintf(stderr,
+                               "Hint: Landlock is not supported by the current kernel. "
+                               "To support it, build the kernel with "
+                               "CONFIG_SECURITY_LANDLOCK=y and prepend "
+                               "\"landlock,\" to the content of CONFIG_LSM.\n");
                        break;
                case EOPNOTSUPP:
-                       fprintf(stderr, "Hint: Landlock is currently disabled. "
-                                       "It can be enabled in the kernel configuration by "
-                                       "prepending \"landlock,\" to the content of CONFIG_LSM, "
-                                       "or at boot time by setting the same content to the "
-                                       "\"lsm\" kernel parameter.\n");
+                       fprintf(stderr,
+                               "Hint: Landlock is currently disabled. "
+                               "It can be enabled in the kernel configuration by "
+                               "prepending \"landlock,\" to the content of CONFIG_LSM, "
+                               "or at boot time by setting the same content to the "
+                               "\"lsm\" kernel parameter.\n");
                        break;
                }
                return 1;
        }
-       if (populate_ruleset(ENV_FS_RO_NAME, ruleset_fd,
-                               ACCESS_FS_ROUGHLY_READ)) {
+       /* Best-effort security. */
+       if (abi < 2) {
+               ruleset_attr.handled_access_fs &= ~ACCESS_ABI_2;
+               access_fs_ro &= ~ACCESS_ABI_2;
+               access_fs_rw &= ~ACCESS_ABI_2;
+       }
+
+       ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+       if (ruleset_fd < 0) {
+               perror("Failed to create a ruleset");
+               return 1;
+       }
+       if (populate_ruleset(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro)) {
                goto err_close_ruleset;
        }
-       if (populate_ruleset(ENV_FS_RW_NAME, ruleset_fd,
-                               ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE)) {
+       if (populate_ruleset(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw)) {
                goto err_close_ruleset;
        }
        if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
@@ -228,7 +258,7 @@ int main(const int argc, char *const argv[], char *const *const envp)
        cmd_argv = argv + 1;
        execvpe(cmd_path, cmd_argv, envp);
        fprintf(stderr, "Failed to execute \"%s\": %s\n", cmd_path,
-                       strerror(errno));
+               strerror(errno));
        fprintf(stderr, "Hint: access to the binary, the interpreter or "
                        "shared libraries may be denied.\n");
        return 1;
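
The probe-then-degrade flow added to the sandboxer can be exercised on its own. A minimal sketch, assuming uapi headers new enough to define LANDLOCK_ACCESS_FS_REFER (introduced together with ABI version 2); on an older kernel the program simply reports a smaller handled set:

#include <linux/landlock.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* With a NULL attr and the VERSION flag, the syscall returns the
	 * highest Landlock ABI the running kernel supports. */
	long abi = syscall(__NR_landlock_create_ruleset, NULL, 0,
			   LANDLOCK_CREATE_RULESET_VERSION);
	__u64 handled = LANDLOCK_ACCESS_FS_READ_FILE |
			LANDLOCK_ACCESS_FS_REFER;

	if (abi < 0) {
		perror("landlock_create_ruleset");
		return 1;
	}
	/* Best-effort security: drop rights the kernel cannot enforce. */
	if (abi < 2)
		handled &= ~LANDLOCK_ACCESS_FS_REFER;
	printf("ABI %ld, handled access: %#llx\n", abi,
	       (unsigned long long)handled);
	return 0;
}
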
index 9fdd8e7c2a458b3cac9224ee3c4c33e48d30e057..951388334a3fa01d8c2ac23b0a4a2c69d5712dfb 100644 (file)
@@ -25,11 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch,
         * that the custom event is using.
         */
        TP_PROTO(bool preempt,
-                unsigned int prev_state,
                 struct task_struct *prev,
-                struct task_struct *next),
+                struct task_struct *next,
+                unsigned int prev_state),
 
-       TP_ARGS(preempt, prev_state, prev, next),
+       TP_ARGS(preempt, prev, next, prev_state),
 
        /*
         * The next fields are where the customization happens.
index 33c1ed58152294b3cdc82a980cdffeff97110687..f89d3fcff39f2252af5cc5558a00bde0848a44a6 100644 (file)
@@ -222,25 +222,29 @@ cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),
        $(sub_cmd_record_mcount))
 endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
 
-ifdef CONFIG_STACK_VALIDATION
+ifdef CONFIG_OBJTOOL
 
 objtool := $(objtree)/tools/objtool/objtool
 
 objtool_args =                                                         \
-       $(if $(CONFIG_UNWINDER_ORC),orc generate,check)                 \
-       $(if $(part-of-module), --module)                               \
-       $(if $(CONFIG_X86_KERNEL_IBT), --lto --ibt)                     \
-       $(if $(CONFIG_FRAME_POINTER),, --no-fp)                         \
-       $(if $(CONFIG_GCOV_KERNEL), --no-unreachable)                   \
-       $(if $(CONFIG_RETPOLINE), --retpoline)                          \
-       $(if $(CONFIG_X86_SMAP), --uaccess)                             \
+       $(if $(CONFIG_HAVE_JUMP_LABEL_HACK), --hacks=jump_label)        \
+       $(if $(CONFIG_HAVE_NOINSTR_HACK), --hacks=noinstr)              \
+       $(if $(CONFIG_X86_KERNEL_IBT), --ibt)                           \
        $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)             \
-       $(if $(CONFIG_SLS), --sls)
+       $(if $(CONFIG_UNWINDER_ORC), --orc)                             \
+       $(if $(CONFIG_RETPOLINE), --retpoline)                          \
+       $(if $(CONFIG_SLS), --sls)                                      \
+       $(if $(CONFIG_STACK_VALIDATION), --stackval)                    \
+       $(if $(CONFIG_HAVE_STATIC_CALL_INLINE), --static-call)          \
+       --uaccess                                                       \
+       $(if $(linked-object), --link)                                  \
+       $(if $(part-of-module), --module)                               \
+       $(if $(CONFIG_GCOV_KERNEL), --no-unreachable)
 
 cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $@)
 cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd)
 
-endif # CONFIG_STACK_VALIDATION
+endif # CONFIG_OBJTOOL
 
 ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),)
 
@@ -303,6 +307,7 @@ quiet_cmd_cc_prelink_modules = LD [M]  $@
 # modules into native code
 $(obj)/%.prelink.o: objtool-enabled = y
 $(obj)/%.prelink.o: part-of-module := y
+$(obj)/%.prelink.o: linked-object := y
 
 $(obj)/%.prelink.o: $(obj)/%.o FORCE
        $(call if_changed,cc_prelink_modules)
index f67153b260c085dad85b23237cedd704888c11ce..692d64a70542a299b0c29295e06b07def0f773c0 100644 (file)
@@ -8,8 +8,6 @@ ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
 endif
 export DISABLE_LATENT_ENTROPY_PLUGIN
 
-gcc-plugin-$(CONFIG_GCC_PLUGIN_SANCOV)         += sancov_plugin.so
-
 gcc-plugin-$(CONFIG_GCC_PLUGIN_STRUCTLEAK)     += structleak_plugin.so
 gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE)      \
                += -fplugin-arg-structleak_plugin-verbose
@@ -24,12 +22,6 @@ export DISABLE_STRUCTLEAK_PLUGIN
 gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK)              \
                += -DSTRUCTLEAK_PLUGIN
 
-gcc-plugin-$(CONFIG_GCC_PLUGIN_RANDSTRUCT)     += randomize_layout_plugin.so
-gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT)              \
-               += -DRANDSTRUCT_PLUGIN
-gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE)  \
-               += -fplugin-arg-randomize_layout_plugin-performance-mode
-
 gcc-plugin-$(CONFIG_GCC_PLUGIN_STACKLEAK)      += stackleak_plugin.so
 gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK)               \
                += -DSTACKLEAK_PLUGIN
@@ -53,13 +45,19 @@ export DISABLE_ARM_SSP_PER_TASK_PLUGIN
 # All the plugin CFLAGS are collected here in case a build target needs to
 # filter them out of the KBUILD_CFLAGS.
 GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
-# The sancov_plugin.so is included via CFLAGS_KCOV, so it is removed here.
-GCC_PLUGINS_CFLAGS := $(filter-out %/sancov_plugin.so, $(GCC_PLUGINS_CFLAGS))
 export GCC_PLUGINS_CFLAGS
 
 # Add the flags to the build!
 KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS)
 
-# All enabled GCC plugins are collected here for building below.
-GCC_PLUGIN := $(gcc-plugin-y)
+# Some plugins are enabled outside of this Makefile, but they still need to
+# be included in GCC_PLUGIN so they can get built.
+gcc-plugin-external-$(CONFIG_GCC_PLUGIN_SANCOV)                        \
+       += sancov_plugin.so
+gcc-plugin-external-$(CONFIG_GCC_PLUGIN_RANDSTRUCT)            \
+       += randomize_layout_plugin.so
+
+# All enabled GCC plugins are collected here for building in
+# scripts/gcc-plugins/Makefile.
+GCC_PLUGIN := $(gcc-plugin-y) $(gcc-plugin-external-y)
 export GCC_PLUGIN
diff --git a/scripts/Makefile.randstruct b/scripts/Makefile.randstruct
new file mode 100644 (file)
index 0000000..24e283e
--- /dev/null
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+randstruct-cflags-y += -DRANDSTRUCT
+
+ifdef CONFIG_GCC_PLUGIN_RANDSTRUCT
+randstruct-cflags-y    \
+       += -fplugin=$(objtree)/scripts/gcc-plugins/randomize_layout_plugin.so
+randstruct-cflags-$(CONFIG_RANDSTRUCT_PERFORMANCE)             \
+       += -fplugin-arg-randomize_layout_plugin-performance-mode
+else
+randstruct-cflags-y    \
+       += -frandomize-layout-seed-file=$(objtree)/scripts/basic/randstruct.seed
+endif
+
+export RANDSTRUCT_CFLAGS := $(randstruct-cflags-y)
+
+KBUILD_CFLAGS  += $(RANDSTRUCT_CFLAGS)
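
For context, a kernel-style sketch of the annotations these flags act on; it is not standalone, since __randomize_layout and __no_randomize_layout come from the kernel's compiler attribute macros. With the GCC plugin, or Clang's -frandomize-layout-seed-file, marked structs are shuffled per build using the seed generated under scripts/basic/:

/* Opt a struct into layout randomization; member order then depends on
 * the build's randstruct seed rather than on declaration order. */
struct creds_like {
	unsigned int pid;
	unsigned int uid;
	unsigned int gid;
} __randomize_layout;

/* Opt out where the layout is ABI (hardware descriptors, wire formats). */
struct ring_desc {
	void *addr;
	unsigned int len;
} __no_randomize_layout;
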
index 8e2da71f1d5fa7548b0ba96af14571193d2ae052..3a07695e3c8963eea64aef4632c30a25458ab333 100755 (executable)
@@ -164,41 +164,44 @@ gen_xchg_fallbacks()
 
 gen_try_cmpxchg_fallback()
 {
+       local cmpxchg="$1"; shift;
        local order="$1"; shift;
 
 cat <<EOF
-#ifndef arch_try_cmpxchg${order}
-#define arch_try_cmpxchg${order}(_ptr, _oldp, _new) \\
+#ifndef arch_try_${cmpxchg}${order}
+#define arch_try_${cmpxchg}${order}(_ptr, _oldp, _new) \\
 ({ \\
        typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \\
-       ___r = arch_cmpxchg${order}((_ptr), ___o, (_new)); \\
+       ___r = arch_${cmpxchg}${order}((_ptr), ___o, (_new)); \\
        if (unlikely(___r != ___o)) \\
                *___op = ___r; \\
        likely(___r == ___o); \\
 })
-#endif /* arch_try_cmpxchg${order} */
+#endif /* arch_try_${cmpxchg}${order} */
 
 EOF
 }
 
 gen_try_cmpxchg_fallbacks()
 {
-       printf "#ifndef arch_try_cmpxchg_relaxed\n"
-       printf "#ifdef arch_try_cmpxchg\n"
+       local cmpxchg="$1"; shift;
 
-       gen_basic_fallbacks "arch_try_cmpxchg"
+       printf "#ifndef arch_try_${cmpxchg}_relaxed\n"
+       printf "#ifdef arch_try_${cmpxchg}\n"
 
-       printf "#endif /* arch_try_cmpxchg */\n\n"
+       gen_basic_fallbacks "arch_try_${cmpxchg}"
+
+       printf "#endif /* arch_try_${cmpxchg} */\n\n"
 
        for order in "" "_acquire" "_release" "_relaxed"; do
-               gen_try_cmpxchg_fallback "${order}"
+               gen_try_cmpxchg_fallback "${cmpxchg}" "${order}"
        done
 
-       printf "#else /* arch_try_cmpxchg_relaxed */\n"
+       printf "#else /* arch_try_${cmpxchg}_relaxed */\n"
 
-       gen_order_fallbacks "arch_try_cmpxchg"
+       gen_order_fallbacks "arch_try_${cmpxchg}"
 
-       printf "#endif /* arch_try_cmpxchg_relaxed */\n\n"
+       printf "#endif /* arch_try_${cmpxchg}_relaxed */\n\n"
 }
 
 cat << EOF
@@ -218,7 +221,9 @@ for xchg in "arch_xchg" "arch_cmpxchg" "arch_cmpxchg64"; do
        gen_xchg_fallbacks "${xchg}"
 done
 
-gen_try_cmpxchg_fallbacks
+for cmpxchg in "cmpxchg" "cmpxchg64"; do
+       gen_try_cmpxchg_fallbacks "${cmpxchg}"
+done
 
 grep '^[a-z]' "$1" | while read name meta args; do
        gen_proto "${meta}" "${name}" "atomic" "int" ${args}
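
Instantiated with cmpxchg="cmpxchg64" and the empty order, the template above emits the following fallback (sketch of the generated header text):

#ifndef arch_try_cmpxchg64
#define arch_try_cmpxchg64(_ptr, _oldp, _new) \
({ \
	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
	___r = arch_cmpxchg64((_ptr), ___o, (_new)); \
	if (unlikely(___r != ___o)) \
		*___op = ___r; \
	likely(___r == ___o); \
})
#endif /* arch_try_cmpxchg64 */
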
index 68f902731d0180d1ac185a62a496315970e782fb..77c06526a574495f30e970c96a73e8d5d07d4c62 100755 (executable)
@@ -166,7 +166,7 @@ grep '^[a-z]' "$1" | while read name meta args; do
 done
 
 
-for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg"; do
+for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg" "try_cmpxchg64"; do
        for order in "" "_acquire" "_release" "_relaxed"; do
                gen_xchg "${xchg}" "${order}" ""
                printf "\n"
index 961c91c8a884614a852e620346b6346ecbd4b1a1..07c195f605a1b247760981eb48d387ceaa2b7f46 100644 (file)
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 /fixdep
+/randstruct.seed
index eeb6a38c5551516bc3b1539e5f236ff952fea394..dd289a6725aca60c596ae4c7ec831775b98a495a 100644 (file)
@@ -3,3 +3,14 @@
 # fixdep: used to generate dependency information during build process
 
 hostprogs-always-y     += fixdep
+
+# randstruct: the seed is needed before building the gcc-plugin or
+# before running a Clang kernel build.
+gen-randstruct-seed    := $(srctree)/scripts/gen-randstruct-seed.sh
+quiet_cmd_create_randstruct_seed = GENSEED $@
+cmd_create_randstruct_seed = \
+       $(CONFIG_SHELL) $(gen-randstruct-seed) \
+               $@ $(objtree)/include/generated/randstruct_hash.h
+$(obj)/randstruct.seed: $(gen-randstruct-seed) FORCE
+       $(call if_changed,create_randstruct_seed)
+always-$(CONFIG_RANDSTRUCT) += randstruct.seed
diff --git a/scripts/check-blacklist-hashes.awk b/scripts/check-blacklist-hashes.awk
new file mode 100755 (executable)
index 0000000..107c1d3
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright © 2020, Microsoft Corporation. All rights reserved.
+#
+# Author: Mickaël Salaün <mic@linux.microsoft.com>
+#
+# Check that a CONFIG_SYSTEM_BLACKLIST_HASH_LIST file contains a valid array of
+# hash strings.  Each string must start with a prefix ("tbs" or "bin"), then a
+# colon (":"), and finally an even number of hexadecimal lowercase characters
+# (up to 128).
+
+BEGIN {
+       RS = ","
+}
+{
+       if (!match($0, "^[ \t\n\r]*\"([^\"]*)\"[ \t\n\r]*$", part1)) {
+               print "Not a string (item " NR "):", $0;
+               exit 1;
+       }
+       if (!match(part1[1], "^(tbs|bin):(.*)$", part2)) {
+               print "Unknown prefix (item " NR "):", part1[1];
+               exit 1;
+       }
+       if (!match(part2[2], "^([0-9a-f]+)$", part3)) {
+               print "Not a lowercase hexadecimal string (item " NR "):", part2[2];
+               exit 1;
+       }
+       if (length(part3[1]) > 128) {
+               print "Hash string too long (item " NR "):", part3[1];
+               exit 1;
+       }
+       if (length(part3[1]) % 2 == 1) {
+               print "Not an even number of hexadecimal characters (item " NR "):", part3[1];
+               exit 1;
+       }
+}
index 6c6439f69a725f4bd46210fb3bb2c3e80a49fab2..0e6268d598835e7a6eec49474af1b5a2e3673745 100755 (executable)
 set -o errexit
 set -o nounset
 
-READELF="${CROSS_COMPILE:-}readelf"
-ADDR2LINE="${CROSS_COMPILE:-}addr2line"
-SIZE="${CROSS_COMPILE:-}size"
-NM="${CROSS_COMPILE:-}nm"
-
-command -v awk >/dev/null 2>&1 || die "awk isn't installed"
-command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed"
-command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed"
-command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed"
-command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed"
-
 usage() {
        echo "usage: faddr2line [--list] <object file> <func+offset> <func+offset>..." >&2
        exit 1
@@ -69,6 +58,14 @@ die() {
        exit 1
 }
 
+READELF="${CROSS_COMPILE:-}readelf"
+ADDR2LINE="${CROSS_COMPILE:-}addr2line"
+AWK="awk"
+
+command -v ${AWK} >/dev/null 2>&1 || die "${AWK} isn't installed"
+command -v ${READELF} >/dev/null 2>&1 || die "${READELF} isn't installed"
+command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed"
+
 # Try to figure out the source directory prefix so we can remove it from the
 # addr2line output.  HACK ALERT: This assumes that start_kernel() is in
 # init/main.c!  This only works for vmlinux.  Otherwise it falls back to
@@ -76,7 +73,7 @@ die() {
 find_dir_prefix() {
        local objfile=$1
 
-       local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
+       local start_kernel_addr=$(${READELF} --symbols --wide $objfile | ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}')
        [[ -z $start_kernel_addr ]] && return
 
        local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr)
@@ -97,86 +94,133 @@ __faddr2line() {
        local dir_prefix=$3
        local print_warnings=$4
 
-       local func=${func_addr%+*}
+       local sym_name=${func_addr%+*}
        local offset=${func_addr#*+}
        offset=${offset%/*}
-       local size=
-       [[ $func_addr =~ "/" ]] && size=${func_addr#*/}
+       local user_size=
+       [[ $func_addr =~ "/" ]] && user_size=${func_addr#*/}
 
-       if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then
+       if [[ -z $sym_name ]] || [[ -z $offset ]] || [[ $sym_name = $func_addr ]]; then
                warn "bad func+offset $func_addr"
                DONE=1
                return
        fi
 
        # Go through each of the object's symbols which match the func name.
-       # In rare cases there might be duplicates.
-       file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}')
-       while read symbol; do
-               local fields=($symbol)
-               local sym_base=0x${fields[0]}
-               local sym_type=${fields[1]}
-               local sym_end=${fields[3]}
-
-               # calculate the size
-               local sym_size=$(($sym_end - $sym_base))
+       # In rare cases there might be duplicates, in which case we print all
+       # matches.
+       while read line; do
+               local fields=($line)
+               local sym_addr=0x${fields[1]}
+               local sym_elf_size=${fields[2]}
+               local sym_sec=${fields[6]}
+
+               # Get the section size:
+               local sec_size=$(${READELF} --section-headers --wide $objfile |
+                       sed 's/\[ /\[/' |
+                       ${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print "0x" $6; exit }')
+
+               if [[ -z $sec_size ]]; then
+                       warn "bad section size: section: $sym_sec"
+                       DONE=1
+                       return
+               fi
+
+               # Calculate the symbol size.
+               #
+               # Unfortunately we can't use the ELF size, because kallsyms
+               # also includes the padding bytes in its size calculation.  For
+               # kallsyms, the size calculation is the distance between the
+               # symbol and the next symbol in a sorted list.
+               local sym_size
+               local cur_sym_addr
+               local found=0
+               while read line; do
+                       local fields=($line)
+                       cur_sym_addr=0x${fields[1]}
+                       local cur_sym_elf_size=${fields[2]}
+                       local cur_sym_name=${fields[7]:-}
+
+                       if [[ $cur_sym_addr = $sym_addr ]] &&
+                          [[ $cur_sym_elf_size = $sym_elf_size ]] &&
+                          [[ $cur_sym_name = $sym_name ]]; then
+                               found=1
+                               continue
+                       fi
+
+                       if [[ $found = 1 ]]; then
+                               sym_size=$(($cur_sym_addr - $sym_addr))
+                               [[ $sym_size -lt $sym_elf_size ]] && continue;
+                               found=2
+                               break
+                       fi
+               done < <(${READELF} --symbols --wide $objfile | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2)
+
+               if [[ $found = 0 ]]; then
+                       warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size"
+                       DONE=1
+                       return
+               fi
+
+               # If nothing was found after the symbol, assume it's the last
+               # symbol in the section.
+               [[ $found = 1 ]] && sym_size=$(($sec_size - $sym_addr))
+
                if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then
-                       warn "bad symbol size: base: $sym_base end: $sym_end"
+                       warn "bad symbol size: sym_addr: $sym_addr cur_sym_addr: $cur_sym_addr"
                        DONE=1
                        return
                fi
+
                sym_size=0x$(printf %x $sym_size)
 
-               # calculate the address
-               local addr=$(($sym_base + $offset))
+               # Calculate the section address from user-supplied offset:
+               local addr=$(($sym_addr + $offset))
                if [[ -z $addr ]] || [[ $addr = 0 ]]; then
-                       warn "bad address: $sym_base + $offset"
+                       warn "bad address: $sym_addr + $offset"
                        DONE=1
                        return
                fi
                addr=0x$(printf %x $addr)
 
-               # weed out non-function symbols
-               if [[ $sym_type != t ]] && [[ $sym_type != T ]]; then
-                       [[ $print_warnings = 1 ]] &&
-                               echo "skipping $func address at $addr due to non-function symbol of type '$sym_type'"
-                       continue
-               fi
-
-               # if the user provided a size, make sure it matches the symbol's size
-               if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
+               # If the user provided a size, make sure it matches the symbol's size:
+               if [[ -n $user_size ]] && [[ $user_size -ne $sym_size ]]; then
                        [[ $print_warnings = 1 ]] &&
-                               echo "skipping $func address at $addr due to size mismatch ($size != $sym_size)"
+                               echo "skipping $sym_name address at $addr due to size mismatch ($user_size != $sym_size)"
                        continue;
                fi
 
-               # make sure the provided offset is within the symbol's range
+               # Make sure the provided offset is within the symbol's range:
                if [[ $offset -gt $sym_size ]]; then
                        [[ $print_warnings = 1 ]] &&
-                               echo "skipping $func address at $addr due to size mismatch ($offset > $sym_size)"
+                               echo "skipping $sym_name address at $addr due to size mismatch ($offset > $sym_size)"
                        continue
                fi
 
-               # separate multiple entries with a blank line
+               # In case of duplicates or multiple addresses specified on the
+               # cmdline, separate multiple entries with a blank line:
                [[ $FIRST = 0 ]] && echo
                FIRST=0
 
-               # pass real address to addr2line
-               echo "$func+$offset/$sym_size:"
-               local file_lines=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;")
-               [[ -z $file_lines ]] && return
+               echo "$sym_name+$offset/$sym_size:"
 
+               # Pass section address to addr2line and strip absolute paths
+               # from the output:
+               local output=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;")
+               [[ -z $output ]] && continue
+
+               # Default output (non --list):
                if [[ $LIST = 0 ]]; then
-                       echo "$file_lines" | while read -r line
+                       echo "$output" | while read -r line
                        do
                                echo $line
                        done
                        DONE=1;
-                       return
+                       continue
                fi
 
-               # show each line with context
-               echo "$file_lines" | while read -r line
+               # For --list, show each line with its corresponding source code:
+               echo "$output" | while read -r line
                do
                        echo
                        echo $line
@@ -184,12 +228,12 @@ __faddr2line() {
                        n1=$[$n-5]
                        n2=$[$n+5]
                        f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
-                       awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") { if (NR=='$n') printf(">%d<", NR); else printf(" %d ", NR); printf("\t%s\n", $0)}' $f
+                       ${AWK} 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") { if (NR=='$n') printf(">%d<", NR); else printf(" %d ", NR); printf("\t%s\n", $0)}' $f
                done
 
                DONE=1
 
-       done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }')
+       done < <(${READELF} --symbols --wide $objfile | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn')
 }
 
 [[ $# -lt 2 ]] && usage
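
The sizing rule described in the comment above (kallsyms measures to the next symbol, padding included, rather than using the ELF st_size) can be modeled in a few lines of C; this toy version ignores the duplicate-address handling the script performs:

#include <stdio.h>

struct sym { unsigned long addr; const char *name; };

/* kallsyms-style size: distance to the next symbol in a sorted list,
 * or to the end of the section for the last symbol. */
static unsigned long kallsyms_size(const struct sym *syms, int n, int i,
				   unsigned long sec_end)
{
	return (i + 1 < n ? syms[i + 1].addr : sec_end) - syms[i].addr;
}

int main(void)
{
	struct sym text[] = { { 0x1000, "foo" }, { 0x1080, "bar" } };

	printf("foo spans %#lx bytes\n", kallsyms_size(text, 2, 0, 0x1100));
	return 0;
}
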
index 51d81c3f03d6b4bc139abb7ccbb968372a6c1da0..e383cda05367a3d48498b2afaacf265bd3ea74ab 100644 (file)
@@ -46,44 +46,6 @@ config GCC_PLUGIN_LATENT_ENTROPY
           * https://grsecurity.net/
           * https://pax.grsecurity.net/
 
-config GCC_PLUGIN_RANDSTRUCT
-       bool "Randomize layout of sensitive kernel structures"
-       select MODVERSIONS if MODULES
-       help
-         If you say Y here, the layouts of structures that are entirely
-         function pointers (and have not been manually annotated with
-         __no_randomize_layout), or structures that have been explicitly
-         marked with __randomize_layout, will be randomized at compile-time.
-         This can introduce the requirement of an additional information
-         exposure vulnerability for exploits targeting these structure
-         types.
-
-         Enabling this feature will introduce some performance impact,
-         slightly increase memory usage, and prevent the use of forensic
-         tools like Volatility against the system (unless the kernel
-         source tree isn't cleaned after kernel installation).
-
-         The seed used for compilation is located at
-         scripts/gcc-plugins/randomize_layout_seed.h.  It remains after
-         a make clean to allow for external modules to be compiled with
-         the existing seed and will be removed by a make mrproper or
-         make distclean.
-
-         This plugin was ported from grsecurity/PaX. More information at:
-          * https://grsecurity.net/
-          * https://pax.grsecurity.net/
-
-config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
-       bool "Use cacheline-aware structure randomization"
-       depends on GCC_PLUGIN_RANDSTRUCT
-       depends on !COMPILE_TEST        # do not reduce test coverage
-       help
-         If you say Y here, the RANDSTRUCT randomization will make a
-         best effort at restricting randomization to cacheline-sized
-         groups of elements.  It will further not randomize bitfields
-         in structures.  This reduces the performance hit of RANDSTRUCT
-         at the cost of weakened randomization.
-
 config GCC_PLUGIN_ARM_SSP_PER_TASK
        bool
        depends on GCC_PLUGINS && ARM
index 1952d3bb80c6979ab2ffdf2423589cacb4d25602..6f0aecad5d671a015b50421a8d5688e3a265de5c 100644 (file)
@@ -1,12 +1,17 @@
 # SPDX-License-Identifier: GPL-2.0
 
-$(obj)/randomize_layout_plugin.so: $(objtree)/$(obj)/randomize_layout_seed.h
-quiet_cmd_create_randomize_layout_seed = GENSEED $@
+$(obj)/randomize_layout_plugin.so: $(obj)/randomize_layout_seed.h
+quiet_cmd_create_randomize_layout_seed = SEEDHDR $@
 cmd_create_randomize_layout_seed = \
-  $(CONFIG_SHELL) $(srctree)/$(src)/gen-random-seed.sh $@ $(objtree)/include/generated/randomize_layout_hash.h
-$(objtree)/$(obj)/randomize_layout_seed.h: FORCE
+       SEED=$$(cat $(filter-out FORCE,$^) </dev/null); \
+       echo '/*' > $@; \
+       echo ' * This file is automatically generated. Keep it private.' >> $@; \
+       echo ' * Exposing this value will expose the layout of randomized structures.' >> $@; \
+       echo ' */' >> $@; \
+       echo "const char *randstruct_seed = \"$$SEED\";" >> $@
+$(obj)/randomize_layout_seed.h: $(objtree)/scripts/basic/randstruct.seed FORCE
        $(call if_changed,create_randomize_layout_seed)
-targets += randomize_layout_seed.h randomize_layout_hash.h
+targets += randomize_layout_seed.h
 
 # Build rules for plugins
 #
@@ -23,10 +28,11 @@ GCC_PLUGINS_DIR = $(shell $(CC) -print-file-name=plugin)
 
 plugin_cxxflags        = -Wp,-MMD,$(depfile) $(KBUILD_HOSTCXXFLAGS) -fPIC \
                  -include $(srctree)/include/linux/compiler-version.h \
-                  -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \
-                  -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \
-                  -ggdb -Wno-narrowing -Wno-unused-variable \
-                  -Wno-format-diag
+                 -include $(objtree)/include/generated/utsrelease.h \
+                 -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \
+                 -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \
+                 -ggdb -Wno-narrowing -Wno-unused-variable \
+                 -Wno-format-diag
 
 plugin_ldflags = -shared
 
diff --git a/scripts/gcc-plugins/gen-random-seed.sh b/scripts/gcc-plugins/gen-random-seed.sh
deleted file mode 100755 (executable)
index 68af5cc..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-if [ ! -f "$1" ]; then
-       SEED=`od -A n -t x8 -N 32 /dev/urandom | tr -d ' \n'`
-       echo "const char *randstruct_seed = \"$SEED\";" > "$1"
-       HASH=`echo -n "$SEED" | sha256sum | cut -d" " -f1 | tr -d ' \n'`
-       echo "#define RANDSTRUCT_HASHED_SEED \"$HASH\"" > "$2"
-fi
index 8425da41de0dab49c33c2f93e4eb7f1ccf095d79..5d415b2572a8df50e4c95e388e2c6eba248b73c8 100644 (file)
@@ -82,7 +82,7 @@ __visible int plugin_is_GPL_compatible;
 static GTY(()) tree latent_entropy_decl;
 
 static struct plugin_info latent_entropy_plugin_info = {
-       .version        = "201606141920vanilla",
+       .version        = UTS_RELEASE,
        .help           = "disable\tturn off latent entropy instrumentation\n",
 };
 
index 334741a31d0ac383594e8957875ac50b6c300692..ea2aea57040498494aad036c14ab4c0582ebbb0d 100644 (file)
@@ -34,29 +34,11 @@ __visible int plugin_is_GPL_compatible;
 static int performance_mode;
 
 static struct plugin_info randomize_layout_plugin_info = {
-       .version        = "201402201816vanilla",
+       .version        = UTS_RELEASE,
        .help           = "disable\t\t\tdo not activate plugin\n"
                          "performance-mode\tenable cacheline-aware layout randomization\n"
 };
 
-struct whitelist_entry {
-       const char *pathname;
-       const char *lhs;
-       const char *rhs;
-};
-
-static const struct whitelist_entry whitelist[] = {
-       /* NIU overloads mapping with page struct */
-       { "drivers/net/ethernet/sun/niu.c", "page", "address_space" },
-       /* unix_skb_parms via UNIXCB() buffer */
-       { "net/unix/af_unix.c", "unix_skb_parms", "char" },
-       /* big_key payload.data struct splashing */
-       { "security/keys/big_key.c", "path", "void *" },
-       /* walk struct security_hook_heads as an array of struct hlist_head */
-       { "security/security.c", "hlist_head", "security_hook_heads" },
-       { }
-};
-
 /* from old Linux dcache.h */
 static inline unsigned long
 partial_name_hash(unsigned long c, unsigned long prevhash)
@@ -742,60 +724,6 @@ static void handle_local_var_initializers(void)
        }
 }
 
-static bool type_name_eq(gimple stmt, const_tree type_tree, const char *wanted_name)
-{
-       const char *type_name;
-
-       if (type_tree == NULL_TREE)
-               return false;
-
-       switch (TREE_CODE(type_tree)) {
-       case RECORD_TYPE:
-               type_name = TYPE_NAME_POINTER(type_tree);
-               break;
-       case INTEGER_TYPE:
-               if (TYPE_PRECISION(type_tree) == CHAR_TYPE_SIZE)
-                       type_name = "char";
-               else {
-                       INFORM(gimple_location(stmt), "found non-char INTEGER_TYPE cast comparison: %qT\n", type_tree);
-                       debug_tree(type_tree);
-                       return false;
-               }
-               break;
-       case POINTER_TYPE:
-               if (TREE_CODE(TREE_TYPE(type_tree)) == VOID_TYPE) {
-                       type_name = "void *";
-                       break;
-               } else {
-                       INFORM(gimple_location(stmt), "found non-void POINTER_TYPE cast comparison %qT\n", type_tree);
-                       debug_tree(type_tree);
-                       return false;
-               }
-       default:
-               INFORM(gimple_location(stmt), "unhandled cast comparison: %qT\n", type_tree);
-               debug_tree(type_tree);
-               return false;
-       }
-
-       return strcmp(type_name, wanted_name) == 0;
-}
-
-static bool whitelisted_cast(gimple stmt, const_tree lhs_tree, const_tree rhs_tree)
-{
-       const struct whitelist_entry *entry;
-       expanded_location xloc = expand_location(gimple_location(stmt));
-
-       for (entry = whitelist; entry->pathname; entry++) {
-               if (!strstr(xloc.file, entry->pathname))
-                       continue;
-
-               if (type_name_eq(stmt, lhs_tree, entry->lhs) && type_name_eq(stmt, rhs_tree, entry->rhs))
-                       return true;
-       }
-
-       return false;
-}
-
 /*
  * iterate over all statements to find "bad" casts:
  * those where the address of the start of a structure is cast
@@ -872,10 +800,7 @@ static unsigned int find_bad_casts_execute(void)
 #ifndef __DEBUG_PLUGIN
                                if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(ptr_lhs_type)))
 #endif
-                               {
-                                       if (!whitelisted_cast(stmt, ptr_lhs_type, ptr_rhs_type))
-                                               MISMATCH(gimple_location(stmt), "rhs", ptr_lhs_type, ptr_rhs_type);
-                               }
+                               MISMATCH(gimple_location(stmt), "rhs", ptr_lhs_type, ptr_rhs_type);
                                continue;
                        }
 
@@ -898,10 +823,7 @@ static unsigned int find_bad_casts_execute(void)
 #ifndef __DEBUG_PLUGIN
                                if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(op0_type)))
 #endif
-                               {
-                                       if (!whitelisted_cast(stmt, ptr_lhs_type, op0_type))
-                                               MISMATCH(gimple_location(stmt), "op0", ptr_lhs_type, op0_type);
-                               }
+                               MISMATCH(gimple_location(stmt), "op0", ptr_lhs_type, op0_type);
                        } else {
                                const_tree ssa_name_var = SSA_NAME_VAR(rhs1);
                                /* skip bogus type casts introduced by container_of */
@@ -911,10 +833,7 @@ static unsigned int find_bad_casts_execute(void)
 #ifndef __DEBUG_PLUGIN
                                if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(ptr_rhs_type)))
 #endif
-                               {
-                                       if (!whitelisted_cast(stmt, ptr_lhs_type, ptr_rhs_type))
-                                               MISMATCH(gimple_location(stmt), "ssa", ptr_lhs_type, ptr_rhs_type);
-                               }
+                               MISMATCH(gimple_location(stmt), "ssa", ptr_lhs_type, ptr_rhs_type);
                        }
 
                }
index 23bd023a283bd5fcc24e442316ae173925157700..f3d629555b8407741d96ca0d682eaa40de712bd3 100644 (file)
@@ -26,7 +26,7 @@ __visible int plugin_is_GPL_compatible;
 tree sancov_fndecl;
 
 static struct plugin_info sancov_plugin_info = {
-       .version        = "20160402",
+       .version        = UTS_RELEASE,
        .help           = "sancov plugin\n",
 };
 
index 42f0252ee2a4edf3d3e0879ae8920717df84302d..de817d54b8af10ed8bf9263d209209f8422dd766 100644 (file)
@@ -44,7 +44,7 @@ static bool verbose = false;
 static GTY(()) tree track_function_decl;
 
 static struct plugin_info stackleak_plugin_info = {
-       .version = "201707101337",
+       .version = UTS_RELEASE,
        .help = "track-min-size=nn\ttrack stack for functions with a stack frame size >= nn bytes\n"
                "arch=target_arch\tspecify target build arch\n"
                "disable\t\tdo not activate the plugin\n"
index 74e319288389d3ca53fd87cc62d418fe589458c1..86b608a24ec04a9463249f832698db535035ff01 100644 (file)
@@ -37,7 +37,7 @@
 __visible int plugin_is_GPL_compatible;
 
 static struct plugin_info structleak_plugin_info = {
-       .version        = "20190125vanilla",
+       .version        = UTS_RELEASE,
        .help           = "disable\tdo not activate plugin\n"
                          "byref\tinit structs passed by reference\n"
                          "byref-all\tinit anything passed by reference\n"
diff --git a/scripts/gen-randstruct-seed.sh b/scripts/gen-randstruct-seed.sh
new file mode 100755 (executable)
index 0000000..61017b3
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+SEED=$(od -A n -t x8 -N 32 /dev/urandom | tr -d ' \n')
+echo "$SEED" > "$1"
+HASH=$(echo -n "$SEED" | sha256sum | cut -d" " -f1)
+echo "#define RANDSTRUCT_HASHED_SEED \"$HASH\"" > "$2"
index 9361a1ef02c992afce8641c4296c90b2c0e12e54..d7f26f02f1424a649bb0f4b95be337dc9cd99363 100755 (executable)
@@ -108,16 +108,22 @@ objtool_link()
        local objtoolcmd;
        local objtoolopt;
 
-       if is_enabled CONFIG_STACK_VALIDATION && \
-          ( is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT ); then
+       if ! is_enabled CONFIG_OBJTOOL; then
+               return;
+       fi
 
-               # Don't perform vmlinux validation unless explicitly requested,
-               # but run objtool on vmlinux.o now that we have an object file.
-               if is_enabled CONFIG_UNWINDER_ORC; then
-                       objtoolcmd="orc generate"
+       if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then
+
+               # For LTO and IBT, objtool doesn't run on individual
+               # translation units.  Run everything on vmlinux instead.
+
+               if is_enabled CONFIG_HAVE_JUMP_LABEL_HACK; then
+                       objtoolopt="${objtoolopt} --hacks=jump_label"
                fi
 
-               objtoolopt="${objtoolopt} --lto"
+               if is_enabled CONFIG_HAVE_NOINSTR_HACK; then
+                       objtoolopt="${objtoolopt} --hacks=noinstr"
+               fi
 
                if is_enabled CONFIG_X86_KERNEL_IBT; then
                        objtoolopt="${objtoolopt} --ibt"
@@ -126,34 +132,44 @@ objtool_link()
                if is_enabled CONFIG_FTRACE_MCOUNT_USE_OBJTOOL; then
                        objtoolopt="${objtoolopt} --mcount"
                fi
+
+               if is_enabled CONFIG_UNWINDER_ORC; then
+                       objtoolopt="${objtoolopt} --orc"
+               fi
+
+               if is_enabled CONFIG_RETPOLINE; then
+                       objtoolopt="${objtoolopt} --retpoline"
+               fi
+
+               if is_enabled CONFIG_SLS; then
+                       objtoolopt="${objtoolopt} --sls"
+               fi
+
+               if is_enabled CONFIG_STACK_VALIDATION; then
+                       objtoolopt="${objtoolopt} --stackval"
+               fi
+
+               if is_enabled CONFIG_HAVE_STATIC_CALL_INLINE; then
+                       objtoolopt="${objtoolopt} --static-call"
+               fi
+
+               objtoolopt="${objtoolopt} --uaccess"
        fi
 
-       if is_enabled CONFIG_VMLINUX_VALIDATION; then
+       if is_enabled CONFIG_NOINSTR_VALIDATION; then
                objtoolopt="${objtoolopt} --noinstr"
        fi
 
        if [ -n "${objtoolopt}" ]; then
-               if [ -z "${objtoolcmd}" ]; then
-                       objtoolcmd="check"
-               fi
-               objtoolopt="${objtoolopt} --vmlinux"
-               if ! is_enabled CONFIG_FRAME_POINTER; then
-                       objtoolopt="${objtoolopt} --no-fp"
-               fi
+
                if is_enabled CONFIG_GCOV_KERNEL; then
                        objtoolopt="${objtoolopt} --no-unreachable"
                fi
-               if is_enabled CONFIG_RETPOLINE; then
-                       objtoolopt="${objtoolopt} --retpoline"
-               fi
-               if is_enabled CONFIG_X86_SMAP; then
-                       objtoolopt="${objtoolopt} --uaccess"
-               fi
-               if is_enabled CONFIG_SLS; then
-                       objtoolopt="${objtoolopt} --sls"
-               fi
+
+               objtoolopt="${objtoolopt} --link"
+
                info OBJTOOL ${1}
-               tools/objtool/objtool ${objtoolcmd} ${objtoolopt} ${1}
+               tools/objtool/objtool ${objtoolopt} ${1}
        fi
 }
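Taken together, objtool_link() now composes one objtool invocation from the enabled config options rather than dispatching separate subcommands. On a hypothetical CONFIG_LTO_CLANG x86-64 build with ORC, retpolines and stack validation enabled, the final command would take a form along the lines of "tools/objtool/objtool --hacks=jump_label --orc --retpoline --stackval --static-call --uaccess --link vmlinux.o".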
 
index 7c20252a90c68d6108caa0207c6769ae761861dc..250925aab101c8eeda2728f78538e81992dda38e 100755 (executable)
@@ -24,9 +24,8 @@ icc)
        echo 16.0.3
        ;;
 llvm)
-       # https://lore.kernel.org/r/YMtib5hKVyNknZt3@osiris/
        if [ "$SRCARCH" = s390 ]; then
-               echo 13.0.0
+               echo 14.0.0
        else
                echo 11.0.0
        fi
diff --git a/scripts/objdump-func b/scripts/objdump-func
new file mode 100755 (executable)
index 0000000..4eb463d
--- /dev/null
@@ -0,0 +1,29 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Disassemble a single function.
+#
+# usage: objdump-func <file> <func>
+
+set -o errexit
+set -o nounset
+
+OBJDUMP="${CROSS_COMPILE:-}objdump"
+
+command -v gawk >/dev/null 2>&1 || { echo "objdump-func: gawk isn't installed" >&2; exit 1; }
+
+usage() {
+       echo "usage: objdump-func <file> <func>" >&2
+       exit 1
+}
+
+[[ $# -lt 2 ]] && usage
+
+OBJ=$1; shift
+FUNC=$1; shift
+
+# Secret feature to allow adding extra objdump args at the end
+EXTRA_ARGS=$@
+
+# Note this also matches compiler-added suffixes like ".cold", etc.
+${OBJDUMP} -wdr $EXTRA_ARGS $OBJ | gawk -M -v f=$FUNC '/^$/ { P=0; } $0 ~ "<" f "(\\..*)?>:" { P=1; O=strtonum("0x" $1); } { if (P) { o=strtonum("0x" $1); printf("%04x ", o-O); print $0; } }'
index 91a502bb97e8ab05c3c901c1ec7e6c413a7c71e6..67cd420dcf8900a33245ef33711d782c9d0098a1 100755 (executable)
@@ -67,7 +67,7 @@ deploy_kernel_headers () {
        ) > debian/hdrsrcfiles
 
        {
-               if is_enabled CONFIG_STACK_VALIDATION; then
+               if is_enabled CONFIG_OBJTOOL; then
                        echo tools/objtool/objtool
                fi
 
index f355b3e0e968c53ecbdec7f6a1e3394442c1e633..15520806889ee898568913b1cbdaa451e10d20da 100644 (file)
@@ -59,35 +59,27 @@ int main(int argc, char *argv[])
                exit(2);
        }
 
-       for (i = 0; secclass_map[i].name; i++) {
-               struct security_class_mapping *map = &secclass_map[i];
-               map->name = stoupperx(map->name);
-               for (j = 0; map->perms[j]; j++)
-                       map->perms[j] = stoupperx(map->perms[j]);
-       }
-
-       isids_len = sizeof(initial_sid_to_string) / sizeof (char *);
-       for (i = 1; i < isids_len; i++) {
-               const char *s = initial_sid_to_string[i];
-
-               if (s)
-                       initial_sid_to_string[i] = stoupperx(s);
-       }
-
        fprintf(fout, "/* This file is automatically generated.  Do not edit. */\n");
        fprintf(fout, "#ifndef _SELINUX_FLASK_H_\n#define _SELINUX_FLASK_H_\n\n");
 
        for (i = 0; secclass_map[i].name; i++) {
-               struct security_class_mapping *map = &secclass_map[i];
-               fprintf(fout, "#define SECCLASS_%-39s %2d\n", map->name, i+1);
+               char *name = stoupperx(secclass_map[i].name);
+
+               fprintf(fout, "#define SECCLASS_%-39s %2d\n", name, i+1);
+               free(name);
        }
 
        fprintf(fout, "\n");
 
+       isids_len = sizeof(initial_sid_to_string) / sizeof(char *);
        for (i = 1; i < isids_len; i++) {
                const char *s = initial_sid_to_string[i];
-               if (s)
-                       fprintf(fout, "#define SECINITSID_%-39s %2d\n", s, i);
+               if (s) {
+                       char *sidname = stoupperx(s);
+
+                       fprintf(fout, "#define SECINITSID_%-39s %2d\n", sidname, i);
+                       free(sidname);
+               }
        }
        fprintf(fout, "\n#define SECINITSID_NUM %d\n", i-1);
        fprintf(fout, "\nstatic inline bool security_is_socket_class(u16 kern_tclass)\n");
@@ -96,10 +88,14 @@ int main(int argc, char *argv[])
        fprintf(fout, "\tswitch (kern_tclass) {\n");
        for (i = 0; secclass_map[i].name; i++) {
                static char s[] = "SOCKET";
-               struct security_class_mapping *map = &secclass_map[i];
-               int len = strlen(map->name), l = sizeof(s) - 1;
-               if (len >= l && memcmp(map->name + len - l, s, l) == 0)
-                       fprintf(fout, "\tcase SECCLASS_%s:\n", map->name);
+               int len, l;
+               char *name = stoupperx(secclass_map[i].name);
+
+               len = strlen(name);
+               l = sizeof(s) - 1;
+               if (len >= l && memcmp(name + len - l, s, l) == 0)
+                       fprintf(fout, "\tcase SECCLASS_%s:\n", name);
+               free(name);
        }
        fprintf(fout, "\t\tsock = true;\n");
        fprintf(fout, "\t\tbreak;\n");
@@ -110,33 +106,52 @@ int main(int argc, char *argv[])
        fprintf(fout, "}\n");
 
        fprintf(fout, "\n#endif\n");
-       fclose(fout);
+
+       if (fclose(fout) != 0) {
+               fprintf(stderr, "Could not successfully close %s:  %s\n",
+                       argv[1], strerror(errno));
+               exit(4);
+       }
 
        fout = fopen(argv[2], "w");
        if (!fout) {
                fprintf(stderr, "Could not open %s for writing:  %s\n",
                        argv[2], strerror(errno));
-               exit(4);
+               exit(5);
        }
 
        fprintf(fout, "/* This file is automatically generated.  Do not edit. */\n");
        fprintf(fout, "#ifndef _SELINUX_AV_PERMISSIONS_H_\n#define _SELINUX_AV_PERMISSIONS_H_\n\n");
 
        for (i = 0; secclass_map[i].name; i++) {
-               struct security_class_mapping *map = &secclass_map[i];
-               int len = strlen(map->name);
+               const struct security_class_mapping *map = &secclass_map[i];
+               int len;
+               char *name = stoupperx(map->name);
+
+               len = strlen(name);
                for (j = 0; map->perms[j]; j++) {
+                       char *permname;
+
                        if (j >= 32) {
                                fprintf(stderr, "Too many permissions to fit into an access vector at (%s, %s).\n",
                                        map->name, map->perms[j]);
                                exit(5);
                        }
-                       fprintf(fout, "#define %s__%-*s 0x%08xU\n", map->name,
-                               39-len, map->perms[j], 1U<<j);
+                       permname = stoupperx(map->perms[j]);
+                       fprintf(fout, "#define %s__%-*s 0x%08xU\n", name,
+                               39-len, permname, 1U<<j);
+                       free(permname);
                }
+               free(name);
        }
 
        fprintf(fout, "\n#endif\n");
-       fclose(fout);
+
+       if (fclose(fout) != 0) {
+               fprintf(stderr, "Could not successfully close %s:  %s\n",
+                       argv[2], strerror(errno));
+               exit(6);
+       }
+
        exit(0);
 }
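To make the stoupperx() refactoring concrete, a hypothetical excerpt of the headers this tool generates (class, SID and permission names assumed for illustration, not taken from the real policy tables):

    /* flask.h */
    #define SECCLASS_SOCKET                                  1
    #define SECINITSID_KERNEL                                1

    /* av_permissions.h */
    #define SOCKET__CREATE                           0x00000001U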
index 105c1c31a316e1ff4d195151382034762f544363..1415604c3d24ebcbe8ad10cbd0df9b89e09e54d9 100644 (file)
@@ -82,7 +82,7 @@ int main(int argc, char *argv[])
 
        /* print out the class permissions */
        for (i = 0; secclass_map[i].name; i++) {
-               struct security_class_mapping *map = &secclass_map[i];
+               const struct security_class_mapping *map = &secclass_map[i];
                fprintf(fout, "class %s\n", map->name);
                fprintf(fout, "{\n");
                for (j = 0; map->perms[j]; j++)
@@ -103,7 +103,7 @@ int main(int argc, char *argv[])
 #define SYSTEMLOW "s0"
 #define SYSTEMHIGH "s1:c0.c1"
                for (i = 0; secclass_map[i].name; i++) {
-                       struct security_class_mapping *map = &secclass_map[i];
+                       const struct security_class_mapping *map = &secclass_map[i];
 
                        fprintf(fout, "mlsconstrain %s {\n", map->name);
                        for (j = 0; map->perms[j]; j++)
index 9b2c4925585a3a761019cd8b5358ecf09279a29c..f29e4c65698347dac9ebb1ec1267801c2e25afe4 100644 (file)
@@ -160,20 +160,9 @@ config HARDENED_USERCOPY
          copy_from_user() functions) by rejecting memory ranges that
          are larger than the specified heap object, span multiple
          separately allocated pages, are not on the process stack,
-         or are part of the kernel text. This kills entire classes
+         or are part of the kernel text. This prevents entire classes
          of heap overflow exploits and similar kernel memory exposures.
 
-config HARDENED_USERCOPY_PAGESPAN
-       bool "Refuse to copy allocations that span multiple pages"
-       depends on HARDENED_USERCOPY
-       depends on BROKEN
-       help
-         When a multi-page allocation is done without __GFP_COMP,
-         hardened usercopy will reject attempts to copy it. There are,
-         however, several cases of this in the kernel that have not all
-         been removed. This config is intended to be used only while
-         trying to find such users.
-
 config FORTIFY_SOURCE
        bool "Harden common str/mem functions against buffer overflows"
        depends on ARCH_HAS_FORTIFY_SOURCE
index ded4d7c0d13222cb30bbbbc9cd4148f607b63e69..bd2aabb2c60f926f9484c88e81ecabfbe4a898d8 100644 (file)
@@ -266,4 +266,77 @@ config ZERO_CALL_USED_REGS
 
 endmenu
 
+config CC_HAS_RANDSTRUCT
+       def_bool $(cc-option,-frandomize-layout-seed-file=/dev/null)
+
+choice
+       prompt "Randomize layout of sensitive kernel structures"
+       default RANDSTRUCT_FULL if COMPILE_TEST && (GCC_PLUGINS || CC_HAS_RANDSTRUCT)
+       default RANDSTRUCT_NONE
+       help
+         If you enable this, the layouts of structures that are entirely
+         function pointers (and have not been manually annotated with
+         __no_randomize_layout), or structures that have been explicitly
+         marked with __randomize_layout, will be randomized at compile-time.
+         As a result, exploits targeting these structure types
+         additionally require an information exposure vulnerability to
+         discover the randomized layout.
+
+         Enabling this feature will introduce some performance impact,
+         slightly increase memory usage, and prevent the use of forensic
+         tools like Volatility against the system (unless the kernel
+         source tree, which contains the randomization seed, is left on
+         the system after installation).
+
+         The seed used for compilation is in scripts/basic/randstruct.seed.
+         It remains after a "make clean" to allow for external modules to
+         be compiled with the existing seed and will be removed by a
+         "make mrproper" or "make distclean". This file should not be made
+         public, or the structure layout can be determined.
+
+       config RANDSTRUCT_NONE
+               bool "Disable structure layout randomization"
+               help
+                 Build normally: no structure layout randomization.
+
+       config RANDSTRUCT_FULL
+               bool "Fully randomize structure layout"
+               depends on CC_HAS_RANDSTRUCT || GCC_PLUGINS
+               select MODVERSIONS if MODULES
+               help
+                 Fully randomize the member layout of sensitive
+                 structures as much as possible, which may have both a
+                 memory size and performance impact.
+
+                 One difference between the Clang and GCC plugin
+                 implementations is the handling of bitfields. The GCC
+                 plugin treats them as fully separate variables,
+                 introducing sometimes significant padding. Clang tries
+                 to keep adjacent bitfields together, but with their bit
+                 ordering randomized.
+
+       config RANDSTRUCT_PERFORMANCE
+               bool "Limit randomization of structure layout to cache-lines"
+               depends on GCC_PLUGINS
+               select MODVERSIONS if MODULES
+               help
+                 Randomization of sensitive kernel structures will make a
+                 best effort at restricting randomization to cacheline-sized
+                 groups of members. It will further not randomize bitfields
+                 in structures. This reduces the performance hit of RANDSTRUCT
+                 at the cost of weakened randomization.
+endchoice
+
+config RANDSTRUCT
+       def_bool !RANDSTRUCT_NONE
+
+config GCC_PLUGIN_RANDSTRUCT
+       def_bool GCC_PLUGINS && RANDSTRUCT
+       help
+         Use GCC plugin to randomize structure layout.
+
+         This plugin was ported from grsecurity/PaX. More
+         information at:
+          * https://grsecurity.net/
+          * https://pax.grsecurity.net/
+
 endmenu
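For context, the choice above governs structures carrying one of two annotations in kernel code; a minimal sketch with invented struct names:

    /* Explicitly opted in: member order is shuffled at compile time. */
    struct demo_ops {
    	int (*start)(void);
    	int (*stop)(void);
    } __randomize_layout;

    /* Structures made up entirely of function pointers are randomized
     * automatically unless opted out: */
    struct demo_legacy_abi {
    	void (*cb)(void);
    	void (*done)(void);
    } __no_randomize_layout;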
index 4f0eecb67dde09094d79d4e1483bea8baae5417d..900bc540656a247d2c07fab5a67a3a90dd3de824 100644 (file)
@@ -354,13 +354,16 @@ static int apparmor_path_link(struct dentry *old_dentry, const struct path *new_
 }
 
 static int apparmor_path_rename(const struct path *old_dir, struct dentry *old_dentry,
-                               const struct path *new_dir, struct dentry *new_dentry)
+                               const struct path *new_dir, struct dentry *new_dentry,
+                               const unsigned int flags)
 {
        struct aa_label *label;
        int error = 0;
 
        if (!path_mediated_fs(old_dentry))
                return 0;
+       if ((flags & RENAME_EXCHANGE) && !path_mediated_fs(new_dentry))
+               return 0;
 
        label = begin_current_label_crit_section();
        if (!unconfined(label)) {
@@ -374,10 +377,27 @@ static int apparmor_path_rename(const struct path *old_dir, struct dentry *old_d
                        d_backing_inode(old_dentry)->i_mode
                };
 
-               error = aa_path_perm(OP_RENAME_SRC, label, &old_path, 0,
-                                    MAY_READ | AA_MAY_GETATTR | MAY_WRITE |
-                                    AA_MAY_SETATTR | AA_MAY_DELETE,
-                                    &cond);
+               if (flags & RENAME_EXCHANGE) {
+                       struct path_cond cond_exchange = {
+                               i_uid_into_mnt(mnt_userns, d_backing_inode(new_dentry)),
+                               d_backing_inode(new_dentry)->i_mode
+                       };
+
+                       error = aa_path_perm(OP_RENAME_SRC, label, &new_path, 0,
+                                            MAY_READ | AA_MAY_GETATTR | MAY_WRITE |
+                                            AA_MAY_SETATTR | AA_MAY_DELETE,
+                                            &cond_exchange);
+                       if (!error)
+                               error = aa_path_perm(OP_RENAME_DEST, label, &old_path,
+                                                    0, MAY_WRITE | AA_MAY_SETATTR |
+                                                    AA_MAY_CREATE, &cond_exchange);
+               }
+
+               if (!error)
+                       error = aa_path_perm(OP_RENAME_SRC, label, &old_path, 0,
+                                            MAY_READ | AA_MAY_GETATTR | MAY_WRITE |
+                                            AA_MAY_SETATTR | AA_MAY_DELETE,
+                                            &cond);
                if (!error)
                        error = aa_path_perm(OP_RENAME_DEST, label, &new_path,
                                             0, MAY_WRITE | AA_MAY_SETATTR |
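The new branch only runs for exchange renames, where each dentry is simultaneously a source and a destination, so AppArmor must check both permission sets on both paths. A minimal userspace sketch of the operation being mediated (paths invented; renameat2() and RENAME_EXCHANGE assume glibc 2.28 or later):

    #define _GNU_SOURCE
    #include <fcntl.h>   /* AT_FDCWD */
    #include <stdio.h>   /* renameat2(), RENAME_EXCHANGE, perror() */

    int main(void)
    {
    	/* Atomically swap the two paths. */
    	if (renameat2(AT_FDCWD, "/tmp/a", AT_FDCWD, "/tmp/b",
    		      RENAME_EXCHANGE))
    		perror("renameat2");
    	return 0;
    }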
index c8c8a4a4e7a00c800f10ab939839ca00bf378691..8a82a6c7f48a44def6d1aa127b66ea8afa3d7e34 100644 (file)
@@ -75,7 +75,8 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
                /* v1 API expect signature without xattr type */
                return digsig_verify(keyring, sig + 1, siglen - 1, digest,
                                     digestlen);
-       case 2:
+       case 2: /* regular file data hash based signature */
+       case 3: /* struct ima_file_id data based signature */
                return asymmetric_verify(keyring, sig, siglen, digest,
                                         digestlen);
        }
index 0d44f41d16f87088c56a22b4713098bffbb1a059..f8b8c5004fc7c6fadba3bcdc4a6f49c44b3641cd 100644 (file)
@@ -38,9 +38,6 @@ extern int evm_initialized;
 
 extern int evm_hmac_attrs;
 
-extern struct crypto_shash *hmac_tfm;
-extern struct crypto_shash *hash_tfm;
-
 /* List of EVM protected security xattrs */
 extern struct list_head evm_config_xattrnames;
 
index 0450d79afdc8fc7dd85d77f35b82ea921957f6d7..a733aff02006381826ea7e9c1b588734bbffd4fa 100644 (file)
@@ -26,7 +26,7 @@
 static unsigned char evmkey[MAX_KEY_SIZE];
 static const int evmkey_len = MAX_KEY_SIZE;
 
-struct crypto_shash *hmac_tfm;
+static struct crypto_shash *hmac_tfm;
 static struct crypto_shash *evm_tfm[HASH_ALGO__LAST];
 
 static DEFINE_MUTEX(mutex);
index 7d87772f0ce681971db412ad606a0e84ab1d7a51..cc88f02c7562159c258e0de27dc5f4868144d8a1 100644 (file)
@@ -436,7 +436,7 @@ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry)
        struct inode *inode = d_backing_inode(dentry);
 
        if (!evm_key_loaded() || !S_ISREG(inode->i_mode) || evm_fixmode)
-               return 0;
+               return INTEGRITY_PASS;
        return evm_verify_hmac(dentry, NULL, NULL, 0, NULL);
 }
 
index f3a9cc201c8c2a0cc276b8bb6f1c4be2559002b9..7249f16257c72c8f72fe17debde1a0b1f7aee845 100644 (file)
@@ -69,10 +69,9 @@ choice
          hash, defined as 20 bytes, and a null terminated pathname,
          limited to 255 characters.  The 'ima-ng' measurement list
          template permits both larger hash digests and longer
-         pathnames.
+         pathnames. The configured default template can be replaced
+         by specifying "ima_template=" on the boot command line.
 
-       config IMA_TEMPLATE
-               bool "ima"
        config IMA_NG_TEMPLATE
                bool "ima-ng (default)"
        config IMA_SIG_TEMPLATE
@@ -82,7 +81,6 @@ endchoice
 config IMA_DEFAULT_TEMPLATE
        string
        depends on IMA
-       default "ima" if IMA_TEMPLATE
        default "ima-ng" if IMA_NG_TEMPLATE
        default "ima-sig" if IMA_SIG_TEMPLATE
 
@@ -102,19 +100,19 @@ choice
 
        config IMA_DEFAULT_HASH_SHA256
                bool "SHA256"
-               depends on CRYPTO_SHA256=y && !IMA_TEMPLATE
+               depends on CRYPTO_SHA256=y
 
        config IMA_DEFAULT_HASH_SHA512
                bool "SHA512"
-               depends on CRYPTO_SHA512=y && !IMA_TEMPLATE
+               depends on CRYPTO_SHA512=y
 
        config IMA_DEFAULT_HASH_WP512
                bool "WP512"
-               depends on CRYPTO_WP512=y && !IMA_TEMPLATE
+               depends on CRYPTO_WP512=y
 
        config IMA_DEFAULT_HASH_SM3
                bool "SM3"
-               depends on CRYPTO_SM3=y && !IMA_TEMPLATE
+               depends on CRYPTO_SM3=y
 endchoice
 
 config IMA_DEFAULT_HASH
index c6805af4621187767d87ba1dd9cce908f8f8f38a..c1e76282b5ee5ad38ae61dc532c3091c75950738 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/xattr.h>
 #include <linux/evm.h>
 #include <linux/iversion.h>
+#include <linux/fsverity.h>
 
 #include "ima.h"
 
@@ -200,6 +201,32 @@ int ima_get_action(struct user_namespace *mnt_userns, struct inode *inode,
                                allowed_algos);
 }
 
+static int ima_get_verity_digest(struct integrity_iint_cache *iint,
+                                struct ima_max_digest_data *hash)
+{
+       enum hash_algo verity_alg;
+       int ret;
+
+       /*
+        * On failure, 'measure' policy rules will result in a file data
+        * hash containing 0's.
+        */
+       ret = fsverity_get_digest(iint->inode, hash->digest, &verity_alg);
+       if (ret)
+               return ret;
+
+       /*
+        * Unlike when actually calculating the file hash, in the
+        * fsverity case return the verity digest to be included in the
+        * measurement list, regardless of the hash algorithm. A
+        * mismatch between the verity algorithm and the xattr signature
+        * algorithm, if one exists, will be detected later.
+       hash->hdr.algo = verity_alg;
+       hash->hdr.length = hash_digest_size[verity_alg];
+       return 0;
+}
+
 /*
  * ima_collect_measurement - collect file measurement
  *
@@ -242,16 +269,30 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
         */
        i_version = inode_query_iversion(inode);
        hash.hdr.algo = algo;
+       hash.hdr.length = hash_digest_size[algo];
 
        /* Initialize hash digest to 0's in case of failure */
        memset(&hash.digest, 0, sizeof(hash.digest));
 
-       if (buf)
+       if (iint->flags & IMA_VERITY_REQUIRED) {
+               result = ima_get_verity_digest(iint, &hash);
+               switch (result) {
+               case 0:
+                       break;
+               case -ENODATA:
+                       audit_cause = "no-verity-digest";
+                       break;
+               default:
+                       audit_cause = "invalid-verity-digest";
+                       break;
+               }
+       } else if (buf) {
                result = ima_calc_buffer_hash(buf, size, &hash.hdr);
-       else
+       } else {
                result = ima_calc_file_hash(file, &hash.hdr);
+       }
 
-       if (result && result != -EBADF && result != -EINVAL)
+       if (result == -ENOMEM)
                goto out;
 
        length = sizeof(hash.hdr) + hash.hdr.length;
index 17232bbfb9f96c541804e0c0e0a2ff45f73943f1..cdb84dccd24e01bf13152c0fe7168af5ba8088e8 100644 (file)
@@ -13,7 +13,9 @@
 #include <linux/magic.h>
 #include <linux/ima.h>
 #include <linux/evm.h>
+#include <linux/fsverity.h>
 #include <keys/system_keyring.h>
+#include <uapi/linux/fsverity.h>
 
 #include "ima.h"
 
@@ -183,13 +185,18 @@ enum hash_algo ima_get_hash_algo(const struct evm_ima_xattr_data *xattr_value,
                return ima_hash_algo;
 
        switch (xattr_value->type) {
+       case IMA_VERITY_DIGSIG:
+               sig = (typeof(sig))xattr_value;
+               if (sig->version != 3 || xattr_len <= sizeof(*sig) ||
+                   sig->hash_algo >= HASH_ALGO__LAST)
+                       return ima_hash_algo;
+               return sig->hash_algo;
        case EVM_IMA_XATTR_DIGSIG:
                sig = (typeof(sig))xattr_value;
                if (sig->version != 2 || xattr_len <= sizeof(*sig)
                    || sig->hash_algo >= HASH_ALGO__LAST)
                        return ima_hash_algo;
                return sig->hash_algo;
-               break;
        case IMA_XATTR_DIGEST_NG:
                /* first byte contains algorithm id */
                ret = xattr_value->data[0];
@@ -225,6 +232,40 @@ int ima_read_xattr(struct dentry *dentry,
        return ret;
 }
 
+/*
+ * calc_file_id_hash - calculate the hash of the ima_file_id struct data
+ * @type: xattr type [enum evm_ima_xattr_type]
+ * @algo: hash algorithm [enum hash_algo]
+ * @digest: pointer to the digest to be hashed
+ * @hash: (out) pointer to the hash
+ *
+ * IMA signature version 3 disambiguates the data that is signed by
+ * indirectly signing the hash of the ima_file_id structure data.
+ *
+ * Signing the ima_file_id struct is currently only supported for
+ * IMA_VERITY_DIGSIG type xattrs.
+ *
+ * Return 0 on success, error code otherwise.
+ */
+static int calc_file_id_hash(enum evm_ima_xattr_type type,
+                            enum hash_algo algo, const u8 *digest,
+                            struct ima_digest_data *hash)
+{
+       struct ima_file_id file_id = {
+               .hash_type = IMA_VERITY_DIGSIG, .hash_algorithm = algo};
+       unsigned int unused = HASH_MAX_DIGESTSIZE - hash_digest_size[algo];
+
+       if (type != IMA_VERITY_DIGSIG)
+               return -EINVAL;
+
+       memcpy(file_id.hash, digest, hash_digest_size[algo]);
+
+       hash->algo = algo;
+       hash->length = hash_digest_size[algo];
+
+       return ima_calc_buffer_hash(&file_id, sizeof(file_id) - unused, hash);
+}
+
 /*
  * xattr_verify - verify xattr digest or signature
  *
@@ -236,7 +277,10 @@ static int xattr_verify(enum ima_hooks func, struct integrity_iint_cache *iint,
                        struct evm_ima_xattr_data *xattr_value, int xattr_len,
                        enum integrity_status *status, const char **cause)
 {
+       struct ima_max_digest_data hash;
+       struct signature_v2_hdr *sig;
        int rc = -EINVAL, hash_start = 0;
+       int mask;
 
        switch (xattr_value->type) {
        case IMA_XATTR_DIGEST_NG:
@@ -246,7 +290,10 @@ static int xattr_verify(enum ima_hooks func, struct integrity_iint_cache *iint,
        case IMA_XATTR_DIGEST:
                if (*status != INTEGRITY_PASS_IMMUTABLE) {
                        if (iint->flags & IMA_DIGSIG_REQUIRED) {
-                               *cause = "IMA-signature-required";
+                               if (iint->flags & IMA_VERITY_REQUIRED)
+                                       *cause = "verity-signature-required";
+                               else
+                                       *cause = "IMA-signature-required";
                                *status = INTEGRITY_FAIL;
                                break;
                        }
@@ -274,6 +321,20 @@ static int xattr_verify(enum ima_hooks func, struct integrity_iint_cache *iint,
                break;
        case EVM_IMA_XATTR_DIGSIG:
                set_bit(IMA_DIGSIG, &iint->atomic_flags);
+
+               mask = IMA_DIGSIG_REQUIRED | IMA_VERITY_REQUIRED;
+               if ((iint->flags & mask) == mask) {
+                       *cause = "verity-signature-required";
+                       *status = INTEGRITY_FAIL;
+                       break;
+               }
+
+               sig = (typeof(sig))xattr_value;
+               if (sig->version >= 3) {
+                       *cause = "invalid-signature-version";
+                       *status = INTEGRITY_FAIL;
+                       break;
+               }
                rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
                                             (const char *)xattr_value,
                                             xattr_len,
@@ -296,6 +357,44 @@ static int xattr_verify(enum ima_hooks func, struct integrity_iint_cache *iint,
                } else {
                        *status = INTEGRITY_PASS;
                }
+               break;
+       case IMA_VERITY_DIGSIG:
+               set_bit(IMA_DIGSIG, &iint->atomic_flags);
+
+               if (iint->flags & IMA_DIGSIG_REQUIRED) {
+                       if (!(iint->flags & IMA_VERITY_REQUIRED)) {
+                               *cause = "IMA-signature-required";
+                               *status = INTEGRITY_FAIL;
+                               break;
+                       }
+               }
+
+               sig = (typeof(sig))xattr_value;
+               if (sig->version != 3) {
+                       *cause = "invalid-signature-version";
+                       *status = INTEGRITY_FAIL;
+                       break;
+               }
+
+               rc = calc_file_id_hash(IMA_VERITY_DIGSIG, iint->ima_hash->algo,
+                                      iint->ima_hash->digest, &hash.hdr);
+               if (rc) {
+                       *cause = "sigv3-hashing-error";
+                       *status = INTEGRITY_FAIL;
+                       break;
+               }
+
+               rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
+                                            (const char *)xattr_value,
+                                            xattr_len, hash.digest,
+                                            hash.hdr.length);
+               if (rc) {
+                       *cause = "invalid-verity-signature";
+                       *status = INTEGRITY_FAIL;
+               } else {
+                       *status = INTEGRITY_PASS;
+               }
+
                break;
        default:
                *status = INTEGRITY_UNKNOWN;
@@ -396,8 +495,15 @@ int ima_appraise_measurement(enum ima_hooks func,
                if (rc && rc != -ENODATA)
                        goto out;
 
-               cause = iint->flags & IMA_DIGSIG_REQUIRED ?
-                               "IMA-signature-required" : "missing-hash";
+               if (iint->flags & IMA_DIGSIG_REQUIRED) {
+                       if (iint->flags & IMA_VERITY_REQUIRED)
+                               cause = "verity-signature-required";
+                       else
+                               cause = "IMA-signature-required";
+               } else {
+                       cause = "missing-hash";
+               }
+
                status = INTEGRITY_NOLABEL;
                if (file->f_mode & FMODE_CREATED)
                        iint->flags |= IMA_NEW_FILE;
index 3d3f8c5c502ba9f401ea3973d849f06b1381edce..040b03ddc1c776665c75fde3081a7d9775d17041 100644 (file)
@@ -335,7 +335,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
        hash_algo = ima_get_hash_algo(xattr_value, xattr_len);
 
        rc = ima_collect_measurement(iint, file, buf, size, hash_algo, modsig);
-       if (rc != 0 && rc != -EBADF && rc != -EINVAL)
+       if (rc == -ENOMEM)
                goto out_locked;
 
        if (!pathbuf)   /* ima_rdwr_violation possibly pre-fetched */
@@ -432,7 +432,7 @@ int ima_file_mmap(struct file *file, unsigned long prot)
 int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot)
 {
        struct ima_template_desc *template = NULL;
-       struct file *file = vma->vm_file;
+       struct file *file;
        char filename[NAME_MAX];
        char *pathbuf = NULL;
        const char *pathname = NULL;
index eea6e92500b8ea03853b119cfab9f5622c6672fb..73917413365b3cead675a1b2b89d05e8322892d7 100644 (file)
@@ -1023,6 +1023,7 @@ enum policy_opt {
        Opt_fowner_gt, Opt_fgroup_gt,
        Opt_uid_lt, Opt_euid_lt, Opt_gid_lt, Opt_egid_lt,
        Opt_fowner_lt, Opt_fgroup_lt,
+       Opt_digest_type,
        Opt_appraise_type, Opt_appraise_flag, Opt_appraise_algos,
        Opt_permit_directio, Opt_pcr, Opt_template, Opt_keyrings,
        Opt_label, Opt_err
@@ -1065,6 +1066,7 @@ static const match_table_t policy_tokens = {
        {Opt_egid_lt, "egid<%s"},
        {Opt_fowner_lt, "fowner<%s"},
        {Opt_fgroup_lt, "fgroup<%s"},
+       {Opt_digest_type, "digest_type=%s"},
        {Opt_appraise_type, "appraise_type=%s"},
        {Opt_appraise_flag, "appraise_flag=%s"},
        {Opt_appraise_algos, "appraise_algos=%s"},
@@ -1172,6 +1174,21 @@ static void check_template_modsig(const struct ima_template_desc *template)
 #undef MSG
 }
 
+/*
+ * Warn if the template does not contain the given field.
+ */
+static void check_template_field(const struct ima_template_desc *template,
+                                const char *field, const char *msg)
+{
+       int i;
+
+       for (i = 0; i < template->num_fields; i++)
+               if (!strcmp(template->fields[i]->field_id, field))
+                       return;
+
+       pr_notice_once("%s", msg);
+}
+
 static bool ima_validate_rule(struct ima_rule_entry *entry)
 {
        /* Ensure that the action is set and is compatible with the flags */
@@ -1214,7 +1231,8 @@ static bool ima_validate_rule(struct ima_rule_entry *entry)
                                     IMA_INMASK | IMA_EUID | IMA_PCR |
                                     IMA_FSNAME | IMA_GID | IMA_EGID |
                                     IMA_FGROUP | IMA_DIGSIG_REQUIRED |
-                                    IMA_PERMIT_DIRECTIO | IMA_VALIDATE_ALGOS))
+                                    IMA_PERMIT_DIRECTIO | IMA_VALIDATE_ALGOS |
+                                    IMA_VERITY_REQUIRED))
                        return false;
 
                break;
@@ -1292,6 +1310,18 @@ static bool ima_validate_rule(struct ima_rule_entry *entry)
            !(entry->flags & IMA_MODSIG_ALLOWED))
                return false;
 
+       /*
+        * Unlike for regular IMA 'appraise' policy rules where security.ima
+        * xattr may contain either a file hash or signature, the security.ima
+        * xattr for fsverity must contain a file signature (sigv3).  Ensure
+        * that 'appraise' rules for fsverity require file signatures by
+        * checking the IMA_DIGSIG_REQUIRED flag is set.
+        */
+       if (entry->action == APPRAISE &&
+           (entry->flags & IMA_VERITY_REQUIRED) &&
+           !(entry->flags & IMA_DIGSIG_REQUIRED))
+               return false;
+
        return true;
 }
 
@@ -1707,16 +1737,39 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
                                                   LSM_SUBJ_TYPE,
                                                   AUDIT_SUBJ_TYPE);
                        break;
+               case Opt_digest_type:
+                       ima_log_string(ab, "digest_type", args[0].from);
+                       if (entry->flags & IMA_DIGSIG_REQUIRED)
+                               result = -EINVAL;
+                       else if ((strcmp(args[0].from, "verity")) == 0)
+                               entry->flags |= IMA_VERITY_REQUIRED;
+                       else
+                               result = -EINVAL;
+                       break;
                case Opt_appraise_type:
                        ima_log_string(ab, "appraise_type", args[0].from);
-                       if ((strcmp(args[0].from, "imasig")) == 0)
-                               entry->flags |= IMA_DIGSIG_REQUIRED;
-                       else if (IS_ENABLED(CONFIG_IMA_APPRAISE_MODSIG) &&
-                                strcmp(args[0].from, "imasig|modsig") == 0)
-                               entry->flags |= IMA_DIGSIG_REQUIRED |
+
+                       if ((strcmp(args[0].from, "imasig")) == 0) {
+                               if (entry->flags & IMA_VERITY_REQUIRED)
+                                       result = -EINVAL;
+                               else
+                                       entry->flags |= IMA_DIGSIG_REQUIRED;
+                       } else if (strcmp(args[0].from, "sigv3") == 0) {
+                               /* Only fsverity supports sigv3 for now */
+                               if (entry->flags & IMA_VERITY_REQUIRED)
+                                       entry->flags |= IMA_DIGSIG_REQUIRED;
+                               else
+                                       result = -EINVAL;
+                       } else if (IS_ENABLED(CONFIG_IMA_APPRAISE_MODSIG) &&
+                                strcmp(args[0].from, "imasig|modsig") == 0) {
+                               if (entry->flags & IMA_VERITY_REQUIRED)
+                                       result = -EINVAL;
+                               else
+                                       entry->flags |= IMA_DIGSIG_REQUIRED |
                                                IMA_MODSIG_ALLOWED;
-                       else
+                       } else {
                                result = -EINVAL;
+                       }
                        break;
                case Opt_appraise_flag:
                        ima_log_string(ab, "appraise_flag", args[0].from);
@@ -1797,6 +1850,15 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
                check_template_modsig(template_desc);
        }
 
+       /* d-ngv2 template field recommended for unsigned fs-verity digests */
+       if (!result && entry->action == MEASURE &&
+           entry->flags & IMA_VERITY_REQUIRED) {
+               template_desc = entry->template ? entry->template :
+                                                 ima_template_desc_current();
+               check_template_field(template_desc, "d-ngv2",
+                                    "verity rules should include d-ngv2");
+       }
+
        audit_log_format(ab, "res=%d", !result);
        audit_log_end(ab);
        return result;
@@ -2149,11 +2211,15 @@ int ima_policy_show(struct seq_file *m, void *v)
        if (entry->template)
                seq_printf(m, "template=%s ", entry->template->name);
        if (entry->flags & IMA_DIGSIG_REQUIRED) {
-               if (entry->flags & IMA_MODSIG_ALLOWED)
+               if (entry->flags & IMA_VERITY_REQUIRED)
+                       seq_puts(m, "appraise_type=sigv3 ");
+               else if (entry->flags & IMA_MODSIG_ALLOWED)
                        seq_puts(m, "appraise_type=imasig|modsig ");
                else
                        seq_puts(m, "appraise_type=imasig ");
        }
+       if (entry->flags & IMA_VERITY_REQUIRED)
+               seq_puts(m, "digest_type=verity ");
        if (entry->flags & IMA_CHECK_BLACKLIST)
                seq_puts(m, "appraise_flag=check_blacklist ");
        if (entry->flags & IMA_PERMIT_DIRECTIO)
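Putting the new tokens together, hypothetical policy rules exercising this code would read as follows (rule details invented; note that an fsverity 'appraise' rule must pair digest_type=verity with appraise_type=sigv3, as enforced by ima_validate_rule() above):

    measure func=FILE_CHECK digest_type=verity template=ima-ngv2
    appraise func=FILE_CHECK digest_type=verity appraise_type=sigv3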
index db1ad6d7a57fbf7d5866ccf4d6cafb397c6f4f3f..c25079faa208859214c2a9d2aa24d18de870f108 100644 (file)
@@ -20,6 +20,8 @@ static struct ima_template_desc builtin_templates[] = {
        {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT},
        {.name = "ima-ng", .fmt = "d-ng|n-ng"},
        {.name = "ima-sig", .fmt = "d-ng|n-ng|sig"},
+       {.name = "ima-ngv2", .fmt = "d-ngv2|n-ng"},
+       {.name = "ima-sigv2", .fmt = "d-ngv2|n-ng|sig"},
        {.name = "ima-buf", .fmt = "d-ng|n-ng|buf"},
        {.name = "ima-modsig", .fmt = "d-ng|n-ng|sig|d-modsig|modsig"},
        {.name = "evm-sig",
@@ -38,6 +40,8 @@ static const struct ima_template_field supported_fields[] = {
         .field_show = ima_show_template_string},
        {.field_id = "d-ng", .field_init = ima_eventdigest_ng_init,
         .field_show = ima_show_template_digest_ng},
+       {.field_id = "d-ngv2", .field_init = ima_eventdigest_ngv2_init,
+        .field_show = ima_show_template_digest_ngv2},
        {.field_id = "n-ng", .field_init = ima_eventname_ng_init,
         .field_show = ima_show_template_string},
        {.field_id = "sig", .field_init = ima_eventsig_init,
index 7155d17a3b75f52aaa42ad23c0323350db110846..c877f01a54713ea165219b04fb514f25a85d48c0 100644 (file)
@@ -24,11 +24,24 @@ static bool ima_template_hash_algo_allowed(u8 algo)
 enum data_formats {
        DATA_FMT_DIGEST = 0,
        DATA_FMT_DIGEST_WITH_ALGO,
+       DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO,
        DATA_FMT_STRING,
        DATA_FMT_HEX,
        DATA_FMT_UINT
 };
 
+enum digest_type {
+       DIGEST_TYPE_IMA,
+       DIGEST_TYPE_VERITY,
+       DIGEST_TYPE__LAST
+};
+
+#define DIGEST_TYPE_NAME_LEN_MAX 7     /* including NUL */
+static const char * const digest_type_name[DIGEST_TYPE__LAST] = {
+       [DIGEST_TYPE_IMA] = "ima",
+       [DIGEST_TYPE_VERITY] = "verity"
+};
+
 static int ima_write_template_field_data(const void *data, const u32 datalen,
                                         enum data_formats datafmt,
                                         struct ima_field_data *field_data)
@@ -72,8 +85,9 @@ static void ima_show_template_data_ascii(struct seq_file *m,
        u32 buflen = field_data->len;
 
        switch (datafmt) {
+       case DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO:
        case DATA_FMT_DIGEST_WITH_ALGO:
-               buf_ptr = strnchr(field_data->data, buflen, ':');
+               buf_ptr = strrchr(field_data->data, ':');
                if (buf_ptr != field_data->data)
                        seq_printf(m, "%s", field_data->data);
 
@@ -178,6 +192,14 @@ void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show,
                                     field_data);
 }
 
+void ima_show_template_digest_ngv2(struct seq_file *m, enum ima_show_type show,
+                                  struct ima_field_data *field_data)
+{
+       ima_show_template_field_data(m, show,
+                                    DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO,
+                                    field_data);
+}
+
 void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
                              struct ima_field_data *field_data)
 {
@@ -265,26 +287,35 @@ int ima_parse_buf(void *bufstartp, void *bufendp, void **bufcurp,
 }
 
 static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
-                                      u8 hash_algo,
+                                      u8 digest_type, u8 hash_algo,
                                       struct ima_field_data *field_data)
 {
        /*
         * digest formats:
         *  - DATA_FMT_DIGEST: digest
-        *  - DATA_FMT_DIGEST_WITH_ALGO: [<hash algo>] + ':' + '\0' + digest,
-        *    where <hash algo> is provided if the hash algorithm is not
-        *    SHA1 or MD5
+        *  - DATA_FMT_DIGEST_WITH_ALGO: <hash algo> + ':' + '\0' + digest,
+        *  - DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO:
+        *      <digest type> + ':' + <hash algo> + ':' + '\0' + digest,
+        *
+        *    where 'DATA_FMT_DIGEST' is the original digest format ('d')
+        *      with a hash size limitation of 20 bytes,
+        *    where <digest type> is either "ima" or "verity",
+        *    where <hash algo> is the hash_algo_name[] string.
         */
-       u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 };
+       u8 buffer[DIGEST_TYPE_NAME_LEN_MAX + CRYPTO_MAX_ALG_NAME + 2 +
+               IMA_MAX_DIGEST_SIZE] = { 0 };
        enum data_formats fmt = DATA_FMT_DIGEST;
        u32 offset = 0;
 
-       if (hash_algo < HASH_ALGO__LAST) {
+       if (digest_type < DIGEST_TYPE__LAST && hash_algo < HASH_ALGO__LAST) {
+               fmt = DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO;
+               offset += 1 + sprintf(buffer, "%s:%s:",
+                                     digest_type_name[digest_type],
+                                     hash_algo_name[hash_algo]);
+       } else if (hash_algo < HASH_ALGO__LAST) {
                fmt = DATA_FMT_DIGEST_WITH_ALGO;
-               offset += snprintf(buffer, CRYPTO_MAX_ALG_NAME + 1, "%s",
-                                  hash_algo_name[hash_algo]);
-               buffer[offset] = ':';
-               offset += 2;
+               offset += 1 + sprintf(buffer, "%s:",
+                                     hash_algo_name[hash_algo]);
        }
 
        if (digest)
@@ -359,7 +390,8 @@ int ima_eventdigest_init(struct ima_event_data *event_data,
        cur_digestsize = hash.hdr.length;
 out:
        return ima_eventdigest_init_common(cur_digest, cur_digestsize,
-                                          HASH_ALGO__LAST, field_data);
+                                          DIGEST_TYPE__LAST, HASH_ALGO__LAST,
+                                          field_data);
 }
 
 /*
@@ -368,8 +400,32 @@ out:
 int ima_eventdigest_ng_init(struct ima_event_data *event_data,
                            struct ima_field_data *field_data)
 {
-       u8 *cur_digest = NULL, hash_algo = HASH_ALGO_SHA1;
+       u8 *cur_digest = NULL, hash_algo = ima_hash_algo;
+       u32 cur_digestsize = 0;
+
+       if (event_data->violation)      /* recording a violation. */
+               goto out;
+
+       cur_digest = event_data->iint->ima_hash->digest;
+       cur_digestsize = event_data->iint->ima_hash->length;
+
+       hash_algo = event_data->iint->ima_hash->algo;
+out:
+       return ima_eventdigest_init_common(cur_digest, cur_digestsize,
+                                          DIGEST_TYPE__LAST, hash_algo,
+                                          field_data);
+}
+
+/*
+ * This function writes the digest of an event (without size limit),
+ * prefixed with both the digest type and hash algorithm.
+ */
+int ima_eventdigest_ngv2_init(struct ima_event_data *event_data,
+                             struct ima_field_data *field_data)
+{
+       u8 *cur_digest = NULL, hash_algo = ima_hash_algo;
        u32 cur_digestsize = 0;
+       u8 digest_type = DIGEST_TYPE_IMA;
 
        if (event_data->violation)      /* recording a violation. */
                goto out;
@@ -378,9 +434,12 @@ int ima_eventdigest_ng_init(struct ima_event_data *event_data,
        cur_digestsize = event_data->iint->ima_hash->length;
 
        hash_algo = event_data->iint->ima_hash->algo;
+       if (event_data->iint->flags & IMA_VERITY_REQUIRED)
+               digest_type = DIGEST_TYPE_VERITY;
 out:
        return ima_eventdigest_init_common(cur_digest, cur_digestsize,
-                                          hash_algo, field_data);
+                                          digest_type, hash_algo,
+                                          field_data);
 }
 
 /*
@@ -415,7 +474,8 @@ int ima_eventdigest_modsig_init(struct ima_event_data *event_data,
        }
 
        return ima_eventdigest_init_common(cur_digest, cur_digestsize,
-                                          hash_algo, field_data);
+                                          DIGEST_TYPE__LAST, hash_algo,
+                                          field_data);
 }
 
 static int ima_eventname_init_common(struct ima_event_data *event_data,
@@ -475,7 +535,9 @@ int ima_eventsig_init(struct ima_event_data *event_data,
 {
        struct evm_ima_xattr_data *xattr_value = event_data->xattr_value;
 
-       if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG))
+       if (!xattr_value ||
+           (xattr_value->type != EVM_IMA_XATTR_DIGSIG &&
+            xattr_value->type != IMA_VERITY_DIGSIG))
                return ima_eventevmsig_init(event_data, field_data);
 
        return ima_write_template_field_data(xattr_value, event_data->xattr_len,
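For reference, a d-ngv2 measurement in the ASCII runtime measurement list would then carry the type prefix built above; a made-up example entry (hashes abbreviated):

    10 <template-hash> ima-ngv2 verity:sha256:ab..ef /usr/bin/example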
index c71f1de95753d92a37c87fc9eef744f164f683bb..9f7c335f304f4ff714c741424bc0a446c267024c 100644 (file)
@@ -21,6 +21,8 @@ void ima_show_template_digest(struct seq_file *m, enum ima_show_type show,
                              struct ima_field_data *field_data);
 void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show,
                                 struct ima_field_data *field_data);
+void ima_show_template_digest_ngv2(struct seq_file *m, enum ima_show_type show,
+                                  struct ima_field_data *field_data);
 void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
                              struct ima_field_data *field_data);
 void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
@@ -38,6 +40,8 @@ int ima_eventname_init(struct ima_event_data *event_data,
                       struct ima_field_data *field_data);
 int ima_eventdigest_ng_init(struct ima_event_data *event_data,
                            struct ima_field_data *field_data);
+int ima_eventdigest_ngv2_init(struct ima_event_data *event_data,
+                             struct ima_field_data *field_data);
 int ima_eventdigest_modsig_init(struct ima_event_data *event_data,
                                struct ima_field_data *field_data);
 int ima_eventname_ng_init(struct ima_event_data *event_data,
index 3510e413ea179e4e738649b32220d9c3122b5952..7167a6e99bdc02c9093b70eb90354008262aa486 100644 (file)
@@ -40,6 +40,7 @@
 #define IMA_FAIL_UNVERIFIABLE_SIGS     0x10000000
 #define IMA_MODSIG_ALLOWED     0x20000000
 #define IMA_CHECK_BLACKLIST    0x40000000
+#define IMA_VERITY_REQUIRED    0x80000000
 
 #define IMA_DO_MASK            (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \
                                 IMA_HASH | IMA_APPRAISE_SUBMASK)
@@ -78,6 +79,7 @@ enum evm_ima_xattr_type {
        EVM_IMA_XATTR_DIGSIG,
        IMA_XATTR_DIGEST_NG,
        EVM_XATTR_PORTABLE_DIGSIG,
+       IMA_VERITY_DIGSIG,
        IMA_XATTR_LAST
 };
 
@@ -92,7 +94,7 @@ struct evm_xattr {
        u8 digest[SHA1_DIGEST_SIZE];
 } __packed;
 
-#define IMA_MAX_DIGEST_SIZE    64
+#define IMA_MAX_DIGEST_SIZE    HASH_MAX_DIGESTSIZE
 
 struct ima_digest_data {
        u8 algo;
@@ -121,7 +123,14 @@ struct ima_max_digest_data {
 } __packed;
 
 /*
- * signature format v2 - for using with asymmetric keys
+ * signature header format v2 - for use with asymmetric keys
+ *
+ * The signature_v2_hdr struct includes a signature format version
+ * to simplify defining new signature formats.
+ *
+ * signature format:
+ * version 2: regular file data hash based signature
+ * version 3: struct ima_file_id data based signature
  */
 struct signature_v2_hdr {
        uint8_t type;           /* xattr type */
@@ -132,6 +141,20 @@ struct signature_v2_hdr {
        uint8_t sig[];          /* signature payload */
 } __packed;
 
+/*
+ * IMA signature version 3 disambiguates the data that is signed, by
+ * indirectly signing the hash of the ima_file_id structure data,
+ * containing either the fsverity_descriptor struct digest or, in the
+ * future, the regular IMA file hash.
+ *
+ * (Only the used portion of the ima_file_id structure is hashed.)
+ */
+struct ima_file_id {
+       __u8 hash_type;         /* xattr type [enum evm_ima_xattr_type] */
+       __u8 hash_algorithm;    /* Digest algorithm [enum hash_algo] */
+       __u8 hash[HASH_MAX_DIGESTSIZE];
+} __packed;
+
 /* integrity data associated with an inode */
 struct integrity_iint_cache {
        struct rb_node rb_node; /* rooted in integrity_iint_tree */
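A short sketch of how much of this structure is actually hashed for a sigv3 verification, mirroring the 'unused' computation in calc_file_id_hash() earlier in the series (variable names invented):

    /* Only the type/algorithm bytes plus the digest bytes in use are
     * hashed, not the full HASH_MAX_DIGESTSIZE buffer. */
    unsigned int unused = HASH_MAX_DIGESTSIZE - hash_digest_size[algo];
    size_t hashed_len = sizeof(struct ima_file_id) - unused;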
index 1db4d3b4356dc4d32cd373fceb0bbfc8fb3271c7..8a1124e4d769602ec6a0d990a132e304b7ea7eac 100644 (file)
@@ -16,35 +16,13 @@ static efi_guid_t efi_cert_x509_sha256_guid __initdata =
        EFI_CERT_X509_SHA256_GUID;
 static efi_guid_t efi_cert_sha256_guid __initdata = EFI_CERT_SHA256_GUID;
 
-/*
- * Blacklist a hash.
- */
-static __init void uefi_blacklist_hash(const char *source, const void *data,
-                                      size_t len, const char *type,
-                                      size_t type_len)
-{
-       char *hash, *p;
-
-       hash = kmalloc(type_len + len * 2 + 1, GFP_KERNEL);
-       if (!hash)
-               return;
-       p = memcpy(hash, type, type_len);
-       p += type_len;
-       bin2hex(p, data, len);
-       p += len * 2;
-       *p = 0;
-
-       mark_hash_blacklisted(hash);
-       kfree(hash);
-}
-
 /*
  * Blacklist an X509 TBS hash.
  */
 static __init void uefi_blacklist_x509_tbs(const char *source,
                                           const void *data, size_t len)
 {
-       uefi_blacklist_hash(source, data, len, "tbs:", 4);
+       mark_hash_blacklisted(data, len, BLACKLIST_HASH_X509_TBS);
 }
 
 /*
@@ -53,7 +31,7 @@ static __init void uefi_blacklist_x509_tbs(const char *source,
 static __init void uefi_blacklist_binary(const char *source,
                                         const void *data, size_t len)
 {
-       uefi_blacklist_hash(source, data, len, "bin:", 4);
+       mark_hash_blacklisted(data, len, BLACKLIST_HASH_BINARY);
 }
 
 /*
@@ -73,7 +51,7 @@ __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
 {
        if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
                return add_to_platform_keyring;
-       return 0;
+       return NULL;
 }
 
 /*
@@ -88,7 +66,7 @@ __init efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type)
                else
                        return add_to_platform_keyring;
        }
-       return 0;
+       return NULL;
 }
 
 /*
@@ -103,5 +81,5 @@ __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type)
                return uefi_blacklist_binary;
        if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
                return uefi_revocation_list_x509;
-       return 0;
+       return NULL;
 }
index 284558f30411eb25004500bc96a49f8a51b3bb5a..212d894a8c0c0f238cc927a51d24ae87649a5483 100644 (file)
@@ -35,3 +35,11 @@ efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type);
 efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type);
 
 #endif
+
+#ifndef UEFI_QUIRK_SKIP_CERT
+#define UEFI_QUIRK_SKIP_CERT(vendor, product) \
+                .matches = { \
+                       DMI_MATCH(DMI_BOARD_VENDOR, vendor), \
+                       DMI_MATCH(DMI_PRODUCT_NAME, product), \
+               },
+#endif
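For clarity, an entry built with this helper (as used by the DMI table in the next file) expands to roughly:

    {
    	.matches = {
    		DMI_MATCH(DMI_BOARD_VENDOR, "Apple Inc."),
    		DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro15,1"),
    	},
    },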
index 5f45c3c07dbd481ed087f878a1f286652e6fd235..093894a640dca6f4592268077428832df0449976 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/cred.h>
+#include <linux/dmi.h>
 #include <linux/err.h>
 #include <linux/efi.h>
 #include <linux/slab.h>
 #include "../integrity.h"
 #include "keyring_handler.h"
 
+/*
+ * On T2 Macs, reading the db and dbx EFI variables to load UEFI Secure Boot
+ * certificates causes a page fault in Apple's firmware and a crash that
+ * disables EFI runtime services. The following quirk skips reading these
+ * variables.
+ */
+static const struct dmi_system_id uefi_skip_cert[] = {
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,1") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,2") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,3") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,4") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,1") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,2") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,3") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,4") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,1") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,2") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir9,1") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacMini8,1") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacPro7,1") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,1") },
+       { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,2") },
+       { }
+};
+
 /*
  * Look to see if a UEFI variable called MokIgnoreDB exists and return true if
  * it does.
@@ -138,6 +164,13 @@ static int __init load_uefi_certs(void)
        unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0;
        efi_status_t status;
        int rc = 0;
+       const struct dmi_system_id *dmi_id;
+
+       dmi_id = dmi_first_match(uefi_skip_cert);
+       if (dmi_id) {
+               pr_err("Reading UEFI Secure Boot Certs is not supported on T2 Macs.\n");
+               return false;
+       }
 
        if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
                return false;
index 0e30b361e1c1e386322e1d1a2b5930b9d10b8ee7..abb03a1b2a5c14788b785bf80537003af1e8cc64 100644 (file)
@@ -70,23 +70,19 @@ config BIG_KEYS
 
 config TRUSTED_KEYS
        tristate "TRUSTED KEYS"
-       depends on KEYS && TCG_TPM
-       select CRYPTO
-       select CRYPTO_HMAC
-       select CRYPTO_SHA1
-       select CRYPTO_HASH_INFO
-       select ASN1_ENCODER
-       select OID_REGISTRY
-       select ASN1
+       depends on KEYS
        help
          This option provides support for creating, sealing, and unsealing
          keys in the kernel. Trusted keys are random number symmetric keys,
-         generated and RSA-sealed by the TPM. The TPM only unseals the keys,
-         if the boot PCRs and other criteria match.  Userspace will only ever
-         see encrypted blobs.
+         generated and sealed by a trust source selected at kernel boot-time.
+         Userspace will only ever see encrypted blobs.
 
          If you are unsure as to whether this is required, answer N.
 
+if TRUSTED_KEYS
+source "security/keys/trusted-keys/Kconfig"
+endif
+
 config ENCRYPTED_KEYS
        tristate "ENCRYPTED KEYS"
        depends on KEYS
index d17e5f09eeb895ea761e34c83f56ded50230a974..c3367622c683be8678e9224f2fc041741f3c70e8 100644 (file)
 /*
  * Layout of key payload words.
  */
-enum {
-       big_key_data,
-       big_key_path,
-       big_key_path_2nd_part,
-       big_key_len,
+struct big_key_payload {
+       u8 *data;
+       struct path path;
+       size_t length;
 };
+#define to_big_key_payload(payload)                    \
+       (struct big_key_payload *)((payload).data)
 
 /*
  * If the data is under this limit, there's no point creating a shm file to
@@ -55,7 +56,7 @@ struct key_type key_type_big_key = {
  */
 int big_key_preparse(struct key_preparsed_payload *prep)
 {
-       struct path *path = (struct path *)&prep->payload.data[big_key_path];
+       struct big_key_payload *payload = to_big_key_payload(prep->payload);
        struct file *file;
        u8 *buf, *enckey;
        ssize_t written;
@@ -63,13 +64,15 @@ int big_key_preparse(struct key_preparsed_payload *prep)
        size_t enclen = datalen + CHACHA20POLY1305_AUTHTAG_SIZE;
        int ret;
 
+       BUILD_BUG_ON(sizeof(*payload) != sizeof(prep->payload.data));
+
        if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
                return -EINVAL;
 
        /* Set an arbitrary quota */
        prep->quotalen = 16;
 
-       prep->payload.data[big_key_len] = (void *)(unsigned long)datalen;
+       payload->length = datalen;
 
        if (datalen > BIG_KEY_FILE_THRESHOLD) {
                /* Create a shmem file to store the data in.  This will permit the data
@@ -117,9 +120,9 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                /* Pin the mount and dentry to the key so that we can open it again
                 * later
                 */
-               prep->payload.data[big_key_data] = enckey;
-               *path = file->f_path;
-               path_get(path);
+               payload->data = enckey;
+               payload->path = file->f_path;
+               path_get(&payload->path);
                fput(file);
                kvfree_sensitive(buf, enclen);
        } else {
@@ -129,7 +132,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                if (!data)
                        return -ENOMEM;
 
-               prep->payload.data[big_key_data] = data;
+               payload->data = data;
                memcpy(data, prep->data, prep->datalen);
        }
        return 0;
@@ -148,12 +151,11 @@ error:
  */
 void big_key_free_preparse(struct key_preparsed_payload *prep)
 {
-       if (prep->datalen > BIG_KEY_FILE_THRESHOLD) {
-               struct path *path = (struct path *)&prep->payload.data[big_key_path];
+       struct big_key_payload *payload = to_big_key_payload(prep->payload);
 
-               path_put(path);
-       }
-       kfree_sensitive(prep->payload.data[big_key_data]);
+       if (prep->datalen > BIG_KEY_FILE_THRESHOLD)
+               path_put(&payload->path);
+       kfree_sensitive(payload->data);
 }
 
 /*
@@ -162,13 +164,12 @@ void big_key_free_preparse(struct key_preparsed_payload *prep)
  */
 void big_key_revoke(struct key *key)
 {
-       struct path *path = (struct path *)&key->payload.data[big_key_path];
+       struct big_key_payload *payload = to_big_key_payload(key->payload);
 
        /* clear the quota */
        key_payload_reserve(key, 0);
-       if (key_is_positive(key) &&
-           (size_t)key->payload.data[big_key_len] > BIG_KEY_FILE_THRESHOLD)
-               vfs_truncate(path, 0);
+       if (key_is_positive(key) && payload->length > BIG_KEY_FILE_THRESHOLD)
+               vfs_truncate(&payload->path, 0);
 }
 
 /*
@@ -176,17 +177,15 @@ void big_key_revoke(struct key *key)
  */
 void big_key_destroy(struct key *key)
 {
-       size_t datalen = (size_t)key->payload.data[big_key_len];
-
-       if (datalen > BIG_KEY_FILE_THRESHOLD) {
-               struct path *path = (struct path *)&key->payload.data[big_key_path];
+       struct big_key_payload *payload = to_big_key_payload(key->payload);
 
-               path_put(path);
-               path->mnt = NULL;
-               path->dentry = NULL;
+       if (payload->length > BIG_KEY_FILE_THRESHOLD) {
+               path_put(&payload->path);
+               payload->path.mnt = NULL;
+               payload->path.dentry = NULL;
        }
-       kfree_sensitive(key->payload.data[big_key_data]);
-       key->payload.data[big_key_data] = NULL;
+       kfree_sensitive(payload->data);
+       payload->data = NULL;
 }
 
 /*
@@ -211,14 +210,14 @@ int big_key_update(struct key *key, struct key_preparsed_payload *prep)
  */
 void big_key_describe(const struct key *key, struct seq_file *m)
 {
-       size_t datalen = (size_t)key->payload.data[big_key_len];
+       struct big_key_payload *payload = to_big_key_payload(key->payload);
 
        seq_puts(m, key->description);
 
        if (key_is_positive(key))
                seq_printf(m, ": %zu [%s]",
-                          datalen,
-                          datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff");
+                          payload->length,
+                          payload->length > BIG_KEY_FILE_THRESHOLD ? "file" : "buff");
 }
 
 /*
@@ -227,16 +226,16 @@ void big_key_describe(const struct key *key, struct seq_file *m)
  */
 long big_key_read(const struct key *key, char *buffer, size_t buflen)
 {
-       size_t datalen = (size_t)key->payload.data[big_key_len];
+       struct big_key_payload *payload = to_big_key_payload(key->payload);
+       size_t datalen = payload->length;
        long ret;
 
        if (!buffer || buflen < datalen)
                return datalen;
 
        if (datalen > BIG_KEY_FILE_THRESHOLD) {
-               struct path *path = (struct path *)&key->payload.data[big_key_path];
                struct file *file;
-               u8 *buf, *enckey = (u8 *)key->payload.data[big_key_data];
+               u8 *buf, *enckey = payload->data;
                size_t enclen = datalen + CHACHA20POLY1305_AUTHTAG_SIZE;
                loff_t pos = 0;
 
@@ -244,7 +243,7 @@ long big_key_read(const struct key *key, char *buffer, size_t buflen)
                if (!buf)
                        return -ENOMEM;
 
-               file = dentry_open(path, O_RDONLY, current_cred());
+               file = dentry_open(&payload->path, O_RDONLY, current_cred());
                if (IS_ERR(file)) {
                        ret = PTR_ERR(file);
                        goto error;
@@ -274,7 +273,7 @@ error:
                kvfree_sensitive(buf, enclen);
        } else {
                ret = datalen;
-               memcpy(buffer, key->payload.data[big_key_data], datalen);
+               memcpy(buffer, payload->data, datalen);
        }
 
        return ret;
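
Replacing the enum-indexed void pointers with a typed struct works because the generic key payload is a small union of pointer-sized slots, and struct big_key_payload (a pointer, a two-pointer struct path, and a size_t) occupies exactly that space; the BUILD_BUG_ON() in big_key_preparse() turns any future mismatch into a compile error. A hedged sketch of the overlay being relied upon (the real union lives in include/linux/key.h):

    /* Sketch, not the authoritative definition: four pointer-sized slots. */
    union key_payload_sketch {
            void *data[4];
    };

    /* The typed view from this patch must fit those slots exactly:
     * BUILD_BUG_ON(sizeof(struct big_key_payload) != sizeof(prep->payload.data));
     */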
diff --git a/security/keys/trusted-keys/Kconfig b/security/keys/trusted-keys/Kconfig
new file mode 100644 (file)
index 0000000..dbfdd85
--- /dev/null
@@ -0,0 +1,38 @@
+config TRUSTED_KEYS_TPM
+       bool "TPM-based trusted keys"
+       depends on TCG_TPM >= TRUSTED_KEYS
+       default y
+       select CRYPTO
+       select CRYPTO_HMAC
+       select CRYPTO_SHA1
+       select CRYPTO_HASH_INFO
+       select ASN1_ENCODER
+       select OID_REGISTRY
+       select ASN1
+       help
+         Enable use of the Trusted Platform Module (TPM) as a trusted key
+         backend. Trusted keys are random number symmetric keys,
+         which will be generated and RSA-sealed by the TPM.
+         The TPM only unseals the keys if the boot PCRs and other
+         criteria match.
+
+config TRUSTED_KEYS_TEE
+       bool "TEE-based trusted keys"
+       depends on TEE >= TRUSTED_KEYS
+       default y
+       help
+         Enable use of the Trusted Execution Environment (TEE) as a trusted
+         key backend.
+
+config TRUSTED_KEYS_CAAM
+       bool "CAAM-based trusted keys"
+       depends on CRYPTO_DEV_FSL_CAAM_JR >= TRUSTED_KEYS
+       select CRYPTO_DEV_FSL_CAAM_BLOB_GEN
+       default y
+       help
+         Enable use of NXP's Cryptographic Accelerator and Assurance Module
+         (CAAM) as a trusted key backend.
+
+if !TRUSTED_KEYS_TPM && !TRUSTED_KEYS_TEE && !TRUSTED_KEYS_CAAM
+comment "No trust source selected!"
+endif
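
The tristate comparisons above read as "the backend must be at least as available as TRUSTED_KEYS itself": a built-in TRUSTED_KEYS requires a built-in backend, while a modular TRUSTED_KEYS is satisfied by a modular one. That guarantee is what lets trusted_core.c, later in this series, drop IS_REACHABLE() in favour of plain ifdefs:

    /* Before: guard against the TPM core being a module while this code
     * is built in, which would make its symbols unreachable. */
    #if IS_REACHABLE(CONFIG_TCG_TPM)
            { "tpm", &trusted_key_tpm_ops },
    #endif

    /* After: Kconfig already enforces reachability, so testing the new
     * backend symbol is enough. */
    #if defined(CONFIG_TRUSTED_KEYS_TPM)
            { "tpm", &trusted_key_tpm_ops },
    #endif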
index feb8b6c3cc79cb6c2235ff90523b1bf5303d09c1..735aa0bc08efc2fb799f3b724918485bb00d146c 100644 (file)
@@ -5,10 +5,12 @@
 
 obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
 trusted-y += trusted_core.o
-trusted-y += trusted_tpm1.o
+trusted-$(CONFIG_TRUSTED_KEYS_TPM) += trusted_tpm1.o
 
 $(obj)/trusted_tpm2.o: $(obj)/tpm2key.asn1.h
-trusted-y += trusted_tpm2.o
-trusted-y += tpm2key.asn1.o
+trusted-$(CONFIG_TRUSTED_KEYS_TPM) += trusted_tpm2.o
+trusted-$(CONFIG_TRUSTED_KEYS_TPM) += tpm2key.asn1.o
 
-trusted-$(CONFIG_TEE) += trusted_tee.o
+trusted-$(CONFIG_TRUSTED_KEYS_TEE) += trusted_tee.o
+
+trusted-$(CONFIG_TRUSTED_KEYS_CAAM) += trusted_caam.o
diff --git a/security/keys/trusted-keys/trusted_caam.c b/security/keys/trusted-keys/trusted_caam.c
new file mode 100644 (file)
index 0000000..e3415c5
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Pengutronix, Ahmad Fatoum <kernel@pengutronix.de>
+ */
+
+#include <keys/trusted_caam.h>
+#include <keys/trusted-type.h>
+#include <linux/build_bug.h>
+#include <linux/key-type.h>
+#include <soc/fsl/caam-blob.h>
+
+static struct caam_blob_priv *blobifier;
+
+#define KEYMOD "SECURE_KEY"
+
+static_assert(MAX_KEY_SIZE + CAAM_BLOB_OVERHEAD <= CAAM_BLOB_MAX_LEN);
+static_assert(MAX_BLOB_SIZE <= CAAM_BLOB_MAX_LEN);
+
+static int trusted_caam_seal(struct trusted_key_payload *p, char *datablob)
+{
+       int ret;
+       struct caam_blob_info info = {
+               .input  = p->key,  .input_len   = p->key_len,
+               .output = p->blob, .output_len  = MAX_BLOB_SIZE,
+               .key_mod = KEYMOD, .key_mod_len = sizeof(KEYMOD) - 1,
+       };
+
+       ret = caam_encap_blob(blobifier, &info);
+       if (ret)
+               return ret;
+
+       p->blob_len = info.output_len;
+       return 0;
+}
+
+static int trusted_caam_unseal(struct trusted_key_payload *p, char *datablob)
+{
+       int ret;
+       struct caam_blob_info info = {
+               .input   = p->blob,  .input_len  = p->blob_len,
+               .output  = p->key,   .output_len = MAX_KEY_SIZE,
+               .key_mod = KEYMOD,  .key_mod_len = sizeof(KEYMOD) - 1,
+       };
+
+       ret = caam_decap_blob(blobifier, &info);
+       if (ret)
+               return ret;
+
+       p->key_len = info.output_len;
+       return 0;
+}
+
+static int trusted_caam_init(void)
+{
+       int ret;
+
+       blobifier = caam_blob_gen_init();
+       if (IS_ERR(blobifier))
+               return PTR_ERR(blobifier);
+
+       ret = register_key_type(&key_type_trusted);
+       if (ret)
+               caam_blob_gen_exit(blobifier);
+
+       return ret;
+}
+
+static void trusted_caam_exit(void)
+{
+       unregister_key_type(&key_type_trusted);
+       caam_blob_gen_exit(blobifier);
+}
+
+struct trusted_key_ops trusted_key_caam_ops = {
+       .migratable = 0, /* non-migratable */
+       .init = trusted_caam_init,
+       .seal = trusted_caam_seal,
+       .unseal = trusted_caam_unseal,
+       .exit = trusted_caam_exit,
+};
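
Two details above are easy to miss: sizeof(KEYMOD) - 1 passes the key modifier without its NUL terminator, and the static asserts guarantee that a maximum-size key plus CAAM_BLOB_OVERHEAD, as well as a maximum-size blob, both fit CAAM_BLOB_MAX_LEN. The seal/unseal pair is symmetric; as a hedged sketch of the contract (a hypothetical helper, not part of the patch):

    /* Hypothetical round-trip: seal p->key into p->blob, then recover it.
     * The unused datablob argument mirrors the ops signatures above. */
    static int caam_roundtrip(struct trusted_key_payload *p)
    {
            int ret = trusted_caam_seal(p, NULL);

            if (ret)
                    return ret;
            return trusted_caam_unseal(p, NULL);
    }

Selecting this backend at boot is done via the core's module parameter, e.g. trusted.source=caam, as described in the trusted_core.c changes below.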
index 9b9d3ef79cbe3501c1e218cc43ee006ede69c072..c6fc50d67214c401f95f3e5aff3f17032eb8c950 100644 (file)
@@ -9,6 +9,7 @@
 #include <keys/user-type.h>
 #include <keys/trusted-type.h>
 #include <keys/trusted_tee.h>
+#include <keys/trusted_caam.h>
 #include <keys/trusted_tpm.h>
 #include <linux/capability.h>
 #include <linux/err.h>
 #include <linux/key-type.h>
 #include <linux/module.h>
 #include <linux/parser.h>
+#include <linux/random.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/static_call.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
 
+static char *trusted_rng = "default";
+module_param_named(rng, trusted_rng, charp, 0);
+MODULE_PARM_DESC(rng, "Select trusted key RNG");
+
 static char *trusted_key_source;
 module_param_named(source, trusted_key_source, charp, 0);
-MODULE_PARM_DESC(source, "Select trusted keys source (tpm or tee)");
+MODULE_PARM_DESC(source, "Select trusted keys source (tpm, tee or caam)");
 
 static const struct trusted_key_source trusted_key_sources[] = {
-#if IS_REACHABLE(CONFIG_TCG_TPM)
+#if defined(CONFIG_TRUSTED_KEYS_TPM)
        { "tpm", &trusted_key_tpm_ops },
 #endif
-#if IS_REACHABLE(CONFIG_TEE)
+#if defined(CONFIG_TRUSTED_KEYS_TEE)
        { "tee", &trusted_key_tee_ops },
 #endif
+#if defined(CONFIG_TRUSTED_KEYS_CAAM)
+       { "caam", &trusted_key_caam_ops },
+#endif
 };
 
 DEFINE_STATIC_CALL_NULL(trusted_key_init, *trusted_key_sources[0].ops->init);
@@ -312,8 +321,14 @@ struct key_type key_type_trusted = {
 };
 EXPORT_SYMBOL_GPL(key_type_trusted);
 
+static int kernel_get_random(unsigned char *key, size_t key_len)
+{
+       return get_random_bytes_wait(key, key_len) ?: key_len;
+}
+
 static int __init init_trusted(void)
 {
+       int (*get_random)(unsigned char *key, size_t key_len);
        int i, ret = 0;
 
        for (i = 0; i < ARRAY_SIZE(trusted_key_sources); i++) {
@@ -322,6 +337,28 @@ static int __init init_trusted(void)
                            strlen(trusted_key_sources[i].name)))
                        continue;
 
+               /*
+                * We always support trusted.rng="kernel" and "default" as
+                * well as trusted.rng=$trusted.source if the trust source
+                * defines its own get_random callback.
+                */
+               get_random = trusted_key_sources[i].ops->get_random;
+               if (trusted_rng && strcmp(trusted_rng, "default")) {
+                       if (!strcmp(trusted_rng, "kernel")) {
+                               get_random = kernel_get_random;
+                       } else if (strcmp(trusted_rng, trusted_key_sources[i].name) ||
+                                  !get_random) {
+                               pr_warn("Unsupported RNG. Supported: kernel");
+                               if (get_random)
+                                       pr_cont(", %s", trusted_key_sources[i].name);
+                               pr_cont(", default\n");
+                               return -EINVAL;
+                       }
+               }
+
+               if (!get_random)
+                       get_random = kernel_get_random;
+
                static_call_update(trusted_key_init,
                                   trusted_key_sources[i].ops->init);
                static_call_update(trusted_key_seal,
@@ -329,7 +366,7 @@ static int __init init_trusted(void)
                static_call_update(trusted_key_unseal,
                                   trusted_key_sources[i].ops->unseal);
                static_call_update(trusted_key_get_random,
-                                  trusted_key_sources[i].ops->get_random);
+                                  get_random);
                static_call_update(trusted_key_exit,
                                   trusted_key_sources[i].ops->exit);
                migratable = trusted_key_sources[i].ops->migratable;
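
kernel_get_random() leans on the GNU ?: extension: get_random_bytes_wait() returns 0 on success or a negative error, so the expression evaluates to key_len on success and to the error otherwise, matching the convention that a get_random callback returns the number of bytes produced. A sketch of the equivalent without the extension:

    /* Equivalent spelling of kernel_get_random() without the GNU ?: form. */
    static int kernel_get_random_expanded(unsigned char *key, size_t key_len)
    {
            int ret = get_random_bytes_wait(key, key_len);

            return ret ? ret : key_len;
    }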
index 6725af24c6841486a0b9e4695e6372dcfb5be947..ec6c37f04a1919b82c7dc7590d3615697568df4c 100644 (file)
@@ -15,7 +15,7 @@
 #include "setup.h"
 
 static int hook_cred_prepare(struct cred *const new,
-               const struct cred *const old, const gfp_t gfp)
+                            const struct cred *const old, const gfp_t gfp)
 {
        struct landlock_ruleset *const old_dom = landlock_cred(old)->domain;
 
@@ -42,5 +42,5 @@ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
 __init void landlock_add_cred_hooks(void)
 {
        security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
-                       LANDLOCK_NAME);
+                          LANDLOCK_NAME);
 }
index 5f99d3decade6f76d3c14a2d2e90230c59127c50..af89ab00e6d10f83815a88f725281dd9e11a4795 100644 (file)
@@ -20,8 +20,8 @@ struct landlock_cred_security {
        struct landlock_ruleset *domain;
 };
 
-static inline struct landlock_cred_security *landlock_cred(
-               const struct cred *cred)
+static inline struct landlock_cred_security *
+landlock_cred(const struct cred *cred)
 {
        return cred->security + landlock_blob_sizes.lbs_cred;
 }
@@ -34,8 +34,8 @@ static inline const struct landlock_ruleset *landlock_get_current_domain(void)
 /*
  * The call needs to come from an RCU read-side critical section.
  */
-static inline const struct landlock_ruleset *landlock_get_task_domain(
-               const struct task_struct *const task)
+static inline const struct landlock_ruleset *
+landlock_get_task_domain(const struct task_struct *const task)
 {
        return landlock_cred(__task_cred(task))->domain;
 }
index 97b8e421f617178489f948a71d71cdefa7079982..ec5a6247cd3e7569e08c72dbcd6db4339d53ca7a 100644 (file)
@@ -4,6 +4,7 @@
  *
  * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
  * Copyright © 2018-2020 ANSSI
+ * Copyright © 2021-2022 Microsoft Corporation
  */
 
 #include <linux/atomic.h>
@@ -141,23 +142,26 @@ retry:
 }
 
 /* All access rights that can be tied to files. */
+/* clang-format off */
 #define ACCESS_FILE ( \
        LANDLOCK_ACCESS_FS_EXECUTE | \
        LANDLOCK_ACCESS_FS_WRITE_FILE | \
        LANDLOCK_ACCESS_FS_READ_FILE)
+/* clang-format on */
 
 /*
  * @path: Should have been checked by get_path_from_fd().
  */
 int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
-               const struct path *const path, u32 access_rights)
+                           const struct path *const path,
+                           access_mask_t access_rights)
 {
        int err;
        struct landlock_object *object;
 
        /* Files only get access rights that make sense. */
-       if (!d_is_dir(path->dentry) && (access_rights | ACCESS_FILE) !=
-                       ACCESS_FILE)
+       if (!d_is_dir(path->dentry) &&
+           (access_rights | ACCESS_FILE) != ACCESS_FILE)
                return -EINVAL;
        if (WARN_ON_ONCE(ruleset->num_layers != 1))
                return -EINVAL;
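
The (access_rights | ACCESS_FILE) != ACCESS_FILE test above is a subset check: it trips exactly when access_rights contains a bit outside ACCESS_FILE, so non-directories can only be granted the three file accesses. The same idiom in isolation:

    /* True if @rights only uses bits from @set; equivalently !(rights & ~set). */
    static inline bool is_subset(u64 rights, u64 set)
    {
            return (rights | set) == set;
    }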
@@ -180,84 +184,352 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
 
 /* Access-control management */
 
-static inline u64 unmask_layers(
-               const struct landlock_ruleset *const domain,
-               const struct path *const path, const u32 access_request,
-               u64 layer_mask)
+/*
+ * The lifetime of the returned rule is tied to @domain.
+ *
+ * Returns NULL if no rule is found or if @dentry is negative.
+ */
+static inline const struct landlock_rule *
+find_rule(const struct landlock_ruleset *const domain,
+         const struct dentry *const dentry)
 {
        const struct landlock_rule *rule;
        const struct inode *inode;
-       size_t i;
 
-       if (d_is_negative(path->dentry))
-               /* Ignore nonexistent leafs. */
-               return layer_mask;
-       inode = d_backing_inode(path->dentry);
+       /* Ignores nonexistent leaves. */
+       if (d_is_negative(dentry))
+               return NULL;
+
+       inode = d_backing_inode(dentry);
        rcu_read_lock();
-       rule = landlock_find_rule(domain,
-                       rcu_dereference(landlock_inode(inode)->object));
+       rule = landlock_find_rule(
+               domain, rcu_dereference(landlock_inode(inode)->object));
        rcu_read_unlock();
+       return rule;
+}
+
+/*
+ * @layer_masks is read and may be updated according to the access request and
+ * the matching rule.
+ *
+ * Returns true if the request is allowed (i.e. relevant layer masks for the
+ * request are empty).
+ */
+static inline bool
+unmask_layers(const struct landlock_rule *const rule,
+             const access_mask_t access_request,
+             layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+       size_t layer_level;
+
+       if (!access_request || !layer_masks)
+               return true;
        if (!rule)
-               return layer_mask;
+               return false;
 
        /*
         * An access is granted if, for each policy layer, at least one rule
-        * encountered on the pathwalk grants the requested accesses,
-        * regardless of their position in the layer stack.  We must then check
+        * encountered on the pathwalk grants the requested access,
+        * regardless of its position in the layer stack.  We must then check
         * the remaining layers for each inode, from the first added layer to
-        * the last one.
+        * the last one.  When there are multiple requested accesses, for each
+        * policy layer, the full set of requested accesses may not be granted
+        * by only one rule, but by the union (binary OR) of multiple rules.
+        * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
         */
-       for (i = 0; i < rule->num_layers; i++) {
-               const struct landlock_layer *const layer = &rule->layers[i];
-               const u64 layer_level = BIT_ULL(layer->level - 1);
+       for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
+               const struct landlock_layer *const layer =
+                       &rule->layers[layer_level];
+               const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
+               const unsigned long access_req = access_request;
+               unsigned long access_bit;
+               bool is_empty;
 
-               /* Checks that the layer grants access to the full request. */
-               if ((layer->access & access_request) == access_request) {
-                       layer_mask &= ~layer_level;
-
-                       if (layer_mask == 0)
-                               return layer_mask;
+               /*
+                * Records in @layer_masks which layer grants access to each
+                * requested access.
+                */
+               is_empty = true;
+               for_each_set_bit(access_bit, &access_req,
+                                ARRAY_SIZE(*layer_masks)) {
+                       if (layer->access & BIT_ULL(access_bit))
+                               (*layer_masks)[access_bit] &= ~layer_bit;
+                       is_empty = is_empty && !(*layer_masks)[access_bit];
                }
+               if (is_empty)
+                       return true;
        }
-       return layer_mask;
+       return false;
 }
 
-static int check_access_path(const struct landlock_ruleset *const domain,
-               const struct path *const path, u32 access_request)
+/*
+ * Allows access to pseudo filesystems that will never be mountable (e.g.
+ * sockfs, pipefs), but may still be reachable through
+ * /proc/<pid>/fd/<file-descriptor>.
+ */
+static inline bool is_nouser_or_private(const struct dentry *dentry)
 {
-       bool allowed = false;
-       struct path walker_path;
-       u64 layer_mask;
-       size_t i;
+       return (dentry->d_sb->s_flags & SB_NOUSER) ||
+              (d_is_positive(dentry) &&
+               unlikely(IS_PRIVATE(d_backing_inode(dentry))));
+}
 
-       /* Make sure all layers can be checked. */
-       BUILD_BUG_ON(BITS_PER_TYPE(layer_mask) < LANDLOCK_MAX_NUM_LAYERS);
+static inline access_mask_t
+get_handled_accesses(const struct landlock_ruleset *const domain)
+{
+       access_mask_t access_dom = 0;
+       unsigned long access_bit;
+
+       for (access_bit = 0; access_bit < LANDLOCK_NUM_ACCESS_FS;
+            access_bit++) {
+               size_t layer_level;
+
+               for (layer_level = 0; layer_level < domain->num_layers;
+                    layer_level++) {
+                       if (domain->fs_access_masks[layer_level] &
+                           BIT_ULL(access_bit)) {
+                               access_dom |= BIT_ULL(access_bit);
+                               break;
+                       }
+               }
+       }
+       return access_dom;
+}
+
+static inline access_mask_t
+init_layer_masks(const struct landlock_ruleset *const domain,
+                const access_mask_t access_request,
+                layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+       access_mask_t handled_accesses = 0;
+       size_t layer_level;
 
+       memset(layer_masks, 0, sizeof(*layer_masks));
+       /* An empty access request can happen because of O_WRONLY | O_RDWR. */
        if (!access_request)
                return 0;
+
+       /* Saves all handled accesses per layer. */
+       for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
+               const unsigned long access_req = access_request;
+               unsigned long access_bit;
+
+               for_each_set_bit(access_bit, &access_req,
+                                ARRAY_SIZE(*layer_masks)) {
+                       if (domain->fs_access_masks[layer_level] &
+                           BIT_ULL(access_bit)) {
+                               (*layer_masks)[access_bit] |=
+                                       BIT_ULL(layer_level);
+                               handled_accesses |= BIT_ULL(access_bit);
+                       }
+               }
+       }
+       return handled_accesses;
+}
+
+/*
+ * Check that a destination file hierarchy has more restrictions than a source
+ * file hierarchy.  This is only used for link and rename actions.
+ *
+ * @layer_masks_child2: Optional child masks.
+ */
+static inline bool no_more_access(
+       const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
+       const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS],
+       const bool child1_is_directory,
+       const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
+       const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS],
+       const bool child2_is_directory)
+{
+       unsigned long access_bit;
+
+       for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2);
+            access_bit++) {
+               /* Ignores accesses that only make sense for directories. */
+               const bool is_file_access =
+                       !!(BIT_ULL(access_bit) & ACCESS_FILE);
+
+               if (child1_is_directory || is_file_access) {
+                       /*
+                        * Checks if the destination restrictions are a
+                        * superset of the source ones (i.e. inherited access
+                        * rights without child exceptions):
+                        * restrictions(parent2) >= restrictions(child1)
+                        */
+                       if ((((*layer_masks_parent1)[access_bit] &
+                             (*layer_masks_child1)[access_bit]) |
+                            (*layer_masks_parent2)[access_bit]) !=
+                           (*layer_masks_parent2)[access_bit])
+                               return false;
+               }
+
+               if (!layer_masks_child2)
+                       continue;
+               if (child2_is_directory || is_file_access) {
+                       /*
+                        * Checks inverted restrictions for RENAME_EXCHANGE:
+                        * restrictions(parent1) >= restrictions(child2)
+                        */
+                       if ((((*layer_masks_parent2)[access_bit] &
+                             (*layer_masks_child2)[access_bit]) |
+                            (*layer_masks_parent1)[access_bit]) !=
+                           (*layer_masks_parent1)[access_bit])
+                               return false;
+               }
+       }
+       return true;
+}
+
+/*
+ * Removes @layer_masks accesses that are not requested.
+ *
+ * Returns true if the request is allowed, false otherwise.
+ */
+static inline bool
+scope_to_request(const access_mask_t access_request,
+                layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+       const unsigned long access_req = access_request;
+       unsigned long access_bit;
+
+       if (WARN_ON_ONCE(!layer_masks))
+               return true;
+
+       for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks))
+               (*layer_masks)[access_bit] = 0;
+       return !memchr_inv(layer_masks, 0, sizeof(*layer_masks));
+}
+
+/*
+ * Returns true if there is at least one access right different than
+ * LANDLOCK_ACCESS_FS_REFER.
+ */
+static inline bool
+is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS],
+         const access_mask_t access_request)
+{
+       unsigned long access_bit;
+       /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */
+       const unsigned long access_check = access_request &
+                                          ~LANDLOCK_ACCESS_FS_REFER;
+
+       if (!layer_masks)
+               return false;
+
+       for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) {
+               if ((*layer_masks)[access_bit])
+                       return true;
+       }
+       return false;
+}
+
+/**
+ * check_access_path_dual - Check accesses for requests with a common path
+ *
+ * @domain: Domain to check against.
+ * @path: File hierarchy to walk through.
+ * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is
+ *     equal to @layer_masks_parent2 (if any).  This is tied to the unique
+ *     requested path for most actions, or the source in case of a refer action
+ *     (i.e. rename or link), or the source and destination in case of
+ *     RENAME_EXCHANGE.
+ * @layer_masks_parent1: Pointer to a matrix of layer masks, one mask per
+ *     access right, identifying the layers that forbid a specific access.
+ *     Bits from
+ *     this matrix can be unset according to the @path walk.  An empty matrix
+ *     means that @domain allows all possible Landlock accesses (i.e. not only
+ *     those identified by @access_request_parent1).  This matrix can
+ *     initially refer to domain layer masks and, when the accesses for the
+ *     destination and source are the same, to requested layer masks.
+ * @dentry_child1: Dentry to the initial child of the parent1 path.  This
+ *     pointer must be NULL for non-refer actions (i.e. not link nor rename).
+ * @access_request_parent2: Similar to @access_request_parent1 but for a
+ *     request involving a source and a destination.  This refers to the
+ *     destination, except in case of RENAME_EXCHANGE where it also refers to
+ *     the source.  Must be set to 0 when using a simple path request.
+ * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer
+ *     action.  This must be NULL otherwise.
+ * @dentry_child2: Dentry to the initial child of the parent2 path.  This
+ *     pointer is only set for RENAME_EXCHANGE actions and must be NULL
+ *     otherwise.
+ *
+ * This helper first checks that the destination has a superset of restrictions
+ * compared to the source (if any) for a common path.  Because of
+ * RENAME_EXCHANGE actions, source and destinations may be swapped.  It then
+ * checks that the collected accesses and the remaining ones are enough to
+ * allow the request.
+ *
+ * Returns:
+ * - 0 if the access request is granted;
+ * - -EACCES if it is denied because of an access right other than
+ *   LANDLOCK_ACCESS_FS_REFER;
+ * - -EXDEV if the renaming or linking would be a privilege escalation
+ *   (according to each layered policy), or if LANDLOCK_ACCESS_FS_REFER is
+ *   not allowed by the source or the destination.
+ */
+static int check_access_path_dual(
+       const struct landlock_ruleset *const domain,
+       const struct path *const path,
+       const access_mask_t access_request_parent1,
+       layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
+       const struct dentry *const dentry_child1,
+       const access_mask_t access_request_parent2,
+       layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
+       const struct dentry *const dentry_child2)
+{
+       bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
+            child1_is_directory = true, child2_is_directory = true;
+       struct path walker_path;
+       access_mask_t access_masked_parent1, access_masked_parent2;
+       layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS],
+               _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS];
+       layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL,
+       (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL;
+
+       if (!access_request_parent1 && !access_request_parent2)
+               return 0;
        if (WARN_ON_ONCE(!domain || !path))
                return 0;
-       /*
-        * Allows access to pseudo filesystems that will never be mountable
-        * (e.g. sockfs, pipefs), but can still be reachable through
-        * /proc/<pid>/fd/<file-descriptor> .
-        */
-       if ((path->dentry->d_sb->s_flags & SB_NOUSER) ||
-                       (d_is_positive(path->dentry) &&
-                        unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))))
+       if (is_nouser_or_private(path->dentry))
                return 0;
-       if (WARN_ON_ONCE(domain->num_layers < 1))
+       if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1))
                return -EACCES;
 
-       /* Saves all layers handling a subset of requested accesses. */
-       layer_mask = 0;
-       for (i = 0; i < domain->num_layers; i++) {
-               if (domain->fs_access_masks[i] & access_request)
-                       layer_mask |= BIT_ULL(i);
+       if (unlikely(layer_masks_parent2)) {
+               if (WARN_ON_ONCE(!dentry_child1))
+                       return -EACCES;
+               /*
+                * For a double request, first check for potential privilege
+                * escalation by looking at domain handled accesses (which are
+                * a superset of the meaningful requested accesses).
+                */
+               access_masked_parent1 = access_masked_parent2 =
+                       get_handled_accesses(domain);
+               is_dom_check = true;
+       } else {
+               if (WARN_ON_ONCE(dentry_child1 || dentry_child2))
+                       return -EACCES;
+               /* For a simple request, only check for requested accesses. */
+               access_masked_parent1 = access_request_parent1;
+               access_masked_parent2 = access_request_parent2;
+               is_dom_check = false;
+       }
+
+       if (unlikely(dentry_child1)) {
+               unmask_layers(find_rule(domain, dentry_child1),
+                             init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+                                              &_layer_masks_child1),
+                             &_layer_masks_child1);
+               layer_masks_child1 = &_layer_masks_child1;
+               child1_is_directory = d_is_dir(dentry_child1);
+       }
+       if (unlikely(dentry_child2)) {
+               unmask_layers(find_rule(domain, dentry_child2),
+                             init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+                                              &_layer_masks_child2),
+                             &_layer_masks_child2);
+               layer_masks_child2 = &_layer_masks_child2;
+               child2_is_directory = d_is_dir(dentry_child2);
        }
-       /* An access request not handled by the domain is allowed. */
-       if (layer_mask == 0)
-               return 0;
 
        walker_path = *path;
        path_get(&walker_path);
@@ -267,15 +539,54 @@ static int check_access_path(const struct landlock_ruleset *const domain,
         */
        while (true) {
                struct dentry *parent_dentry;
+               const struct landlock_rule *rule;
 
-               layer_mask = unmask_layers(domain, &walker_path,
-                               access_request, layer_mask);
-               if (layer_mask == 0) {
-                       /* Stops when a rule from each layer grants access. */
-                       allowed = true;
-                       break;
+               /*
+                * If all accesses allowed on the destination are already
+                * allowed on the source, i.e. if there are at least as many
+                * restrictions on the destination as on the source, then we
+                * can safely refer files from the source to the destination
+                * without risking a privilege escalation.  This also applies
+                * to RENAME_EXCHANGE, which implies checks in both
+                * directions.  This is crucial for standalone multilayered
+                * security policies.  Furthermore, this helps prevent policy
+                * writers from shooting themselves in the foot.
+                */
+               if (unlikely(is_dom_check &&
+                            no_more_access(
+                                    layer_masks_parent1, layer_masks_child1,
+                                    child1_is_directory, layer_masks_parent2,
+                                    layer_masks_child2,
+                                    child2_is_directory))) {
+                       allowed_parent1 = scope_to_request(
+                               access_request_parent1, layer_masks_parent1);
+                       allowed_parent2 = scope_to_request(
+                               access_request_parent2, layer_masks_parent2);
+
+                       /* Stops when all accesses are granted. */
+                       if (allowed_parent1 && allowed_parent2)
+                               break;
+
+                       /*
+                        * Now, downgrades the remaining checks from domain
+                        * handled accesses to requested accesses.
+                        */
+                       is_dom_check = false;
+                       access_masked_parent1 = access_request_parent1;
+                       access_masked_parent2 = access_request_parent2;
                }
 
+               rule = find_rule(domain, walker_path.dentry);
+               allowed_parent1 = unmask_layers(rule, access_masked_parent1,
+                                               layer_masks_parent1);
+               allowed_parent2 = unmask_layers(rule, access_masked_parent2,
+                                               layer_masks_parent2);
+
+               /* Stops when a rule from each layer grants access. */
+               if (allowed_parent1 && allowed_parent2)
+                       break;
+
 jump_up:
                if (walker_path.dentry == walker_path.mnt->mnt_root) {
                        if (follow_up(&walker_path)) {
@@ -286,7 +597,6 @@ jump_up:
                                 * Stops at the real root.  Denies access
                                 * because not all layers have granted access.
                                 */
-                               allowed = false;
                                break;
                        }
                }
@@ -296,7 +606,8 @@ jump_up:
                         * access to internal filesystems (e.g. nsfs, which is
                         * reachable through /proc/<pid>/ns/<namespace>).
                         */
-                       allowed = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
+                       allowed_parent1 = allowed_parent2 =
+                               !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
                        break;
                }
                parent_dentry = dget_parent(walker_path.dentry);
@@ -304,11 +615,40 @@ jump_up:
                walker_path.dentry = parent_dentry;
        }
        path_put(&walker_path);
-       return allowed ? 0 : -EACCES;
+
+       if (allowed_parent1 && allowed_parent2)
+               return 0;
+
+       /*
+        * This prioritizes EACCES over EXDEV for all actions, including
+        * renames with RENAME_EXCHANGE.
+        */
+       if (likely(is_eacces(layer_masks_parent1, access_request_parent1) ||
+                  is_eacces(layer_masks_parent2, access_request_parent2)))
+               return -EACCES;
+
+       /*
+        * Gracefully forbids reparenting if the destination directory
+        * hierarchy is not a superset of restrictions of the source directory
+        * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the
+        * source or the destination.
+        */
+       return -EXDEV;
+}
+
+static inline int check_access_path(const struct landlock_ruleset *const domain,
+                                   const struct path *const path,
+                                   access_mask_t access_request)
+{
+       layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+
+       access_request = init_layer_masks(domain, access_request, &layer_masks);
+       return check_access_path_dual(domain, path, access_request,
+                                     &layer_masks, NULL, 0, NULL, NULL);
 }
 
 static inline int current_check_access_path(const struct path *const path,
-               const u32 access_request)
+                                           const access_mask_t access_request)
 {
        const struct landlock_ruleset *const dom =
                landlock_get_current_domain();
@@ -318,6 +658,239 @@ static inline int current_check_access_path(const struct path *const path,
        return check_access_path(dom, path, access_request);
 }
 
+static inline access_mask_t get_mode_access(const umode_t mode)
+{
+       switch (mode & S_IFMT) {
+       case S_IFLNK:
+               return LANDLOCK_ACCESS_FS_MAKE_SYM;
+       case 0:
+               /* A zero mode translates to S_IFREG. */
+       case S_IFREG:
+               return LANDLOCK_ACCESS_FS_MAKE_REG;
+       case S_IFDIR:
+               return LANDLOCK_ACCESS_FS_MAKE_DIR;
+       case S_IFCHR:
+               return LANDLOCK_ACCESS_FS_MAKE_CHAR;
+       case S_IFBLK:
+               return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
+       case S_IFIFO:
+               return LANDLOCK_ACCESS_FS_MAKE_FIFO;
+       case S_IFSOCK:
+               return LANDLOCK_ACCESS_FS_MAKE_SOCK;
+       default:
+               WARN_ON_ONCE(1);
+               return 0;
+       }
+}
+
+static inline access_mask_t maybe_remove(const struct dentry *const dentry)
+{
+       if (d_is_negative(dentry))
+               return 0;
+       return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
+                                 LANDLOCK_ACCESS_FS_REMOVE_FILE;
+}
+
+/**
+ * collect_domain_accesses - Walk through a file path and collect accesses
+ *
+ * @domain: Domain to check against.
+ * @mnt_root: Last directory to check.
+ * @dir: Directory to start the walk from.
+ * @layer_masks_dom: Where to store the collected accesses.
+ *
+ * This helper is useful to begin a path walk from the @dir directory to a
+ * @mnt_root directory used as a mount point.  This mount point is the common
+ * ancestor between the source and the destination of a renamed and linked
+ * file.  While walking from @dir to @mnt_root, we record all the domain's
+ * allowed accesses in @layer_masks_dom.
+ *
+ * This is similar to check_access_path_dual() but much simpler because it only
+ * handles walking on the same mount point and only checks one set of accesses.
+ *
+ * Returns:
+ * - true if all the domain access rights are allowed for @dir;
+ * - false if the walk reached @mnt_root.
+ */
+static bool collect_domain_accesses(
+       const struct landlock_ruleset *const domain,
+       const struct dentry *const mnt_root, struct dentry *dir,
+       layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS])
+{
+       unsigned long access_dom;
+       bool ret = false;
+
+       if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom))
+               return true;
+       if (is_nouser_or_private(dir))
+               return true;
+
+       access_dom = init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+                                     layer_masks_dom);
+
+       dget(dir);
+       while (true) {
+               struct dentry *parent_dentry;
+
+               /* Gets all layers allowing all domain accesses. */
+               if (unmask_layers(find_rule(domain, dir), access_dom,
+                                 layer_masks_dom)) {
+                       /*
+                        * Stops when all handled accesses are allowed by at
+                        * least one rule in each layer.
+                        */
+                       ret = true;
+                       break;
+               }
+
+               /* We should not reach a root other than @mnt_root. */
+               if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir)))
+                       break;
+
+               parent_dentry = dget_parent(dir);
+               dput(dir);
+               dir = parent_dentry;
+       }
+       dput(dir);
+       return ret;
+}
+
+/**
+ * current_check_refer_path - Check if a rename or link action is allowed
+ *
+ * @old_dentry: File or directory requested to be moved or linked.
+ * @new_dir: Destination parent directory.
+ * @new_dentry: Destination file or directory.
+ * @removable: Set to true if it is a rename operation.
+ * @exchange: Set to true if it is a rename operation with RENAME_EXCHANGE.
+ *
+ * Because of its unprivileged constraints, Landlock relies on file hierarchies
+ * (and not only inodes) to tie access rights to files.  Being able to link or
+ * rename a file hierarchy brings some challenges.  Indeed, moving or linking a
+ * file (i.e. creating a new reference to an inode) can have an impact on the
+ * actions allowed for a set of files if it would change its parent directory
+ * (i.e. reparenting).
+ *
+ * To avoid trivial access right bypasses, Landlock first checks if the file or
+ * directory requested to be moved would gain new access rights inherited from
+ * its new hierarchy.  Before returning any error, Landlock then checks that
+ * the parent source hierarchy and the destination hierarchy would allow the
+ * link or rename action.  If it is not the case, an error with EACCES is
+ * returned to inform user space that there is no way to remove or create the
+ * requested source file type.  If it should be allowed but the new inherited
+ * access rights would be greater than the source access rights, then the
+ * kernel returns an error with EXDEV.  Prioritizing EACCES over EXDEV enables
+ * user space to abort the whole operation if there is no way to do it, or to
+ * manually copy the source to the destination if this remains allowed, e.g.
+ * because file creation is allowed on the destination directory but not direct
+ * linking.
+ *
+ * To achieve this goal, the kernel needs to compare two file hierarchies: the
+ * one identifying the source file or directory (including itself), and the
+ * destination one.  This can be seen as a multilayer partial ordering problem.
+ * The kernel walks through these paths and collects in a matrix the access
+ * rights that are denied per layer.  These matrices are then compared to see
+ * if the destination one has more (or the same) restrictions as the source
+ * one.  If this is the case, the requested action will not return EXDEV, which
+ * doesn't mean the action is allowed.  The parent hierarchy of the source
+ * (i.e. parent directory), and the destination hierarchy must also be checked
+ * to verify that they explicitly allow such an action (i.e. referencing,
+ * creation and potentially removal rights).  The kernel implementation is then
+ * required to rely on potentially four matrices of access rights: one for the
+ * source file or directory (i.e. the child), a potentially other one for the
+ * other source/destination (in case of RENAME_EXCHANGE), one for the source
+ * parent hierarchy and a last one for the destination hierarchy.  These
+ * ephemeral matrices take some space on the stack, which limits the number of
+ * layers to a deemed reasonable number: 16.
+ *
+ * Returns:
+ * - 0 if access is allowed;
+ * - -EXDEV if @old_dentry would inherit new access rights from @new_dir;
+ * - -EACCES if file removal or creation is denied.
+ */
+static int current_check_refer_path(struct dentry *const old_dentry,
+                                   const struct path *const new_dir,
+                                   struct dentry *const new_dentry,
+                                   const bool removable, const bool exchange)
+{
+       const struct landlock_ruleset *const dom =
+               landlock_get_current_domain();
+       bool allow_parent1, allow_parent2;
+       access_mask_t access_request_parent1, access_request_parent2;
+       struct path mnt_dir;
+       layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS],
+               layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS];
+
+       if (!dom)
+               return 0;
+       if (WARN_ON_ONCE(dom->num_layers < 1))
+               return -EACCES;
+       if (unlikely(d_is_negative(old_dentry)))
+               return -ENOENT;
+       if (exchange) {
+               if (unlikely(d_is_negative(new_dentry)))
+                       return -ENOENT;
+               access_request_parent1 =
+                       get_mode_access(d_backing_inode(new_dentry)->i_mode);
+       } else {
+               access_request_parent1 = 0;
+       }
+       access_request_parent2 =
+               get_mode_access(d_backing_inode(old_dentry)->i_mode);
+       if (removable) {
+               access_request_parent1 |= maybe_remove(old_dentry);
+               access_request_parent2 |= maybe_remove(new_dentry);
+       }
+
+       /* The mount points are the same for old and new paths, cf. EXDEV. */
+       if (old_dentry->d_parent == new_dir->dentry) {
+               /*
+                * The LANDLOCK_ACCESS_FS_REFER access right is not required
+                * for same-directory referer (i.e. no reparenting).
+                */
+               access_request_parent1 = init_layer_masks(
+                       dom, access_request_parent1 | access_request_parent2,
+                       &layer_masks_parent1);
+               return check_access_path_dual(dom, new_dir,
+                                             access_request_parent1,
+                                             &layer_masks_parent1, NULL, 0,
+                                             NULL, NULL);
+       }
+
+       /* Backward compatibility: no reparenting support. */
+       if (!(get_handled_accesses(dom) & LANDLOCK_ACCESS_FS_REFER))
+               return -EXDEV;
+
+       access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER;
+       access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER;
+
+       /* Saves the common mount point. */
+       mnt_dir.mnt = new_dir->mnt;
+       mnt_dir.dentry = new_dir->mnt->mnt_root;
+
+       /* new_dir->dentry is equal to new_dentry->d_parent */
+       allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry,
+                                               old_dentry->d_parent,
+                                               &layer_masks_parent1);
+       allow_parent2 = collect_domain_accesses(
+               dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2);
+
+       if (allow_parent1 && allow_parent2)
+               return 0;
+
+       /*
+        * To be able to compare source and destination domain access rights,
+        * take into account the @old_dentry access rights aggregated with its
+        * parent access rights.  This will be useful to compare with the
+        * destination parent access rights.
+        */
+       return check_access_path_dual(dom, &mnt_dir, access_request_parent1,
+                                     &layer_masks_parent1, old_dentry,
+                                     access_request_parent2,
+                                     &layer_masks_parent2,
+                                     exchange ? new_dentry : NULL);
+}
+
 /* Inode hooks */
 
 static void hook_inode_free_security(struct inode *const inode)
@@ -436,8 +1009,8 @@ static void hook_sb_delete(struct super_block *const sb)
        if (prev_inode)
                iput(prev_inode);
        /* Waits for pending iput() in release_inode(). */
-       wait_var_event(&landlock_superblock(sb)->inode_refs, !atomic_long_read(
-                               &landlock_superblock(sb)->inode_refs));
+       wait_var_event(&landlock_superblock(sb)->inode_refs,
+                      !atomic_long_read(&landlock_superblock(sb)->inode_refs));
 }
 
 /*
@@ -459,8 +1032,8 @@ static void hook_sb_delete(struct super_block *const sb)
  * a dedicated user space option would be required (e.g. as a ruleset flag).
  */
 static int hook_sb_mount(const char *const dev_name,
-               const struct path *const path, const char *const type,
-               const unsigned long flags, void *const data)
+                        const struct path *const path, const char *const type,
+                        const unsigned long flags, void *const data)
 {
        if (!landlock_get_current_domain())
                return 0;
@@ -468,7 +1041,7 @@ static int hook_sb_mount(const char *const dev_name,
 }
 
 static int hook_move_mount(const struct path *const from_path,
-               const struct path *const to_path)
+                          const struct path *const to_path)
 {
        if (!landlock_get_current_domain())
                return 0;
@@ -502,7 +1075,7 @@ static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
  * view of the filesystem.
  */
 static int hook_sb_pivotroot(const struct path *const old_path,
-               const struct path *const new_path)
+                            const struct path *const new_path)
 {
        if (!landlock_get_current_domain())
                return 0;
@@ -511,97 +1084,34 @@ static int hook_sb_pivotroot(const struct path *const old_path,
 
 /* Path hooks */
 
-static inline u32 get_mode_access(const umode_t mode)
-{
-       switch (mode & S_IFMT) {
-       case S_IFLNK:
-               return LANDLOCK_ACCESS_FS_MAKE_SYM;
-       case 0:
-               /* A zero mode translates to S_IFREG. */
-       case S_IFREG:
-               return LANDLOCK_ACCESS_FS_MAKE_REG;
-       case S_IFDIR:
-               return LANDLOCK_ACCESS_FS_MAKE_DIR;
-       case S_IFCHR:
-               return LANDLOCK_ACCESS_FS_MAKE_CHAR;
-       case S_IFBLK:
-               return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
-       case S_IFIFO:
-               return LANDLOCK_ACCESS_FS_MAKE_FIFO;
-       case S_IFSOCK:
-               return LANDLOCK_ACCESS_FS_MAKE_SOCK;
-       default:
-               WARN_ON_ONCE(1);
-               return 0;
-       }
-}
-
-/*
- * Creating multiple links or renaming may lead to privilege escalations if not
- * handled properly.  Indeed, we must be sure that the source doesn't gain more
- * privileges by being accessible from the destination.  This is getting more
- * complex when dealing with multiple layers.  The whole picture can be seen as
- * a multilayer partial ordering problem.  A future version of Landlock will
- * deal with that.
- */
 static int hook_path_link(struct dentry *const old_dentry,
-               const struct path *const new_dir,
-               struct dentry *const new_dentry)
-{
-       const struct landlock_ruleset *const dom =
-               landlock_get_current_domain();
-
-       if (!dom)
-               return 0;
-       /* The mount points are the same for old and new paths, cf. EXDEV. */
-       if (old_dentry->d_parent != new_dir->dentry)
-               /* Gracefully forbids reparenting. */
-               return -EXDEV;
-       if (unlikely(d_is_negative(old_dentry)))
-               return -ENOENT;
-       return check_access_path(dom, new_dir,
-                       get_mode_access(d_backing_inode(old_dentry)->i_mode));
-}
-
-static inline u32 maybe_remove(const struct dentry *const dentry)
+                         const struct path *const new_dir,
+                         struct dentry *const new_dentry)
 {
-       if (d_is_negative(dentry))
-               return 0;
-       return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
-               LANDLOCK_ACCESS_FS_REMOVE_FILE;
+       return current_check_refer_path(old_dentry, new_dir, new_dentry, false,
+                                       false);
 }
 
 static int hook_path_rename(const struct path *const old_dir,
-               struct dentry *const old_dentry,
-               const struct path *const new_dir,
-               struct dentry *const new_dentry)
+                           struct dentry *const old_dentry,
+                           const struct path *const new_dir,
+                           struct dentry *const new_dentry,
+                           const unsigned int flags)
 {
-       const struct landlock_ruleset *const dom =
-               landlock_get_current_domain();
-
-       if (!dom)
-               return 0;
-       /* The mount points are the same for old and new paths, cf. EXDEV. */
-       if (old_dir->dentry != new_dir->dentry)
-               /* Gracefully forbids reparenting. */
-               return -EXDEV;
-       if (unlikely(d_is_negative(old_dentry)))
-               return -ENOENT;
-       /* RENAME_EXCHANGE is handled because directories are the same. */
-       return check_access_path(dom, old_dir, maybe_remove(old_dentry) |
-                       maybe_remove(new_dentry) |
-                       get_mode_access(d_backing_inode(old_dentry)->i_mode));
+       /* old_dir refers to old_dentry->d_parent and new_dir->mnt */
+       return current_check_refer_path(old_dentry, new_dir, new_dentry, true,
+                                       !!(flags & RENAME_EXCHANGE));
 }
 
 static int hook_path_mkdir(const struct path *const dir,
-               struct dentry *const dentry, const umode_t mode)
+                          struct dentry *const dentry, const umode_t mode)
 {
        return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
 }
 
 static int hook_path_mknod(const struct path *const dir,
-               struct dentry *const dentry, const umode_t mode,
-               const unsigned int dev)
+                          struct dentry *const dentry, const umode_t mode,
+                          const unsigned int dev)
 {
        const struct landlock_ruleset *const dom =
                landlock_get_current_domain();
@@ -612,28 +1122,29 @@ static int hook_path_mknod(const struct path *const dir,
 }
 
 static int hook_path_symlink(const struct path *const dir,
-               struct dentry *const dentry, const char *const old_name)
+                            struct dentry *const dentry,
+                            const char *const old_name)
 {
        return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
 }
 
 static int hook_path_unlink(const struct path *const dir,
-               struct dentry *const dentry)
+                           struct dentry *const dentry)
 {
        return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
 }
 
 static int hook_path_rmdir(const struct path *const dir,
-               struct dentry *const dentry)
+                          struct dentry *const dentry)
 {
        return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
 }
 
 /* File hooks */
 
-static inline u32 get_file_access(const struct file *const file)
+static inline access_mask_t get_file_access(const struct file *const file)
 {
-       u32 access = 0;
+       access_mask_t access = 0;
 
        if (file->f_mode & FMODE_READ) {
                /* A directory can only be opened in read mode. */
@@ -688,5 +1199,5 @@ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
 __init void landlock_add_fs_hooks(void)
 {
        security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
-                       LANDLOCK_NAME);
+                          LANDLOCK_NAME);
 }
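
The fs.c rework above routes both link(2) and rename(2) through current_check_refer_path(), with the trailing booleans distinguishing rename from link and flagging RENAME_EXCHANGE. The user-visible effect is that cross-directory renames and links, which Landlock previously refused outright with EXDEV, can now succeed inside a domain that grants LANDLOCK_ACCESS_FS_REFER on both parent directories (together with the matching MAKE_* right on the destination). A minimal syscall-side sketch, not part of the patch (renameat2() needs glibc 2.28+):

        #define _GNU_SOURCE
        #include <fcntl.h>      /* AT_* constants */
        #include <stdio.h>      /* renameat2(), RENAME_EXCHANGE */
        #include <stdlib.h>

        int main(void)
        {
                /* Swap two entries across directories in one atomic step. */
                if (renameat2(AT_FDCWD, "dir_a/conf",
                              AT_FDCWD, "dir_b/conf", RENAME_EXCHANGE)) {
                        /* EXDEV: the Landlock domain forbids reparenting. */
                        perror("renameat2");
                        return EXIT_FAILURE;
                }
                return EXIT_SUCCESS;
        }
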
index 187284b421c9d88c13e454930f48ec7023db630e..8db7acf9109b6a8788a5b28392a727ef2b8626b8 100644 (file)
@@ -50,14 +50,14 @@ struct landlock_superblock_security {
        atomic_long_t inode_refs;
 };
 
-static inline struct landlock_inode_security *landlock_inode(
-               const struct inode *const inode)
+static inline struct landlock_inode_security *
+landlock_inode(const struct inode *const inode)
 {
        return inode->i_security + landlock_blob_sizes.lbs_inode;
 }
 
-static inline struct landlock_superblock_security *landlock_superblock(
-               const struct super_block *const superblock)
+static inline struct landlock_superblock_security *
+landlock_superblock(const struct super_block *const superblock)
 {
        return superblock->s_security + landlock_blob_sizes.lbs_superblock;
 }
@@ -65,6 +65,7 @@ static inline struct landlock_superblock_security *landlock_superblock(
 __init void landlock_add_fs_hooks(void);
 
 int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
-               const struct path *const path, u32 access_hierarchy);
+                           const struct path *const path,
+                           access_mask_t access_hierarchy);
 
 #endif /* _SECURITY_LANDLOCK_FS_H */
index 2a0a1095ee27e617d60b6bc47c074553a0e481b6..b54184ab9439de9750ed63d2c654114ddf36d9ee 100644 (file)
@@ -9,13 +9,19 @@
 #ifndef _SECURITY_LANDLOCK_LIMITS_H
 #define _SECURITY_LANDLOCK_LIMITS_H
 
+#include <linux/bitops.h>
 #include <linux/limits.h>
 #include <uapi/linux/landlock.h>
 
-#define LANDLOCK_MAX_NUM_LAYERS                64
+/* clang-format off */
+
+#define LANDLOCK_MAX_NUM_LAYERS                16
 #define LANDLOCK_MAX_NUM_RULES         U32_MAX
 
-#define LANDLOCK_LAST_ACCESS_FS                LANDLOCK_ACCESS_FS_MAKE_SYM
+#define LANDLOCK_LAST_ACCESS_FS                LANDLOCK_ACCESS_FS_REFER
 #define LANDLOCK_MASK_ACCESS_FS                ((LANDLOCK_LAST_ACCESS_FS << 1) - 1)
+#define LANDLOCK_NUM_ACCESS_FS         __const_hweight64(LANDLOCK_MASK_ACCESS_FS)
+
+/* clang-format on */
 
 #endif /* _SECURITY_LANDLOCK_LIMITS_H */
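
The mask arithmetic here is the usual trick for setting every bit up to and including the highest right: with LANDLOCK_LAST_ACCESS_FS now LANDLOCK_ACCESS_FS_REFER (bit 13 in the UAPI header), shifting it left once and subtracting one covers bits 0 through 13, and __const_hweight64() counts them at compile time. A worked instance with the values above:

        #include <assert.h>

        #define LAST    (1ULL << 13)            /* LANDLOCK_ACCESS_FS_REFER */
        #define MASK    ((LAST << 1) - 1)       /* LANDLOCK_MASK_ACCESS_FS  */

        static_assert(MASK == 0x3fff, "bits 0..13 all set");
        /* __const_hweight64(0x3fff) == 14, hence LANDLOCK_NUM_ACCESS_FS. */
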
index d674fdf9ff04f52e682c701b91c563d25a9481d2..1f50612f0185006838447a4ae42dbd247123d28a 100644 (file)
@@ -17,9 +17,9 @@
 
 #include "object.h"
 
-struct landlock_object *landlock_create_object(
-               const struct landlock_object_underops *const underops,
-               void *const underobj)
+struct landlock_object *
+landlock_create_object(const struct landlock_object_underops *const underops,
+                      void *const underobj)
 {
        struct landlock_object *new_object;
 
index 3f80674c6c8d3b1478bb1d58fa4ef528b1f30404..5f28c35e8aa8c6615f8941830e10b6b5f08801e9 100644 (file)
@@ -76,9 +76,9 @@ struct landlock_object {
        };
 };
 
-struct landlock_object *landlock_create_object(
-               const struct landlock_object_underops *const underops,
-               void *const underobj);
+struct landlock_object *
+landlock_create_object(const struct landlock_object_underops *const underops,
+                      void *const underobj);
 
 void landlock_put_object(struct landlock_object *const object);
 
index f55b82446de2196dbbe885eb5d1943f2dde0d15e..4c5b9cd71286125944d8658f36f2802a0e338b6a 100644 (file)
@@ -30,7 +30,7 @@
  * means a subset of) the @child domain.
  */
 static bool domain_scope_le(const struct landlock_ruleset *const parent,
-               const struct landlock_ruleset *const child)
+                           const struct landlock_ruleset *const child)
 {
        const struct landlock_hierarchy *walker;
 
@@ -48,7 +48,7 @@ static bool domain_scope_le(const struct landlock_ruleset *const parent,
 }
 
 static bool task_is_scoped(const struct task_struct *const parent,
-               const struct task_struct *const child)
+                          const struct task_struct *const child)
 {
        bool is_scoped;
        const struct landlock_ruleset *dom_parent, *dom_child;
@@ -62,7 +62,7 @@ static bool task_is_scoped(const struct task_struct *const parent,
 }
 
 static int task_ptrace(const struct task_struct *const parent,
-               const struct task_struct *const child)
+                      const struct task_struct *const child)
 {
        /* Quick return for non-landlocked tasks. */
        if (!landlocked(parent))
@@ -86,7 +86,7 @@ static int task_ptrace(const struct task_struct *const parent,
  * granted, -errno if denied.
  */
 static int hook_ptrace_access_check(struct task_struct *const child,
-               const unsigned int mode)
+                                   const unsigned int mode)
 {
        return task_ptrace(current, child);
 }
@@ -116,5 +116,5 @@ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
 __init void landlock_add_ptrace_hooks(void)
 {
        security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
-                       LANDLOCK_NAME);
+                          LANDLOCK_NAME);
 }
index ec72b9262bf38ab5570692d1758d563aacedae4f..996484f98bfdedb435444458dd9bbc1705ea6f9f 100644 (file)
@@ -28,8 +28,9 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
 {
        struct landlock_ruleset *new_ruleset;
 
-       new_ruleset = kzalloc(struct_size(new_ruleset, fs_access_masks,
-                               num_layers), GFP_KERNEL_ACCOUNT);
+       new_ruleset =
+               kzalloc(struct_size(new_ruleset, fs_access_masks, num_layers),
+                       GFP_KERNEL_ACCOUNT);
        if (!new_ruleset)
                return ERR_PTR(-ENOMEM);
        refcount_set(&new_ruleset->usage, 1);
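
struct_size() used here computes the size of the ruleset plus num_layers trailing fs_access_masks elements, saturating rather than wrapping on overflow so an oversized request simply fails to allocate; flex_array_size(), used for the memcpy() calls further down, is the matching helper for the element-array length alone. Roughly, ignoring the saturation:

        /* Simplified equivalents (the real helpers saturate to SIZE_MAX): */
        size_t alloc_len = sizeof(*new_ruleset) +
                           num_layers * sizeof(new_ruleset->fs_access_masks[0]);
        size_t copy_len  = num_layers * sizeof(new_ruleset->fs_access_masks[0]);
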
@@ -44,7 +45,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
        return new_ruleset;
 }
 
-struct landlock_ruleset *landlock_create_ruleset(const u32 fs_access_mask)
+struct landlock_ruleset *
+landlock_create_ruleset(const access_mask_t fs_access_mask)
 {
        struct landlock_ruleset *new_ruleset;
 
@@ -66,11 +68,10 @@ static void build_check_rule(void)
        BUILD_BUG_ON(rule.num_layers < LANDLOCK_MAX_NUM_LAYERS);
 }
 
-static struct landlock_rule *create_rule(
-               struct landlock_object *const object,
-               const struct landlock_layer (*const layers)[],
-               const u32 num_layers,
-               const struct landlock_layer *const new_layer)
+static struct landlock_rule *
+create_rule(struct landlock_object *const object,
+           const struct landlock_layer (*const layers)[], const u32 num_layers,
+           const struct landlock_layer *const new_layer)
 {
        struct landlock_rule *new_rule;
        u32 new_num_layers;
@@ -85,7 +86,7 @@ static struct landlock_rule *create_rule(
                new_num_layers = num_layers;
        }
        new_rule = kzalloc(struct_size(new_rule, layers, new_num_layers),
-                       GFP_KERNEL_ACCOUNT);
+                          GFP_KERNEL_ACCOUNT);
        if (!new_rule)
                return ERR_PTR(-ENOMEM);
        RB_CLEAR_NODE(&new_rule->node);
@@ -94,7 +95,7 @@ static struct landlock_rule *create_rule(
        new_rule->num_layers = new_num_layers;
        /* Copies the original layer stack. */
        memcpy(new_rule->layers, layers,
-                       flex_array_size(new_rule, layers, num_layers));
+              flex_array_size(new_rule, layers, num_layers));
        if (new_layer)
                /* Adds a copy of @new_layer on the layer stack. */
                new_rule->layers[new_rule->num_layers - 1] = *new_layer;
@@ -142,9 +143,9 @@ static void build_check_ruleset(void)
  * access rights.
  */
 static int insert_rule(struct landlock_ruleset *const ruleset,
-               struct landlock_object *const object,
-               const struct landlock_layer (*const layers)[],
-               size_t num_layers)
+                      struct landlock_object *const object,
+                      const struct landlock_layer (*const layers)[],
+                      size_t num_layers)
 {
        struct rb_node **walker_node;
        struct rb_node *parent_node = NULL;
@@ -156,8 +157,8 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
                return -ENOENT;
        walker_node = &(ruleset->root.rb_node);
        while (*walker_node) {
-               struct landlock_rule *const this = rb_entry(*walker_node,
-                               struct landlock_rule, node);
+               struct landlock_rule *const this =
+                       rb_entry(*walker_node, struct landlock_rule, node);
 
                if (this->object != object) {
                        parent_node = *walker_node;
@@ -194,7 +195,7 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
                 * ruleset and a domain.
                 */
                new_rule = create_rule(object, &this->layers, this->num_layers,
-                               &(*layers)[0]);
+                                      &(*layers)[0]);
                if (IS_ERR(new_rule))
                        return PTR_ERR(new_rule);
                rb_replace_node(&this->node, &new_rule->node, &ruleset->root);
@@ -228,13 +229,14 @@ static void build_check_layer(void)
 
 /* @ruleset must be locked by the caller. */
 int landlock_insert_rule(struct landlock_ruleset *const ruleset,
-               struct landlock_object *const object, const u32 access)
+                        struct landlock_object *const object,
+                        const access_mask_t access)
 {
-       struct landlock_layer layers[] = {{
+       struct landlock_layer layers[] = { {
                .access = access,
                /* When @level is zero, insert_rule() extends @ruleset. */
                .level = 0,
-       }};
+       } };
 
        build_check_layer();
        return insert_rule(ruleset, object, &layers, ARRAY_SIZE(layers));
@@ -257,7 +259,7 @@ static void put_hierarchy(struct landlock_hierarchy *hierarchy)
 }
 
 static int merge_ruleset(struct landlock_ruleset *const dst,
-               struct landlock_ruleset *const src)
+                        struct landlock_ruleset *const src)
 {
        struct landlock_rule *walker_rule, *next_rule;
        int err = 0;
@@ -282,11 +284,11 @@ static int merge_ruleset(struct landlock_ruleset *const dst,
        dst->fs_access_masks[dst->num_layers - 1] = src->fs_access_masks[0];
 
        /* Merges the @src tree. */
-       rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
-                       &src->root, node) {
-               struct landlock_layer layers[] = {{
+       rbtree_postorder_for_each_entry_safe(walker_rule, next_rule, &src->root,
+                                            node) {
+               struct landlock_layer layers[] = { {
                        .level = dst->num_layers,
-               }};
+               } };
 
                if (WARN_ON_ONCE(walker_rule->num_layers != 1)) {
                        err = -EINVAL;
@@ -298,7 +300,7 @@ static int merge_ruleset(struct landlock_ruleset *const dst,
                }
                layers[0].access = walker_rule->layers[0].access;
                err = insert_rule(dst, walker_rule->object, &layers,
-                               ARRAY_SIZE(layers));
+                                 ARRAY_SIZE(layers));
                if (err)
                        goto out_unlock;
        }
@@ -310,7 +312,7 @@ out_unlock:
 }
 
 static int inherit_ruleset(struct landlock_ruleset *const parent,
-               struct landlock_ruleset *const child)
+                          struct landlock_ruleset *const child)
 {
        struct landlock_rule *walker_rule, *next_rule;
        int err = 0;
@@ -325,9 +327,10 @@ static int inherit_ruleset(struct landlock_ruleset *const parent,
 
        /* Copies the @parent tree. */
        rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
-                       &parent->root, node) {
+                                            &parent->root, node) {
                err = insert_rule(child, walker_rule->object,
-                               &walker_rule->layers, walker_rule->num_layers);
+                                 &walker_rule->layers,
+                                 walker_rule->num_layers);
                if (err)
                        goto out_unlock;
        }
@@ -338,7 +341,7 @@ static int inherit_ruleset(struct landlock_ruleset *const parent,
        }
        /* Copies the parent layer stack and leaves a space for the new layer. */
        memcpy(child->fs_access_masks, parent->fs_access_masks,
-                       flex_array_size(parent, fs_access_masks, parent->num_layers));
+              flex_array_size(parent, fs_access_masks, parent->num_layers));
 
        if (WARN_ON_ONCE(!parent->hierarchy)) {
                err = -EINVAL;
@@ -358,8 +361,7 @@ static void free_ruleset(struct landlock_ruleset *const ruleset)
        struct landlock_rule *freeme, *next;
 
        might_sleep();
-       rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root,
-                       node)
+       rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root, node)
                free_rule(freeme);
        put_hierarchy(ruleset->hierarchy);
        kfree(ruleset);
@@ -397,9 +399,9 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
  * Returns the intersection of @parent and @ruleset, or returns @parent if
  * @ruleset is empty, or returns a duplicate of @ruleset if @parent is empty.
  */
-struct landlock_ruleset *landlock_merge_ruleset(
-               struct landlock_ruleset *const parent,
-               struct landlock_ruleset *const ruleset)
+struct landlock_ruleset *
+landlock_merge_ruleset(struct landlock_ruleset *const parent,
+                      struct landlock_ruleset *const ruleset)
 {
        struct landlock_ruleset *new_dom;
        u32 num_layers;
@@ -421,8 +423,8 @@ struct landlock_ruleset *landlock_merge_ruleset(
        new_dom = create_ruleset(num_layers);
        if (IS_ERR(new_dom))
                return new_dom;
-       new_dom->hierarchy = kzalloc(sizeof(*new_dom->hierarchy),
-                       GFP_KERNEL_ACCOUNT);
+       new_dom->hierarchy =
+               kzalloc(sizeof(*new_dom->hierarchy), GFP_KERNEL_ACCOUNT);
        if (!new_dom->hierarchy) {
                err = -ENOMEM;
                goto out_put_dom;
@@ -449,9 +451,9 @@ out_put_dom:
 /*
  * The returned access has the same lifetime as @ruleset.
  */
-const struct landlock_rule *landlock_find_rule(
-               const struct landlock_ruleset *const ruleset,
-               const struct landlock_object *const object)
+const struct landlock_rule *
+landlock_find_rule(const struct landlock_ruleset *const ruleset,
+                  const struct landlock_object *const object)
 {
        const struct rb_node *node;
 
@@ -459,8 +461,8 @@ const struct landlock_rule *landlock_find_rule(
                return NULL;
        node = ruleset->root.rb_node;
        while (node) {
-               struct landlock_rule *this = rb_entry(node,
-                               struct landlock_rule, node);
+               struct landlock_rule *this =
+                       rb_entry(node, struct landlock_rule, node);
 
                if (this->object == object)
                        return this;
index 2d3ed7ec5a0ab5114ee897e3f2c65641bbc491a7..d43231b783e4fbb422935bff0a9da074ed765d98 100644 (file)
@@ -9,13 +9,26 @@
 #ifndef _SECURITY_LANDLOCK_RULESET_H
 #define _SECURITY_LANDLOCK_RULESET_H
 
+#include <linux/bitops.h>
+#include <linux/build_bug.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
 #include <linux/workqueue.h>
 
+#include "limits.h"
 #include "object.h"
 
+typedef u16 access_mask_t;
+/* Makes sure all filesystem access rights can be stored. */
+static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_ACCESS_FS);
+/* Makes sure for_each_set_bit() and for_each_clear_bit() calls are OK. */
+static_assert(sizeof(unsigned long) >= sizeof(access_mask_t));
+
+typedef u16 layer_mask_t;
+/* Makes sure all layers can be checked. */
+static_assert(BITS_PER_TYPE(layer_mask_t) >= LANDLOCK_MAX_NUM_LAYERS);
+
 /**
  * struct landlock_layer - Access rights for a given layer
  */
@@ -28,7 +41,7 @@ struct landlock_layer {
         * @access: Bitfield of allowed actions on the kernel object.  They are
         * relative to the object type (e.g. %LANDLOCK_ACTION_FS_READ).
         */
-       u16 access;
+       access_mask_t access;
 };
 
 /**
@@ -135,26 +148,28 @@ struct landlock_ruleset {
                         * layers are set once and never changed for the
                         * lifetime of the ruleset.
                         */
-                       u16 fs_access_masks[];
+                       access_mask_t fs_access_masks[];
                };
        };
 };
 
-struct landlock_ruleset *landlock_create_ruleset(const u32 fs_access_mask);
+struct landlock_ruleset *
+landlock_create_ruleset(const access_mask_t fs_access_mask);
 
 void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
 void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
 
 int landlock_insert_rule(struct landlock_ruleset *const ruleset,
-               struct landlock_object *const object, const u32 access);
+                        struct landlock_object *const object,
+                        const access_mask_t access);
 
-struct landlock_ruleset *landlock_merge_ruleset(
-               struct landlock_ruleset *const parent,
-               struct landlock_ruleset *const ruleset);
+struct landlock_ruleset *
+landlock_merge_ruleset(struct landlock_ruleset *const parent,
+                      struct landlock_ruleset *const ruleset);
 
-const struct landlock_rule *landlock_find_rule(
-               const struct landlock_ruleset *const ruleset,
-               const struct landlock_object *const object);
+const struct landlock_rule *
+landlock_find_rule(const struct landlock_ruleset *const ruleset,
+                  const struct landlock_object *const object);
 
 static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
 {
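
The new typedefs narrow the mask storage to 16 bits, while the static_assert() lines turn the size assumptions into build failures the moment LANDLOCK_NUM_ACCESS_FS or LANDLOCK_MAX_NUM_LAYERS outgrows the chosen type. The same guard pattern in plain C11, sketched outside the kernel with hypothetical names:

        #include <assert.h>
        #include <limits.h>
        #include <stdint.h>

        typedef uint16_t access_mask_t;
        enum { NUM_ACCESS_FS = 14 };    /* mirrors LANDLOCK_NUM_ACCESS_FS */

        static_assert(sizeof(access_mask_t) * CHAR_BIT >= NUM_ACCESS_FS,
                      "access_mask_t cannot hold every filesystem right");
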
index 7e27ce394020de5cb2ef9854a88616e9b80ed999..735a0865ea113512488cc1ec988c0690c3bc0bd6 100644 (file)
  * @src: User space pointer or NULL.
  * @usize: (Alleged) size of the data pointed to by @src.
  */
-static __always_inline int copy_min_struct_from_user(void *const dst,
-               const size_t ksize, const size_t ksize_min,
-               const void __user *const src, const size_t usize)
+static __always_inline int
+copy_min_struct_from_user(void *const dst, const size_t ksize,
+                         const size_t ksize_min, const void __user *const src,
+                         const size_t usize)
 {
        /* Checks buffer inconsistencies. */
        BUILD_BUG_ON(!dst);
@@ -93,7 +94,7 @@ static void build_check_abi(void)
 /* Ruleset handling */
 
 static int fop_ruleset_release(struct inode *const inode,
-               struct file *const filp)
+                              struct file *const filp)
 {
        struct landlock_ruleset *ruleset = filp->private_data;
 
@@ -102,15 +103,15 @@ static int fop_ruleset_release(struct inode *const inode,
 }
 
 static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf,
-               const size_t size, loff_t *const ppos)
+                             const size_t size, loff_t *const ppos)
 {
        /* Dummy handler to enable FMODE_CAN_READ. */
        return -EINVAL;
 }
 
 static ssize_t fop_dummy_write(struct file *const filp,
-               const char __user *const buf, const size_t size,
-               loff_t *const ppos)
+                              const char __user *const buf, const size_t size,
+                              loff_t *const ppos)
 {
        /* Dummy handler to enable FMODE_CAN_WRITE. */
        return -EINVAL;
@@ -128,7 +129,7 @@ static const struct file_operations ruleset_fops = {
        .write = fop_dummy_write,
 };
 
-#define LANDLOCK_ABI_VERSION   1
+#define LANDLOCK_ABI_VERSION 2
 
 /**
  * sys_landlock_create_ruleset - Create a new ruleset
@@ -168,22 +169,23 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
                return -EOPNOTSUPP;
 
        if (flags) {
-               if ((flags == LANDLOCK_CREATE_RULESET_VERSION)
-                               && !attr && !size)
+               if ((flags == LANDLOCK_CREATE_RULESET_VERSION) && !attr &&
+                   !size)
                        return LANDLOCK_ABI_VERSION;
                return -EINVAL;
        }
 
        /* Copies raw user space buffer. */
        err = copy_min_struct_from_user(&ruleset_attr, sizeof(ruleset_attr),
-                       offsetofend(typeof(ruleset_attr), handled_access_fs),
-                       attr, size);
+                                       offsetofend(typeof(ruleset_attr),
+                                                   handled_access_fs),
+                                       attr, size);
        if (err)
                return err;
 
        /* Checks content (and 32-bits cast). */
        if ((ruleset_attr.handled_access_fs | LANDLOCK_MASK_ACCESS_FS) !=
-                       LANDLOCK_MASK_ACCESS_FS)
+           LANDLOCK_MASK_ACCESS_FS)
                return -EINVAL;
 
        /* Checks arguments and transforms to kernel struct. */
@@ -193,7 +195,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
 
        /* Creates anonymous FD referring to the ruleset. */
        ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops,
-                       ruleset, O_RDWR | O_CLOEXEC);
+                                     ruleset, O_RDWR | O_CLOEXEC);
        if (ruleset_fd < 0)
                landlock_put_ruleset(ruleset);
        return ruleset_fd;
@@ -204,7 +206,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
  * landlock_put_ruleset() on the return value.
  */
 static struct landlock_ruleset *get_ruleset_from_fd(const int fd,
-               const fmode_t mode)
+                                                   const fmode_t mode)
 {
        struct fd ruleset_f;
        struct landlock_ruleset *ruleset;
@@ -244,8 +246,8 @@ static int get_path_from_fd(const s32 fd, struct path *const path)
        struct fd f;
        int err = 0;
 
-       BUILD_BUG_ON(!__same_type(fd,
-               ((struct landlock_path_beneath_attr *)NULL)->parent_fd));
+       BUILD_BUG_ON(!__same_type(
+               fd, ((struct landlock_path_beneath_attr *)NULL)->parent_fd));
 
        /* Handles O_PATH. */
        f = fdget_raw(fd);
@@ -257,10 +259,10 @@ static int get_path_from_fd(const s32 fd, struct path *const path)
         * pipefs).
         */
        if ((f.file->f_op == &ruleset_fops) ||
-                       (f.file->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
-                       (f.file->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
-                       d_is_negative(f.file->f_path.dentry) ||
-                       IS_PRIVATE(d_backing_inode(f.file->f_path.dentry))) {
+           (f.file->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
+           (f.file->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
+           d_is_negative(f.file->f_path.dentry) ||
+           IS_PRIVATE(d_backing_inode(f.file->f_path.dentry))) {
                err = -EBADFD;
                goto out_fdput;
        }
@@ -290,19 +292,18 @@ out_fdput:
  *
  * - EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
  * - EINVAL: @flags is not 0, or inconsistent access in the rule (i.e.
- *   &landlock_path_beneath_attr.allowed_access is not a subset of the rule's
- *   accesses);
+ *   &landlock_path_beneath_attr.allowed_access is not a subset of the
+ *   ruleset handled accesses);
  * - ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access);
  * - EBADF: @ruleset_fd is not a file descriptor for the current thread, or a
  *   member of @rule_attr is not a file descriptor as expected;
  * - EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of
- *   @rule_attr is not the expected file descriptor type (e.g. file open
- *   without O_PATH);
+ *   @rule_attr is not the expected file descriptor type;
  * - EPERM: @ruleset_fd has no write access to the underlying ruleset;
  * - EFAULT: @rule_attr inconsistency.
  */
-SYSCALL_DEFINE4(landlock_add_rule,
-               const int, ruleset_fd, const enum landlock_rule_type, rule_type,
+SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
+               const enum landlock_rule_type, rule_type,
                const void __user *const, rule_attr, const __u32, flags)
 {
        struct landlock_path_beneath_attr path_beneath_attr;
@@ -317,20 +318,24 @@ SYSCALL_DEFINE4(landlock_add_rule,
        if (flags)
                return -EINVAL;
 
-       if (rule_type != LANDLOCK_RULE_PATH_BENEATH)
-               return -EINVAL;
-
-       /* Copies raw user space buffer, only one type for now. */
-       res = copy_from_user(&path_beneath_attr, rule_attr,
-                       sizeof(path_beneath_attr));
-       if (res)
-               return -EFAULT;
-
        /* Gets and checks the ruleset. */
        ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE);
        if (IS_ERR(ruleset))
                return PTR_ERR(ruleset);
 
+       if (rule_type != LANDLOCK_RULE_PATH_BENEATH) {
+               err = -EINVAL;
+               goto out_put_ruleset;
+       }
+
+       /* Copies raw user space buffer, only one type for now. */
+       res = copy_from_user(&path_beneath_attr, rule_attr,
+                            sizeof(path_beneath_attr));
+       if (res) {
+               err = -EFAULT;
+               goto out_put_ruleset;
+       }
+
        /*
         * Informs about useless rule: empty allowed_access (i.e. deny rules)
         * are ignored in path walks.
@@ -344,7 +349,7 @@ SYSCALL_DEFINE4(landlock_add_rule,
         * (ruleset->fs_access_masks[0] is automatically upgraded to 64-bits).
         */
        if ((path_beneath_attr.allowed_access | ruleset->fs_access_masks[0]) !=
-                       ruleset->fs_access_masks[0]) {
+           ruleset->fs_access_masks[0]) {
                err = -EINVAL;
                goto out_put_ruleset;
        }
@@ -356,7 +361,7 @@ SYSCALL_DEFINE4(landlock_add_rule,
 
        /* Imports the new rule. */
        err = landlock_append_fs_rule(ruleset, &path,
-                       path_beneath_attr.allowed_access);
+                                     path_beneath_attr.allowed_access);
        path_put(&path);
 
 out_put_ruleset:
@@ -389,8 +394,8 @@ out_put_ruleset:
  * - E2BIG: The maximum number of stacked rulesets is reached for the current
  *   thread.
  */
-SYSCALL_DEFINE2(landlock_restrict_self,
-               const int, ruleset_fd, const __u32, flags)
+SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
+               flags)
 {
        struct landlock_ruleset *new_dom, *ruleset;
        struct cred *new_cred;
@@ -400,18 +405,18 @@ SYSCALL_DEFINE2(landlock_restrict_self,
        if (!landlock_initialized)
                return -EOPNOTSUPP;
 
-       /* No flag for now. */
-       if (flags)
-               return -EINVAL;
-
        /*
         * Similar checks as for seccomp(2), except that an -EPERM may be
         * returned.
         */
        if (!task_no_new_privs(current) &&
-                       !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
+           !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
                return -EPERM;
 
+       /* No flag for now. */
+       if (flags)
+               return -EINVAL;
+
        /* Gets and checks the ruleset. */
        ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
        if (IS_ERR(ruleset))
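
Since LANDLOCK_ABI_VERSION moves to 2 here, user space should not assume LANDLOCK_ACCESS_FS_REFER is handled: the documented probe is landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION), which the flag check earlier in this file answers without creating anything. A sketch of the probe via the raw syscall:

        #include <linux/landlock.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        static long landlock_abi_version(void)
        {
                /* attr must be NULL and size 0 when asking for the version. */
                return syscall(SYS_landlock_create_ruleset, NULL, 0,
                               LANDLOCK_CREATE_RULESET_VERSION);
        }

        /* Only request LANDLOCK_ACCESS_FS_REFER when this returns >= 2. */
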
index b12f7d986b1e3bdb04c0c63a36bd200280048802..ad4e6756c03863df854f0b85625a3da7d45082f0 100644 (file)
@@ -78,11 +78,8 @@ static void check_pinning_enforcement(struct super_block *mnt_sb)
         * device, allow sysctl to change modes for testing.
         */
        if (mnt_sb->s_bdev) {
-               char bdev[BDEVNAME_SIZE];
-
                ro = bdev_read_only(mnt_sb->s_bdev);
-               bdevname(mnt_sb->s_bdev, bdev);
-               pr_info("%s (%u:%u): %s\n", bdev,
+               pr_info("%pg (%u:%u): %s\n", mnt_sb->s_bdev,
                        MAJOR(mnt_sb->s_bdev->bd_dev),
                        MINOR(mnt_sb->s_bdev->bd_dev),
                        ro ? "read-only" : "writable");
index 1897cbf6fc6905f86912d391c9a51fad2cff83d9..78a278f28e49ab0a59ffffbc81bd61bcc4bc28de 100644 (file)
@@ -433,6 +433,9 @@ static void dump_common_audit_data(struct audit_buffer *ab,
                audit_log_format(ab, " lockdown_reason=\"%s\"",
                                 lockdown_reasons[a->u.reason]);
                break;
+       case LSM_AUDIT_DATA_ANONINODE:
+               audit_log_format(ab, " anonclass=%s", a->u.anonclass);
+               break;
        } /* switch (a->type) */
 }
 
index b7cf5cbfdc677a37017e1e493f76d5644365bbf0..188b8f7822206042dfbefb2764e611029a0b78a0 100644 (file)
@@ -59,10 +59,12 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
        [LOCKDOWN_DEBUGFS] = "debugfs access",
        [LOCKDOWN_XMON_WR] = "xmon write access",
        [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
+       [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
        [LOCKDOWN_INTEGRITY_MAX] = "integrity",
        [LOCKDOWN_KCORE] = "/proc/kcore access",
        [LOCKDOWN_KPROBES] = "use of kprobes",
        [LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM",
+       [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
        [LOCKDOWN_PERF] = "unsafe use of perf",
        [LOCKDOWN_TRACEFS] = "use of tracefs",
        [LOCKDOWN_XMON_RW] = "xmon read and write access",
@@ -365,13 +367,12 @@ static void __init ordered_lsm_init(void)
 
 int __init early_security_init(void)
 {
-       int i;
-       struct hlist_head *list = (struct hlist_head *) &security_hook_heads;
        struct lsm_info *lsm;
 
-       for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head);
-            i++)
-               INIT_HLIST_HEAD(&list[i]);
+#define LSM_HOOK(RET, DEFAULT, NAME, ...) \
+       INIT_HLIST_HEAD(&security_hook_heads.NAME);
+#include "linux/lsm_hook_defs.h"
+#undef LSM_HOOK
 
        for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) {
                if (!lsm->enabled)
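
The early_security_init() rewrite above drops the cast-and-stride loop in favor of expanding linux/lsm_hook_defs.h under a local LSM_HOOK definition, so every hlist head is initialized by name and the compiler checks the struct layout. The X-macro technique in miniature, as a stand-alone program with a hypothetical hook list:

        #include <stdio.h>

        #define HOOK_LIST(X) X(bind) X(connect) X(rename)

        #define DECLARE(name)   int name;
        struct heads { HOOK_LIST(DECLARE) };    /* one field per hook */
        #undef DECLARE

        int main(void)
        {
                struct heads h;
        #define INIT(name)      h.name = 0;
                HOOK_LIST(INIT)                 /* one init per hook */
        #undef INIT
                printf("%d\n", h.rename);       /* prints 0 */
                return 0;
        }
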
@@ -478,7 +479,7 @@ static int lsm_append(const char *new, char **result)
  * Each LSM has to register its hooks with the infrastructure.
  */
 void __init security_add_hooks(struct security_hook_list *hooks, int count,
-                               char *lsm)
+                               const char *lsm)
 {
        int i;
 
@@ -1197,15 +1198,8 @@ int security_path_rename(const struct path *old_dir, struct dentry *old_dentry,
                     (d_is_positive(new_dentry) && IS_PRIVATE(d_backing_inode(new_dentry)))))
                return 0;
 
-       if (flags & RENAME_EXCHANGE) {
-               int err = call_int_hook(path_rename, 0, new_dir, new_dentry,
-                                       old_dir, old_dentry);
-               if (err)
-                       return err;
-       }
-
        return call_int_hook(path_rename, 0, old_dir, old_dentry, new_dir,
-                               new_dentry);
+                               new_dentry, flags);
 }
 EXPORT_SYMBOL(security_path_rename);
 
index abcd9740d10f0cf12a58106b6fac44a6f003197e..9a43af0ebd7de7685f557a8d1ebb54153c3f5938 100644 (file)
@@ -668,7 +668,7 @@ static void avc_audit_pre_callback(struct audit_buffer *ab, void *a)
        struct common_audit_data *ad = a;
        struct selinux_audit_data *sad = ad->selinux_audit_data;
        u32 av = sad->audited;
-       const char **perms;
+       const char *const *perms;
        int i, perm;
 
        audit_log_format(ab, "avc:  %s ", sad->denied ? "denied" : "granted");
@@ -1059,7 +1059,7 @@ int avc_has_extended_perms(struct selinux_state *state,
 
        node = avc_lookup(state->avc, ssid, tsid, tclass);
        if (unlikely(!node)) {
-               node = avc_compute_av(state, ssid, tsid, tclass, &avd, xp_node);
+               avc_compute_av(state, ssid, tsid, tclass, &avd, xp_node);
        } else {
                memcpy(&avd, &node->ae.avd, sizeof(avd));
                xp_node = node->ae.xp_node;
@@ -1151,7 +1151,7 @@ inline int avc_has_perm_noaudit(struct selinux_state *state,
 
        node = avc_lookup(state->avc, ssid, tsid, tclass);
        if (unlikely(!node))
-               node = avc_compute_av(state, ssid, tsid, tclass, avd, &xp_node);
+               avc_compute_av(state, ssid, tsid, tclass, avd, &xp_node);
        else
                memcpy(avd, &node->ae.avd, sizeof(*avd));
 
index e9e959343de98db83a35d8b0592fe4129dc5a0c1..beceb89f68d9cb5277c4cfcb091153965b4f8d51 100644 (file)
@@ -145,7 +145,7 @@ static int __init checkreqprot_setup(char *str)
        if (!kstrtoul(str, 0, &checkreqprot)) {
                selinux_checkreqprot_boot = checkreqprot ? 1 : 0;
                if (checkreqprot)
-                       pr_warn("SELinux: checkreqprot set to 1 via kernel parameter.  This is deprecated and will be rejected in a future kernel release.\n");
+                       pr_err("SELinux: checkreqprot set to 1 via kernel parameter.  This is deprecated and will be rejected in a future kernel release.\n");
        }
        return 1;
 }
@@ -2964,8 +2964,8 @@ static int selinux_inode_init_security_anon(struct inode *inode,
         * allowed to actually create this type of anonymous inode.
         */
 
-       ad.type = LSM_AUDIT_DATA_INODE;
-       ad.u.inode = inode;
+       ad.type = LSM_AUDIT_DATA_ANONINODE;
+       ad.u.anonclass = name ? (const char *)name->name : "?";
 
        return avc_has_perm(&selinux_state,
                            tsec->sid,
@@ -6487,7 +6487,6 @@ static int selinux_setprocattr(const char *name, void *value, size_t size)
                        goto abort_change;
 
                /* Only allow single threaded processes to change context */
-               error = -EPERM;
                if (!current_is_single_threaded()) {
                        error = security_bounded_transition(&selinux_state,
                                                            tsec->sid, sid);
@@ -7294,6 +7293,8 @@ static __init int selinux_init(void)
 
        memset(&selinux_state, 0, sizeof(selinux_state));
        enforcing_set(&selinux_state, selinux_enforcing_boot);
+       if (CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE)
+               pr_err("SELinux: CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE is non-zero.  This is deprecated and will be rejected in a future kernel release.\n");
        checkreqprot_set(&selinux_state, selinux_checkreqprot_boot);
        selinux_avc_init(&selinux_state.avc);
        mutex_init(&selinux_state.status_lock);
index 073a3d34a0d21783639a2b8d44e7b1ef29b6e527..1cba83d17f415a435fe7a5d9af8002c94c0a26cc 100644 (file)
@@ -12,6 +12,9 @@
 #ifndef _SELINUX_AUDIT_H
 #define _SELINUX_AUDIT_H
 
+#include <linux/audit.h>
+#include <linux/types.h>
+
 /**
  *     selinux_audit_rule_init - alloc/init an selinux audit rule structure.
  *     @field: the field this rule refers to
@@ -51,7 +54,7 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *rule);
  *     @rule: rule to be checked
  *     Returns 1 if there are selinux fields specified in the rule, 0 otherwise.
  */
-int selinux_audit_rule_known(struct audit_krule *krule);
+int selinux_audit_rule_known(struct audit_krule *rule);
 
 #endif /* _SELINUX_AUDIT_H */
 
index 00f78be48283685b1782227147df7ff8ed8fc7b3..2b372f98f2d7c97248e1c0eeba0915213124b5b4 100644 (file)
@@ -104,6 +104,7 @@ int slow_avc_audit(struct selinux_state *state,
 
 /**
  * avc_audit - Audit the granting or denial of permissions.
+ * @state: SELinux state
  * @ssid: source security identifier
  * @tsid: target security identifier
  * @tclass: target security class
index 88c384c5c09e667bc9ce845c6d54b1b6277a7927..42912c917fd40c53b4278cd5e820ac3e4686add4 100644 (file)
@@ -7,7 +7,7 @@
 #ifndef _SELINUX_AVC_SS_H_
 #define _SELINUX_AVC_SS_H_
 
-#include "flask.h"
+#include <linux/types.h>
 
 struct selinux_avc;
 int avc_ss_reset(struct selinux_avc *avc, u32 seqno);
@@ -18,7 +18,7 @@ struct security_class_mapping {
        const char *perms[sizeof(u32) * 8 + 1];
 };
 
-extern struct security_class_mapping secclass_map[];
+extern const struct security_class_mapping secclass_map[];
 
 #endif /* _SELINUX_AVC_SS_H_ */
 
index 35aac62a662e067bd9af8fa75fe68ff79cb18fb4..ff757ae5f25379ee7fb49887abc0524715a7661b 100644 (file)
@@ -38,7 +38,7 @@
  * Note: The name for any socket class should be suffixed by "socket",
  *      and doesn't contain more than one substr of "socket".
  */
-struct security_class_mapping secclass_map[] = {
+const struct security_class_mapping secclass_map[] = {
        { "security",
          { "compute_av", "compute_create", "compute_member",
            "check_context", "load_policy", "compute_relabel",
index e6ac1d23320b6728cc1ccd5521daea599d2207b4..c992f83b0aae00ec467acb2cd87d42b195ab7435 100644 (file)
@@ -14,6 +14,8 @@
 #ifndef _SELINUX_IB_PKEY_H
 #define _SELINUX_IB_PKEY_H
 
+#include <linux/types.h>
+
 #ifdef CONFIG_SECURITY_INFINIBAND
 void sel_ib_pkey_flush(void);
 int sel_ib_pkey_sid(u64 subnet_prefix, u16 pkey, u32 *sid);
index 5d332aeb8b6c616dabd371b2aaf1b9b61d0ed62e..60820517aa438652b70bc307f7fe1eecd7d52fd2 100644 (file)
@@ -1,6 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-static const char *initial_sid_to_string[] =
-{
+static const char *const initial_sid_to_string[] = {
        NULL,
        "kernel",
        "security",
index e3f784a85840209a4b63bffbb5d4b6c06b3be895..9b8b655a8cd3154e1ec16af654aa453c9c0e5009 100644 (file)
@@ -17,6 +17,8 @@
 #ifndef _SELINUX_NETNODE_H
 #define _SELINUX_NETNODE_H
 
+#include <linux/types.h>
+
 void sel_netnode_flush(void);
 
 int sel_netnode_sid(void *addr, u16 family, u32 *sid);
index 31bc16e29cd1bc6c674763ff73d9575e5526469b..9096a8289948a4524dcb53eec50266b400fa7445 100644 (file)
@@ -16,6 +16,8 @@
 #ifndef _SELINUX_NETPORT_H
 #define _SELINUX_NETPORT_H
 
+#include <linux/types.h>
+
 void sel_netport_flush(void);
 
 int sel_netport_sid(u8 protocol, u16 pnum, u32 *sid);
index 2680aa21205ce2c43db315c9fdf26fb4fa9353e2..f35d3458e71deddc1550d443ba56dbd3936e2410 100644 (file)
@@ -16,6 +16,6 @@ enum {
 };
 #define POLICYDB_CAP_MAX (__POLICYDB_CAP_MAX - 1)
 
-extern const char *selinux_policycap_names[__POLICYDB_CAP_MAX];
+extern const char *const selinux_policycap_names[__POLICYDB_CAP_MAX];
 
 #endif /* _SELINUX_POLICYCAP_H_ */
index 100da7d043dbf9a2409e5344596668b52a1084cc..2a87fc3702b81a59f71e1eabc786ac8f1a305dd3 100644 (file)
@@ -5,7 +5,7 @@
 #include "policycap.h"
 
 /* Policy capability names */
-const char *selinux_policycap_names[__POLICYDB_CAP_MAX] = {
+const char *const selinux_policycap_names[__POLICYDB_CAP_MAX] = {
        "network_peer_controls",
        "open_perms",
        "extended_socket_class",
index ace4bd13e8084502a844c119737bde7ed102627f..393aff41d3ef89db689738b45b17f9d8c5958fb4 100644 (file)
@@ -16,6 +16,8 @@
 #include <linux/rcupdate.h>
 #include <linux/refcount.h>
 #include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/printk.h>
 #include "flask.h"
 #include "policycap.h"
 
@@ -150,6 +152,8 @@ static inline bool checkreqprot_get(const struct selinux_state *state)
 
 static inline void checkreqprot_set(struct selinux_state *state, bool value)
 {
+       if (value)
+               pr_err("SELinux: https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-checkreqprot\n");
        WRITE_ONCE(state->checkreqprot, value);
 }
 
index 74159400eeee7a43ade2000440620f0392b0de85..c758398602007157aecb2fd6e48e8dfec3515dd0 100644 (file)
@@ -8,7 +8,9 @@
 #ifndef _SELINUX_XFRM_H_
 #define _SELINUX_XFRM_H_
 
+#include <linux/lsm_audit.h>
 #include <net/flow.h>
+#include <net/xfrm.h>
 
 int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
                              struct xfrm_user_sec_ctx *uctx,
index d8ceee9e0d6f82eaa1b20c8c976e6c8ba230f076..2ee7b4ed43ef818c1898aa36621476ce448db795 100644 (file)
@@ -25,8 +25,7 @@ struct nlmsg_perm {
        u32     perm;
 };
 
-static const struct nlmsg_perm nlmsg_route_perms[] =
-{
+static const struct nlmsg_perm nlmsg_route_perms[] = {
        { RTM_NEWLINK,          NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
        { RTM_DELLINK,          NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
        { RTM_GETLINK,          NETLINK_ROUTE_SOCKET__NLMSG_READ  },
@@ -97,16 +96,14 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
        { RTM_GETTUNNEL,        NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 };
 
-static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
-{
+static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = {
        { TCPDIAG_GETSOCK,      NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
        { DCCPDIAG_GETSOCK,     NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
        { SOCK_DIAG_BY_FAMILY,  NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
        { SOCK_DESTROY,         NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE },
 };
 
-static const struct nlmsg_perm nlmsg_xfrm_perms[] =
-{
+static const struct nlmsg_perm nlmsg_xfrm_perms[] = {
        { XFRM_MSG_NEWSA,       NETLINK_XFRM_SOCKET__NLMSG_WRITE },
        { XFRM_MSG_DELSA,       NETLINK_XFRM_SOCKET__NLMSG_WRITE },
        { XFRM_MSG_GETSA,       NETLINK_XFRM_SOCKET__NLMSG_READ  },
@@ -134,8 +131,7 @@ static const struct nlmsg_perm nlmsg_xfrm_perms[] =
        { XFRM_MSG_GETDEFAULT,  NETLINK_XFRM_SOCKET__NLMSG_READ  },
 };
 
-static const struct nlmsg_perm nlmsg_audit_perms[] =
-{
+static const struct nlmsg_perm nlmsg_audit_perms[] = {
        { AUDIT_GET,            NETLINK_AUDIT_SOCKET__NLMSG_READ     },
        { AUDIT_SET,            NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
        { AUDIT_LIST,           NETLINK_AUDIT_SOCKET__NLMSG_READPRIV },
index 097c6d866ec4db19e2c799c865164d2a71378706..8fcdd494af273f402a073d948cb6d6879c98cc12 100644 (file)
@@ -293,6 +293,8 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf,
         *       kernel releases until eventually it is removed
         */
        pr_err("SELinux:  Runtime disable is deprecated, use selinux=0 on the kernel cmdline.\n");
+       pr_err("SELinux:  https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-runtime-disable\n");
+       ssleep(5);
 
        if (count >= PAGE_SIZE)
                return -ENOMEM;
@@ -755,11 +757,13 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf,
                char comm[sizeof(current->comm)];
 
                memcpy(comm, current->comm, sizeof(comm));
-               pr_warn_once("SELinux: %s (%d) set checkreqprot to 1. This is deprecated and will be rejected in a future kernel release.\n",
-                            comm, current->pid);
+               pr_err("SELinux: %s (%d) set checkreqprot to 1. This is deprecated and will be rejected in a future kernel release.\n",
+                      comm, current->pid);
        }
 
        checkreqprot_set(fsi->state, (new_value ? 1 : 0));
+       if (new_value)
+               ssleep(5);
        length = count;
 
        selinux_ima_measure_state(fsi->state);
index cfdae20792e116a83a4478bd6d12a79fcc27fd27..8480ec6c6e75f69699da6ce7951a732561ce172d 100644 (file)
@@ -40,15 +40,15 @@ static inline int avtab_hash(const struct avtab_key *keyp, u32 mask)
 
        u32 hash = 0;
 
-#define mix(input) { \
-       u32 v = input; \
-       v *= c1; \
-       v = (v << r1) | (v >> (32 - r1)); \
-       v *= c2; \
-       hash ^= v; \
-       hash = (hash << r2) | (hash >> (32 - r2)); \
-       hash = hash * m + n; \
-}
+#define mix(input) do { \
+               u32 v = input; \
+               v *= c1; \
+               v = (v << r1) | (v >> (32 - r1)); \
+               v *= c2; \
+               hash ^= v; \
+               hash = (hash << r2) | (hash >> (32 - r2)); \
+               hash = hash * m + n; \
+       } while (0)
 
        mix(keyp->target_class);
        mix(keyp->target_type);
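
Wrapping mix() in do { ... } while (0) is the standard way to make a multi-statement macro behave as exactly one statement, so the semicolon after a call cannot break an if/else around it. A compilable illustration:

        #include <stdio.h>

        #define GOOD(x) do { printf("mix %d\n", x); } while (0)
        /* #define BAD(x)  { printf("mix %d\n", x); } */

        int main(void)
        {
                int cond = 0;

                if (cond)
                        GOOD(1);        /* one statement: the else pairs up */
                else
                        GOOD(2);
                /*
                 * With BAD instead, the block's closing brace plus the
                 * trailing ';' end the if early, leaving the else unmatched
                 * and the program failing to compile.
                 */
                return 0;
        }
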
@@ -385,7 +385,7 @@ void avtab_hash_eval(struct avtab *h, char *tag)
               chain2_len_sum);
 }
 
-static uint16_t spec_order[] = {
+static const uint16_t spec_order[] = {
        AVTAB_ALLOWED,
        AVTAB_AUDITDENY,
        AVTAB_AUDITALLOW,
index 0ae4e4e57a401ebb36f09ba59a4e7b60b9c67846..3fb8f9026e9be80dfa448212052f18d7b9f67f1c 100644 (file)
@@ -179,7 +179,8 @@ int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
                        kmem_cache_free(hashtab_node_cachep, cur);
                }
        }
-       kmem_cache_free(hashtab_node_cachep, new);
+       kfree(new->htable);
+       memset(new, 0, sizeof(*new));
        return -ENOMEM;
 }
 
index d036e1238e77182d5203fa6ca874fd37333291b8..adcfb63b3550db98d0151c78fbd99588437125c3 100644 (file)
@@ -61,7 +61,7 @@ struct policydb_compat_info {
 };
 
 /* These need to be updated if SYM_NUM or OCON_NUM changes */
-static struct policydb_compat_info policydb_compat[] = {
+static const struct policydb_compat_info policydb_compat[] = {
        {
                .version        = POLICYDB_VERSION_BASE,
                .sym_num        = SYM_NUM - 3,
@@ -159,18 +159,16 @@ static struct policydb_compat_info policydb_compat[] = {
        },
 };
 
-static struct policydb_compat_info *policydb_lookup_compat(int version)
+static const struct policydb_compat_info *policydb_lookup_compat(int version)
 {
        int i;
-       struct policydb_compat_info *info = NULL;
 
        for (i = 0; i < ARRAY_SIZE(policydb_compat); i++) {
-               if (policydb_compat[i].version == version) {
-                       info = &policydb_compat[i];
-                       break;
-               }
+               if (policydb_compat[i].version == version)
+                       return &policydb_compat[i];
        }
-       return info;
+
+       return NULL;
 }
 
 /*
@@ -314,8 +312,7 @@ static int cat_destroy(void *key, void *datum, void *p)
        return 0;
 }
 
-static int (*destroy_f[SYM_NUM]) (void *key, void *datum, void *datap) =
-{
+static int (*const destroy_f[SYM_NUM]) (void *key, void *datum, void *datap) = {
        common_destroy,
        cls_destroy,
        role_destroy,
@@ -670,8 +667,7 @@ static int cat_index(void *key, void *datum, void *datap)
        return 0;
 }
 
-static int (*index_f[SYM_NUM]) (void *key, void *datum, void *datap) =
-{
+static int (*const index_f[SYM_NUM]) (void *key, void *datum, void *datap) = {
        common_index,
        class_index,
        role_index,
@@ -1639,8 +1635,8 @@ bad:
        return rc;
 }
 
-static int (*read_f[SYM_NUM]) (struct policydb *p, struct symtab *s, void *fp) =
-{
+static int (*const read_f[SYM_NUM]) (struct policydb *p,
+                                    struct symtab *s, void *fp) = {
        common_read,
        class_read,
        role_read,
@@ -2211,7 +2207,7 @@ out:
        return rc;
 }
 
-static int ocontext_read(struct policydb *p, struct policydb_compat_info *info,
+static int ocontext_read(struct policydb *p, const struct policydb_compat_info *info,
                         void *fp)
 {
        int i, j, rc;
@@ -2407,7 +2403,7 @@ int policydb_read(struct policydb *p, void *fp)
        u32 len, nprim, nel, perm;
 
        char *policydb_str;
-       struct policydb_compat_info *info;
+       const struct policydb_compat_info *info;
 
        policydb_init(p);
 
@@ -3241,9 +3237,7 @@ static int user_write(void *vkey, void *datum, void *ptr)
        return 0;
 }
 
-static int (*write_f[SYM_NUM]) (void *key, void *datum,
-                               void *datap) =
-{
+static int (*const write_f[SYM_NUM]) (void *key, void *datum, void *datap) = {
        common_write,
        class_write,
        role_write,
@@ -3254,7 +3248,7 @@ static int (*write_f[SYM_NUM]) (void *key, void *datum,
        cat_write,
 };
 
-static int ocontext_write(struct policydb *p, struct policydb_compat_info *info,
+static int ocontext_write(struct policydb *p, const struct policydb_compat_info *info,
                          void *fp)
 {
        unsigned int i, j, rc;
@@ -3611,7 +3605,7 @@ int policydb_write(struct policydb *p, void *fp)
        __le32 buf[4];
        u32 config;
        size_t len;
-       struct policydb_compat_info *info;
+       const struct policydb_compat_info *info;
 
        /*
         * refuse to write policy older than compressed avtab
index 6901dc07680de727bc924b9054d87492972b56f0..69b2734311a69e7df415b1fd373aa4e84c430666 100644 (file)
@@ -99,7 +99,7 @@ static void context_struct_compute_av(struct policydb *policydb,
                                      struct extended_perms *xperms);
 
 static int selinux_set_mapping(struct policydb *pol,
-                              struct security_class_mapping *map,
+                              const struct security_class_mapping *map,
                               struct selinux_map *out_map)
 {
        u16 i, j;
@@ -121,7 +121,7 @@ static int selinux_set_mapping(struct policydb *pol,
        /* Store the raw class and permission values */
        j = 0;
        while (map[j].name) {
-               struct security_class_mapping *p_in = map + (j++);
+               const struct security_class_mapping *p_in = map + (j++);
                struct selinux_mapping *p_out = out_map->mapping + j;
 
                /* An empty class string skips ahead */
@@ -358,27 +358,27 @@ static int constraint_expr_eval(struct policydb *policydb,
                                l2 = &(tcontext->range.level[1]);
                                goto mls_ops;
 mls_ops:
-                       switch (e->op) {
-                       case CEXPR_EQ:
-                               s[++sp] = mls_level_eq(l1, l2);
-                               continue;
-                       case CEXPR_NEQ:
-                               s[++sp] = !mls_level_eq(l1, l2);
-                               continue;
-                       case CEXPR_DOM:
-                               s[++sp] = mls_level_dom(l1, l2);
-                               continue;
-                       case CEXPR_DOMBY:
-                               s[++sp] = mls_level_dom(l2, l1);
-                               continue;
-                       case CEXPR_INCOMP:
-                               s[++sp] = mls_level_incomp(l2, l1);
-                               continue;
-                       default:
-                               BUG();
-                               return 0;
-                       }
-                       break;
+                               switch (e->op) {
+                               case CEXPR_EQ:
+                                       s[++sp] = mls_level_eq(l1, l2);
+                                       continue;
+                               case CEXPR_NEQ:
+                                       s[++sp] = !mls_level_eq(l1, l2);
+                                       continue;
+                               case CEXPR_DOM:
+                                       s[++sp] = mls_level_dom(l1, l2);
+                                       continue;
+                               case CEXPR_DOMBY:
+                                       s[++sp] = mls_level_dom(l2, l1);
+                                       continue;
+                               case CEXPR_INCOMP:
+                                       s[++sp] = mls_level_incomp(l2, l1);
+                                       continue;
+                               default:
+                                       BUG();
+                                       return 0;
+                               }
+                               break;
                        default:
                                BUG();
                                return 0;
@@ -2980,7 +2980,6 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb)
        }
 
 retry:
-       rc = 0;
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
index 658eab05599e6db8fb08ab37a8965b71da7dc265..9e61014073cc847902caaeed7386196a0a21409a 100644 (file)
@@ -1192,7 +1192,6 @@ static ssize_t smk_write_net4addr(struct file *file, const char __user *buf,
                        rc = -EINVAL;
                        goto free_out;
                }
-               m = BEBITS;
                masks = 32;
        }
        if (masks > BEBITS) {
index b6a31901f289477793b29af2456cd3ee18b225e0..71e82d855ebfca43bc3d9f0bb7a702bbcd0ce023 100644 (file)
@@ -264,17 +264,26 @@ static int tomoyo_path_link(struct dentry *old_dentry, const struct path *new_di
  * @old_dentry: Pointer to "struct dentry".
  * @new_parent: Pointer to "struct path".
  * @new_dentry: Pointer to "struct dentry".
+ * @flags: Rename options.
  *
  * Returns 0 on success, negative value otherwise.
  */
 static int tomoyo_path_rename(const struct path *old_parent,
                              struct dentry *old_dentry,
                              const struct path *new_parent,
-                             struct dentry *new_dentry)
+                             struct dentry *new_dentry,
+                             const unsigned int flags)
 {
        struct path path1 = { .mnt = old_parent->mnt, .dentry = old_dentry };
        struct path path2 = { .mnt = new_parent->mnt, .dentry = new_dentry };
 
+       if (flags & RENAME_EXCHANGE) {
+               const int err = tomoyo_path2_perm(TOMOYO_TYPE_RENAME, &path2,
+                               &path1);
+
+               if (err)
+                       return err;
+       }
        return tomoyo_path2_perm(TOMOYO_TYPE_RENAME, &path1, &path2);
 }
 
index 69cbc79fbb716a902961aedd7e383004be18985e..2aaaa68071744ec9c7e25062788dbb821255cdb6 100644 (file)
@@ -1094,7 +1094,8 @@ wavefront_send_sample (snd_wavefront_t *dev,
 
                        if (dataptr < data_end) {
                
-                               __get_user (sample_short, dataptr);
+                               if (get_user(sample_short, dataptr))
+                                       return -EFAULT;
                                dataptr += skip;
                
                                if (data_is_unsigned) { /* GUS ? */
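
The wavefront change is a correctness fix as much as a cleanup: the old __get_user() call ignored its return value, so a faulting user pointer left sample_short unreliable yet still fed into the sample stream. get_user() returns 0 on success and nonzero on a fault, and the fix finally propagates that as -EFAULT. The pattern in isolation (user_ptr is a placeholder name):

        /* Never ignore the result of a user-space read. */
        short sample;

        if (get_user(sample, user_ptr)) /* nonzero: the copy faulted */
                return -EFAULT;
        /* Only now is "sample" trustworthy. */
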
index cf531c1efa132433ba9414d3f1e44bf54d3f380a..ad292df7d805cae2e70d9875e4e00822051c85c0 100644 (file)
@@ -937,6 +937,9 @@ static int alc_init(struct hda_codec *codec)
        return 0;
 }
 
+#define alc_free       snd_hda_gen_free
+
+#ifdef CONFIG_PM
 static inline void alc_shutup(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
@@ -950,9 +953,6 @@ static inline void alc_shutup(struct hda_codec *codec)
                alc_shutup_pins(codec);
 }
 
-#define alc_free       snd_hda_gen_free
-
-#ifdef CONFIG_PM
 static void alc_power_eapd(struct hda_codec *codec)
 {
        alc_auto_setup_eapd(codec, false);
@@ -966,9 +966,7 @@ static int alc_suspend(struct hda_codec *codec)
                spec->power_hook(codec);
        return 0;
 }
-#endif
 
-#ifdef CONFIG_PM
 static int alc_resume(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
@@ -6780,6 +6778,41 @@ static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec,
        }
 }
 
+static void alc_fixup_dell4_mic_no_presence_quiet(struct hda_codec *codec,
+                                                 const struct hda_fixup *fix,
+                                                 int action)
+{
+       struct alc_spec *spec = codec->spec;
+       struct hda_input_mux *imux = &spec->gen.input_mux;
+       int i;
+
+       alc269_fixup_limit_int_mic_boost(codec, fix, action);
+
+       switch (action) {
+       case HDA_FIXUP_ACT_PRE_PROBE:
+               /**
+                * Set the vref of pin 0x19 (Headset Mic) and pin 0x1b (Headphone Mic)
+                * to Hi-Z to avoid pop noises at startup and when plugging and
+                * unplugging headphones.
+                */
+               snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ);
+               snd_hda_codec_set_pin_target(codec, 0x1b, PIN_VREFHIZ);
+               break;
+       case HDA_FIXUP_ACT_PROBE:
+               /**
+                * Make the internal mic (0x12) the default input source to
+                * prevent pop noises on cold boot.
+                */
+               for (i = 0; i < imux->num_items; i++) {
+                       if (spec->gen.imux_pins[i] == 0x12) {
+                               spec->gen.cur_mux[0] = i;
+                               break;
+                       }
+               }
+               break;
+       }
+}
+
 enum {
        ALC269_FIXUP_GPIO2,
        ALC269_FIXUP_SONY_VAIO,
@@ -6821,6 +6854,7 @@ enum {
        ALC269_FIXUP_DELL2_MIC_NO_PRESENCE,
        ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
        ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+       ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET,
        ALC269_FIXUP_HEADSET_MODE,
        ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
        ALC269_FIXUP_ASPIRE_HEADSET_MIC,
@@ -7012,6 +7046,7 @@ enum {
        ALC245_FIXUP_CS35L41_SPI_4,
        ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED,
        ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED,
+       ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8808,6 +8843,21 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC285_FIXUP_HP_MUTE_LED,
        },
+       [ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_dell4_mic_no_presence_quiet,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+       },
+       [ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x02a1112c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8898,6 +8948,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1028, 0x0a38, "Dell Latitude 7520", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET),
        SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK),
        SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK),
@@ -9040,6 +9091,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8995, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x103c, 0x89a4, "HP ProBook 440 G9", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x89a6, "HP ProBook 450 G9", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x89ac, "HP EliteBook 640 G9", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x89ae, "HP EliteBook 650 G9", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x89c3, "Zbook Studio G9", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -9290,6 +9342,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802),
        SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X),
        SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS),
+       SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1111, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1119, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1129, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
        SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
        SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
@@ -9297,6 +9357,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
        SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
        SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
+       SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
 
 #if 0
        /* Below is a quirk table taken from the old code.
index 0ea39565e62329c98bf283bcff25826c0fd0a11b..40a5e3eb4ef262ec0b589b3bf0c5aa7f1ed0632a 100644 (file)
@@ -3235,6 +3235,15 @@ YAMAHA_DEVICE(0x7010, "UB99"),
        }
 },
 
+/* Rane SL-1 */
+{
+       USB_DEVICE(0x13e5, 0x0001),
+       .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+               .ifnum = QUIRK_ANY_INTERFACE,
+               .type = QUIRK_AUDIO_STANDARD_INTERFACE
+	}
+},
+
 /* disabled due to regression for other devices;
  * see https://bugzilla.kernel.org/show_bug.cgi?id=199905
  */
index ab9f3da49941fc97f715e9270044f1cdd12ff291..fbbe59054c3fb1da77878095f8d74319d9bced41 100644 (file)
@@ -1822,6 +1822,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                   QUIRK_FLAG_IGNORE_CTL_ERROR),
        DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */
                   QUIRK_FLAG_IGNORE_CTL_ERROR),
+       DEVICE_FLG(0x0711, 0x5800, /* MCT Trigger 5 USB-to-HDMI */
+                  QUIRK_FLAG_GET_SAMPLE_RATE),
        DEVICE_FLG(0x074d, 0x3553, /* Outlaw RR2150 (Micronas UAC3553B) */
                   QUIRK_FLAG_GET_SAMPLE_RATE),
        DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */
index ecbc294fc59a1cda405a6b50bb48df36ca3487e5..26d1b3987887c698f80fb50c1cbc2c56e8b2f6da 100644 (file)
@@ -143,12 +143,12 @@ void xen_snd_front_evtchnl_flush(struct xen_snd_front_evtchnl *channel)
 static void evtchnl_free(struct xen_snd_front_info *front_info,
                         struct xen_snd_front_evtchnl *channel)
 {
-       unsigned long page = 0;
+       void *page = NULL;
 
        if (channel->type == EVTCHNL_TYPE_REQ)
-               page = (unsigned long)channel->u.req.ring.sring;
+               page = channel->u.req.ring.sring;
        else if (channel->type == EVTCHNL_TYPE_EVT)
-               page = (unsigned long)channel->u.evt.page;
+               page = channel->u.evt.page;
 
        if (!page)
                return;
@@ -167,10 +167,7 @@ static void evtchnl_free(struct xen_snd_front_info *front_info,
                xenbus_free_evtchn(front_info->xb_dev, channel->port);
 
        /* End access and free the page. */
-       if (channel->gref != GRANT_INVALID_REF)
-               gnttab_end_foreign_access(channel->gref, page);
-       else
-               free_page(page);
+       xenbus_teardown_ring(&page, 1, &channel->gref);
 
        memset(channel, 0, sizeof(*channel));
 }
@@ -196,8 +193,7 @@ static int evtchnl_alloc(struct xen_snd_front_info *front_info, int index,
                         enum xen_snd_front_evtchnl_type type)
 {
        struct xenbus_device *xb_dev = front_info->xb_dev;
-       unsigned long page;
-       grant_ref_t gref;
+       void *page;
        irq_handler_t handler;
        char *handler_name = NULL;
        int ret;
@@ -207,12 +203,9 @@ static int evtchnl_alloc(struct xen_snd_front_info *front_info, int index,
        channel->index = index;
        channel->front_info = front_info;
        channel->state = EVTCHNL_STATE_DISCONNECTED;
-       channel->gref = GRANT_INVALID_REF;
-       page = get_zeroed_page(GFP_KERNEL);
-       if (!page) {
-               ret = -ENOMEM;
+       ret = xenbus_setup_ring(xb_dev, GFP_KERNEL, &page, 1, &channel->gref);
+       if (ret)
                goto fail;
-       }
 
        handler_name = kasprintf(GFP_KERNEL, "%s-%s", XENSND_DRIVER_NAME,
                                 type == EVTCHNL_TYPE_REQ ?
@@ -226,33 +219,18 @@ static int evtchnl_alloc(struct xen_snd_front_info *front_info, int index,
        mutex_init(&channel->ring_io_lock);
 
        if (type == EVTCHNL_TYPE_REQ) {
-               struct xen_sndif_sring *sring = (struct xen_sndif_sring *)page;
+               struct xen_sndif_sring *sring = page;
 
                init_completion(&channel->u.req.completion);
                mutex_init(&channel->u.req.req_io_lock);
-               SHARED_RING_INIT(sring);
-               FRONT_RING_INIT(&channel->u.req.ring, sring, XEN_PAGE_SIZE);
-
-               ret = xenbus_grant_ring(xb_dev, sring, 1, &gref);
-               if (ret < 0) {
-                       channel->u.req.ring.sring = NULL;
-                       goto fail;
-               }
+               XEN_FRONT_RING_INIT(&channel->u.req.ring, sring, XEN_PAGE_SIZE);
 
                handler = evtchnl_interrupt_req;
        } else {
-               ret = gnttab_grant_foreign_access(xb_dev->otherend_id,
-                                                 virt_to_gfn((void *)page), 0);
-               if (ret < 0)
-                       goto fail;
-
-               channel->u.evt.page = (struct xensnd_event_page *)page;
-               gref = ret;
+               channel->u.evt.page = page;
                handler = evtchnl_interrupt_evt;
        }
 
-       channel->gref = gref;
-
        ret = xenbus_alloc_evtchn(xb_dev, &channel->port);
        if (ret < 0)
                goto fail;
@@ -279,8 +257,6 @@ static int evtchnl_alloc(struct xen_snd_front_info *front_info, int index,
        return 0;
 
 fail:
-       if (page)
-               free_page(page);
        kfree(handler_name);
        dev_err(&xb_dev->dev, "Failed to allocate ring: %d\n", ret);
        return ret;
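
The conversion above replaces the open-coded grant handling with the generic
xenbus ring helpers, which pair page allocation plus grant with end-access plus
free. In outline, with error handling trimmed:

    void *page;
    grant_ref_t gref;
    int err;

    /* allocate a zeroed shared page and grant the other end access */
    err = xenbus_setup_ring(xb_dev, GFP_KERNEL, &page, 1, &gref);
    if (err)
            return err;
    /* ... use the ring ... */

    /* end foreign access and free the page in a single call */
    xenbus_teardown_ring(&page, 1, &gref);

This is also what makes the driver-local GRANT_INVALID_REF workaround, removed
below, unnecessary: the helpers track the grant state themselves.
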
index cbe51fd1ec1522055415973d4218e212b2efc7e1..3675fba705647dcf7f9732136ca072fee115b682 100644 (file)
 
 struct xen_snd_front_info;
 
-#ifndef GRANT_INVALID_REF
-/*
- * FIXME: usage of grant reference 0 as invalid grant reference:
- * grant reference 0 is valid, but never exposed to a PV driver,
- * because of the fact it is already in use/reserved by the PV console.
- */
-#define GRANT_INVALID_REF      0
-#endif
-
 /* Timeout in ms to wait for backend to respond. */
 #define VSND_WAIT_BACK_MS      3000
 
index db2f7b8ebed59a0d067c48e5dc19f3ac9927d9e2..c074e42fd92f589b730fa2f169f27616987a242b 100644 (file)
@@ -24,6 +24,7 @@ help:
        @echo '  intel-speed-select     - Intel Speed Select tool'
        @echo '  kvm_stat               - top-like utility for displaying kvm statistics'
        @echo '  leds                   - LEDs  tools'
+       @echo '  nolibc                 - nolibc headers testing and installation'
        @echo '  objtool                - an ELF object analysis tool'
        @echo '  pci                    - PCI tools'
        @echo '  perf                   - Linux performance measurement and analysis tool'
@@ -31,6 +32,9 @@ help:
        @echo '  bootconfig             - boot config tool'
        @echo '  spi                    - spi tools'
        @echo '  tmon                   - thermal monitoring and tuning tool'
+       @echo '  thermometer            - temperature capture tool'
+       @echo '  thermal-engine         - thermal monitoring tool'
+       @echo '  thermal                - thermal library'
        @echo '  tracing                - misc tracing tools'
        @echo '  turbostat              - Intel CPU idle stats and freq reporting tool'
        @echo '  usb                    - USB testing tools'
@@ -74,6 +78,9 @@ bpf/%: FORCE
 libapi: FORCE
        $(call descend,lib/api)
 
+nolibc_%: FORCE
+       $(call descend,include/nolibc,$(patsubst nolibc_%,%,$@))
+
 # The perf build does not follow the descend function setup,
 # invoking it via its own make rule.
 PERF_O   = $(if $(O),$(O)/tools/perf,)
@@ -85,12 +92,21 @@ perf: FORCE
 selftests: FORCE
        $(call descend,testing/$@)
 
+thermal: FORCE
+       $(call descend,lib/$@)
+
 turbostat x86_energy_perf_policy intel-speed-select: FORCE
        $(call descend,power/x86/$@)
 
 tmon: FORCE
        $(call descend,thermal/$@)
 
+thermometer: FORCE
+       $(call descend,thermal/$@)
+
+thermal-engine: FORCE thermal
+       $(call descend,thermal/$@)
+
 freefall: FORCE
        $(call descend,laptop/$@)
 
@@ -101,7 +117,7 @@ all: acpi cgroup counter cpupower gpio hv firewire \
                perf selftests bootconfig spi turbostat usb \
                virtio vm bpf x86_energy_perf_policy \
                tmon freefall iio objtool kvm_stat wmi \
-               pci debugging tracing
+               pci debugging tracing thermal thermometer thermal-engine
 
 acpi_install:
        $(call descend,power/$(@:_install=),install)
@@ -115,12 +131,21 @@ cgroup_install counter_install firewire_install gpio_install hv_install iio_inst
 selftests_install:
        $(call descend,testing/$(@:_install=),install)
 
+thermal_install:
+       $(call descend,lib/$(@:_install=),install)
+
 turbostat_install x86_energy_perf_policy_install intel-speed-select_install:
        $(call descend,power/x86/$(@:_install=),install)
 
 tmon_install:
        $(call descend,thermal/$(@:_install=),install)
 
+thermometer_install:
+       $(call descend,thermal/$(@:_install=),install)
+
+thermal-engine_install:
+       $(call descend,thermal/$(@:_install=),install)
+
 freefall_install:
        $(call descend,laptop/$(@:_install=),install)
 
@@ -133,7 +158,7 @@ install: acpi_install cgroup_install counter_install cpupower_install gpio_insta
                virtio_install vm_install bpf_install x86_energy_perf_policy_install \
                tmon_install freefall_install objtool_install kvm_stat_install \
                wmi_install pci_install debugging_install intel-speed-select_install \
-               tracing_install
+               tracing_install thermometer_install thermal-engine_install
 
 acpi_clean:
        $(call descend,power/acpi,clean)
@@ -160,9 +185,18 @@ perf_clean:
 selftests_clean:
        $(call descend,testing/$(@:_clean=),clean)
 
+thermal_clean:
+       $(call descend,lib/thermal,clean)
+
 turbostat_clean x86_energy_perf_policy_clean intel-speed-select_clean:
        $(call descend,power/x86/$(@:_clean=),clean)
 
+thermometer_clean:
+       $(call descend,thermal/thermometer,clean)
+
+thermal-engine_clean:
+       $(call descend,thermal/thermal-engine,clean)
+
 tmon_clean:
        $(call descend,thermal/tmon,clean)
 
@@ -177,6 +211,6 @@ clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_cl
                vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
                freefall_clean build_clean libbpf_clean libsubcmd_clean \
                gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
-               intel-speed-select_clean tracing_clean
+               intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean
 
 .PHONY: FORCE
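
As a usage note for the targets added above: an invocation such as
"make -C tools thermal-engine" should descend into lib/thermal first (the
thermal-engine target lists thermal as a prerequisite) and only then build
thermal/thermal-engine, while the matching _install and _clean targets descend
into the same directories.
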
index 174e7d83fcbdb22f32d523ad0355e310d33701ec..765e9e752d038ed67e8a3a2f69ee2f6dea6f6631 100644 (file)
@@ -49,7 +49,7 @@ union ibs_op_ctl {
        };
 };
 
-/* MSR 0xc0011035: IBS Op Data 2 */
+/* MSR 0xc0011035: IBS Op Data 1 */
 union ibs_op_data {
        __u64 val;
        struct {
index 1231d63f836d81386f122c1022a7f5a25d503ef1..1ae0fab7d902b0a722563cccf89aa770e4eb7f35 100644 (file)
  * cpu_feature_enabled().
  */
 
-#ifdef CONFIG_X86_SMAP
-# define DISABLE_SMAP  0
-#else
-# define DISABLE_SMAP  (1<<(X86_FEATURE_SMAP & 31))
-#endif
-
 #ifdef CONFIG_X86_UMIP
 # define DISABLE_UMIP  0
 #else
@@ -80,7 +74,7 @@
 #define DISABLED_MASK6 0
 #define DISABLED_MASK7 (DISABLE_PTI)
 #define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
+#define DISABLED_MASK9 (DISABLE_SGX)
 #define DISABLED_MASK10        0
 #define DISABLED_MASK11        0
 #define DISABLED_MASK12        0
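
With CONFIG_X86_SMAP gone, SMAP support can no longer be compiled out, so its
DISABLE_* bit disappears from the mask. For context, a set bit in these masks
makes the corresponding feature check fold away at build time; roughly (the
call site below is illustrative):

    /* cpu_feature_enabled() evaluates to a constant 0 when the feature's
     * DISABLE_* bit is set in the DISABLED_MASK tables, so the compiler
     * drops the guarded branch entirely. */
    if (cpu_feature_enabled(X86_FEATURE_SGX))
            sgx_setup();    /* illustrative, not a real init hook */
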
index ae61f464043a11fbe78d7fcebb5f2266a1b291a7..c6a48d0ef9ff06489a7a32340f2ee90e1b159efc 100644 (file)
@@ -98,6 +98,7 @@ FEATURE_TESTS_EXTRA :=                  \
          llvm-version                   \
          clang                          \
          libbpf                         \
+         libbpf-btf__load_from_kernel_by_id \
          libpfm4                        \
          libdebuginfod                 \
          clang-bpf-co-re
index de66e1cc073481c5f840ea2a6146ac0c4fe2f9cb..cb4a2a4fa2e48ebf444a0b1604b0a2619b26a58c 100644 (file)
@@ -57,6 +57,7 @@ FILES=                                          \
          test-lzma.bin                          \
          test-bpf.bin                           \
          test-libbpf.bin                        \
+         test-libbpf-btf__load_from_kernel_by_id.bin   \
          test-get_cpuid.bin                     \
          test-sdt.bin                           \
          test-cxx.bin                           \
@@ -287,6 +288,9 @@ $(OUTPUT)test-bpf.bin:
 $(OUTPUT)test-libbpf.bin:
        $(BUILD) -lbpf
 
+$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin:
+       $(BUILD) -lbpf
+
 $(OUTPUT)test-sdt.bin:
        $(BUILD)
 
diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
new file mode 100644 (file)
index 0000000..f7c0844
--- /dev/null
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/btf.h>
+
+int main(void)
+{
+	return btf__load_from_kernel_by_id(20151128) ? 0 : 1;
+}
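
If the installed libbpf is too old to provide this symbol, the test fails to
link and perf can fall back to a local wrapper built on the older API; a hedged
sketch of that pattern (the HAVE_* macro name is an assumption):

    #ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
    /* fallback: fetch the BTF object via the legacy btf__get_from_id() */
    struct btf *btf__load_from_kernel_by_id(__u32 id)
    {
            struct btf *btf;
            int err = btf__get_from_id(id, &btf);

            return err ? ERR_PTR(err) : btf;
    }
    #endif
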
diff --git a/tools/certs/print-cert-tbs-hash.sh b/tools/certs/print-cert-tbs-hash.sh
new file mode 100755 (executable)
index 0000000..c93df53
--- /dev/null
@@ -0,0 +1,91 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright © 2020, Microsoft Corporation. All rights reserved.
+#
+# Author: Mickaël Salaün <mic@linux.microsoft.com>
+#
+# Compute and print the To Be Signed (TBS) hash of a certificate.  This is used
+# as the description of keys in the blacklist keyring to identify certificates.
+# This output should be redirected, without a trailing newline, to a file
+# (hash0.txt) and signed to create a PKCS#7 file (hash0.p7s).  Both of these
+# files can then be loaded into the kernel as follows:
+#
+# Example on a workstation:
+# ./print-cert-tbs-hash.sh certificate-to-invalidate.pem > hash0.txt
+# openssl smime -sign -in hash0.txt -inkey builtin-private-key.pem \
+#               -signer builtin-certificate.pem -certfile certificate-chain.pem \
+#               -noattr -binary -outform DER -out hash0.p7s
+#
+# Example on a managed system:
+# keyctl padd blacklist "$(< hash0.txt)" %:.blacklist < hash0.p7s
+
+set -u -e -o pipefail
+
+CERT="${1:-}"
+BASENAME="$(basename -- "${BASH_SOURCE[0]}")"
+
+if [ $# -ne 1 ] || [ ! -f "${CERT}" ]; then
+       echo "usage: ${BASENAME} <certificate>" >&2
+       exit 1
+fi
+
+# Check that it is indeed a certificate (PEM or DER encoded) and exclude the
+# optional PEM text header.
+if ! PEM="$(openssl x509 -inform DER -in "${CERT}" 2>/dev/null || openssl x509 -in "${CERT}")"; then
+       echo "ERROR: Failed to parse certificate" >&2
+       exit 1
+fi
+
+# TBSCertificate starts at the second entry.
+# Cf. https://tools.ietf.org/html/rfc3280#section-4.1
+#
+# Example of the first lines printed by openssl asn1parse:
+#    0:d=0  hl=4 l= 763 cons: SEQUENCE
+#    4:d=1  hl=4 l= 483 cons: SEQUENCE
+#    8:d=2  hl=2 l=   3 cons: cont [ 0 ]
+#   10:d=3  hl=2 l=   1 prim: INTEGER           :02
+#   13:d=2  hl=2 l=  20 prim: INTEGER           :3CEB2CB8818D968AC00EEFE195F0DF9665328B7B
+#   35:d=2  hl=2 l=  13 cons: SEQUENCE
+#   37:d=3  hl=2 l=   9 prim: OBJECT            :sha256WithRSAEncryption
+RANGE_AND_DIGEST_RE='
+2s/^\s*\([0-9]\+\):d=\s*[0-9]\+\s\+hl=\s*[0-9]\+\s\+l=\s*\([0-9]\+\)\s\+cons:\s*SEQUENCE\s*$/\1 \2/p;
+7s/^\s*[0-9]\+:d=\s*[0-9]\+\s\+hl=\s*[0-9]\+\s\+l=\s*[0-9]\+\s\+prim:\s*OBJECT\s*:\(.*\)$/\1/p;
+'
+
+RANGE_AND_DIGEST=($(echo "${PEM}" | \
+       openssl asn1parse -in - | \
+       sed -n -e "${RANGE_AND_DIGEST_RE}"))
+
+if [ "${#RANGE_AND_DIGEST[@]}" != 3 ]; then
+       echo "ERROR: Failed to parse TBSCertificate." >&2
+       exit 1
+fi
+
+OFFSET="${RANGE_AND_DIGEST[0]}"
+END="$(( OFFSET + RANGE_AND_DIGEST[1] ))"
+DIGEST="${RANGE_AND_DIGEST[2]}"
+
+# The signature hash algorithm is used by Linux to blacklist certificates.
+# Cf. crypto/asymmetric_keys/x509_cert_parser.c:x509_note_pkey_algo()
+DIGEST_MATCH=""
+while read -r DIGEST_ITEM; do
+       if [ -z "${DIGEST_ITEM}" ]; then
+               break
+       fi
+       if echo "${DIGEST}" | grep -qiF "${DIGEST_ITEM}"; then
+               DIGEST_MATCH="${DIGEST_ITEM}"
+               break
+       fi
+done < <(openssl list -digest-commands | tr ' ' '\n' | sort -ur)
+
+if [ -z "${DIGEST_MATCH}" ]; then
+       echo "ERROR: Unknown digest algorithm: ${DIGEST}" >&2
+       exit 1
+fi
+
+echo "${PEM}" | \
+       openssl x509 -in - -outform DER | \
+       dd "bs=1" "skip=${OFFSET}" "count=${END}" "status=none" | \
+       openssl dgst "-${DIGEST_MATCH}" - | \
+       awk '{printf "tbs:" $2}'
index 586d35720f135f62b9d7434beccacb2e22a5bd2d..6491fa8fba6d5028eb55cc7c827d162ee7578ef3 100644 (file)
@@ -38,7 +38,9 @@ struct unwind_hint {
 #define UNWIND_HINT_TYPE_REGS_PARTIAL  2
 #define UNWIND_HINT_TYPE_FUNC          3
 
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_OBJTOOL
+
+#include <asm/asm.h>
 
 #ifndef __ASSEMBLY__
 
@@ -137,7 +139,7 @@ struct unwind_hint {
 
 .macro STACK_FRAME_NON_STANDARD func:req
        .pushsection .discard.func_stack_frame_non_standard, "aw"
-               .long \func - .
+       _ASM_PTR \func
        .popsection
 .endm
 
@@ -157,7 +159,7 @@ struct unwind_hint {
 
 #endif /* __ASSEMBLY__ */
 
-#else /* !CONFIG_STACK_VALIDATION */
+#else /* !CONFIG_OBJTOOL */
 
 #ifndef __ASSEMBLY__
 
@@ -179,6 +181,6 @@ struct unwind_hint {
 .endm
 #endif
 
-#endif /* CONFIG_STACK_VALIDATION */
+#endif /* CONFIG_OBJTOOL */
 
 #endif /* _LINUX_OBJTOOL_H */
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
new file mode 100644 (file)
index 0000000..7a16d91
--- /dev/null
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for nolibc installation and tests
+include ../../scripts/Makefile.include
+
+# we're in ".../tools/include/nolibc"
+ifeq ($(srctree),)
+srctree := $(patsubst %/tools/include/,%,$(dir $(CURDIR)))
+endif
+
+nolibc_arch := $(patsubst arm64,aarch64,$(ARCH))
+arch_file := arch-$(nolibc_arch).h
+all_files := ctype.h errno.h nolibc.h signal.h std.h stdio.h stdlib.h string.h \
+             sys.h time.h types.h unistd.h
+
+# install all headers needed to support a bare-metal compiler
+all:
+
+# Note: when ARCH is "x86" we concatenate both x86_64 and i386
+headers:
+       $(Q)mkdir -p $(OUTPUT)sysroot
+       $(Q)mkdir -p $(OUTPUT)sysroot/include
+       $(Q)cp $(all_files) $(OUTPUT)sysroot/include/
+       $(Q)if [ "$(ARCH)" = "x86" ]; then      \
+               sed -e                          \
+                 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \
+                 arch-x86_64.h;                \
+               sed -e                          \
+                 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \
+                 arch-i386.h;                  \
+       elif [ -e "$(arch_file)" ]; then        \
+               cat $(arch_file);               \
+       else                                    \
+               echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \
+               exit 1;                         \
+       fi > $(OUTPUT)sysroot/include/arch.h
+
+headers_standalone: headers
+       $(Q)$(MAKE) -C $(srctree) headers
+       $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)/sysroot
+
+clean:
+       $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot"
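
As a smoke test for the installed headers, a freestanding program can be built
against the generated sysroot. A hypothetical example (the flags follow the
usage notes in nolibc.h; adjust the include path to where "headers" installed
the sysroot):

    /* hello.c - build roughly with:
     *   gcc -static -nostdlib -fno-asynchronous-unwind-tables \
     *       -include sysroot/include/nolibc.h -o hello hello.c -lgcc
     */
    int main(void)
    {
            printf("hello from nolibc\n"); /* printf comes from nolibc's stdio.h */
            return 0;
    }
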
diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
new file mode 100644 (file)
index 0000000..f68baf8
--- /dev/null
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * AARCH64 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_AARCH64_H
+#define _NOLIBC_ARCH_AARCH64_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT          0x40
+#define O_EXCL           0x80
+#define O_NOCTTY        0x100
+#define O_TRUNC         0x200
+#define O_APPEND        0x400
+#define O_NONBLOCK      0x800
+#define O_DIRECTORY    0x4000
+
+/* The struct returned by the newfstatat() syscall. It differs slightly from
+ * x86_64's stat struct in field ordering, so be careful.
+ */
+struct sys_stat_struct {
+       unsigned long   st_dev;
+       unsigned long   st_ino;
+       unsigned int    st_mode;
+       unsigned int    st_nlink;
+       unsigned int    st_uid;
+       unsigned int    st_gid;
+
+       unsigned long   st_rdev;
+       unsigned long   __pad1;
+       long            st_size;
+       int             st_blksize;
+       int             __pad2;
+
+       long            st_blocks;
+       long            st_atime;
+       unsigned long   st_atime_nsec;
+       long            st_mtime;
+
+       unsigned long   st_mtime_nsec;
+       long            st_ctime;
+       unsigned long   st_ctime_nsec;
+       unsigned int    __unused[2];
+};
+
+/* Syscalls for AARCH64:
+ *   - registers are 64-bit
+ *   - stack is 16-byte aligned
+ *   - syscall number is passed in x8
+ *   - arguments are in x0, x1, x2, x3, x4, x5
+ *   - the system call is performed by calling svc 0
+ *   - syscall return comes in x0.
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't run into issues with register constraints.
+ *
+ * On aarch64, select() is not implemented so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1),                                                 \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2),                                     \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("x4") = (long)(arg5);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r" (_arg1)                                                \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("x4") = (long)(arg5);                    \
+       register long _arg6 __asm__ ("x5") = (long)(arg6);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r" (_arg1)                                                \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "r"(_arg6), "r"(_num)                                       \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+    "ldr x0, [sp]\n"              // argc (x0) was in the stack
+    "add x1, sp, 8\n"             // argv (x1) = sp + 8 (skip argc)
+    "lsl x2, x0, 3\n"             // envp (x2) = 8*argc ...
+    "add x2, x2, 8\n"             //           + 8 (skip null)
+    "add x2, x2, x1\n"            //           + argv
+    "and sp, x1, -16\n"           // sp must be 16-byte aligned in the callee
+    "bl main\n"                   // main() returns the status code, we'll exit with it.
+    "mov x8, 93\n"                // NR_exit == 93
+    "svc #0\n"
+    "");
+
+#endif // _NOLIBC_ARCH_AARCH64_H
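
These macros are the building blocks for the thin syscall wrappers one layer
up; a representative sketch of how they are consumed (the shape follows
nolibc's sys.h):

    static ssize_t sys_write(int fd, const void *buf, size_t count)
    {
            /* x8 = __NR_write, x0..x2 carry the arguments, and the
             * return value comes back in x0. */
            return my_syscall3(__NR_write, fd, buf, count);
    }
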
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
new file mode 100644 (file)
index 0000000..f31be8e
--- /dev/null
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * ARM specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_ARM_H
+#define _NOLIBC_ARCH_ARM_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT          0x40
+#define O_EXCL           0x80
+#define O_NOCTTY        0x100
+#define O_TRUNC         0x200
+#define O_APPEND        0x400
+#define O_NONBLOCK      0x800
+#define O_DIRECTORY    0x4000
+
+/* The struct returned by the stat() syscall (32-bit only): the syscall
+ * returns exactly 56 bytes (it stops before the unused array). On big endian,
+ * the format differs, as device numbers are returned as shorts only.
+ */
+struct sys_stat_struct {
+#if defined(__ARMEB__)
+       unsigned short st_dev;
+       unsigned short __pad1;
+#else
+       unsigned long  st_dev;
+#endif
+       unsigned long  st_ino;
+       unsigned short st_mode;
+       unsigned short st_nlink;
+       unsigned short st_uid;
+       unsigned short st_gid;
+
+#if defined(__ARMEB__)
+       unsigned short st_rdev;
+       unsigned short __pad2;
+#else
+       unsigned long  st_rdev;
+#endif
+       unsigned long  st_size;
+       unsigned long  st_blksize;
+       unsigned long  st_blocks;
+
+       unsigned long  st_atime;
+       unsigned long  st_atime_nsec;
+       unsigned long  st_mtime;
+       unsigned long  st_mtime_nsec;
+
+       unsigned long  st_ctime;
+       unsigned long  st_ctime_nsec;
+       unsigned long  __unused[2];
+};
+
+/* Syscalls for ARM in ARM or Thumb modes:
+ *   - registers are 32-bit
+ *   - stack is 8-byte aligned
+ *     (http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
+ *   - syscall number is passed in r7
+ *   - arguments are in r0, r1, r2, r3, r4, r5
+ *   - the system call is performed by calling svc #0
+ *   - syscall return comes in r0.
+ *   - only lr is clobbered.
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't run into issues with register constraints.
+ *   - the syscall number is always specified last in order to allow forcing
+ *     some registers before it (gcc refuses a %-register in the last position).
+ *
+ * Also, ARM supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1),                                                 \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2),                                     \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("r4") = (long)(arg5);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r" (_arg1)                                                \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+#if defined(__THUMBEB__) || defined(__THUMBEL__)
+    /* We enter here in 32-bit mode but if some previous functions were in
+     * 16-bit mode, the assembler cannot know, so we need to tell it we're in
+     * 32-bit now, then switch to 16-bit (is there a better way to do it than
+     * adding 1 by hand?) and tell the assembler we're now in 16-bit mode so
+     * that it generates correct instructions. Note that we do not support thumb1.
+     */
+    ".code 32\n"
+    "add     r0, pc, #1\n"
+    "bx      r0\n"
+    ".code 16\n"
+#endif
+    "pop {%r0}\n"                 // argc was in the stack
+    "mov %r1, %sp\n"              // argv = sp
+    "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
+    "add %r2, %r2, $4\n"          //        ... + 4
+    "and %r3, %r1, $-8\n"         // AAPCS: sp must be 8-byte aligned in the
+    "mov %sp, %r3\n"              //         callee; a bl doesn't push (lr=pc)
+    "bl main\n"                   // main() returns the status code, we'll exit with it.
+    "movs r7, $1\n"               // NR_exit == 1
+    "svc $0x00\n"
+    "");
+
+#endif // _NOLIBC_ARCH_ARM_H
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
new file mode 100644 (file)
index 0000000..d7e7212
--- /dev/null
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * i386 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_I386_H
+#define _NOLIBC_ARCH_I386_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT          0x40
+#define O_EXCL           0x80
+#define O_NOCTTY        0x100
+#define O_TRUNC         0x200
+#define O_APPEND        0x400
+#define O_NONBLOCK      0x800
+#define O_DIRECTORY   0x10000
+
+/* The struct returned by the stat() syscall (32-bit only): the syscall
+ * returns exactly 56 bytes (it stops before the unused array).
+ */
+struct sys_stat_struct {
+       unsigned long  st_dev;
+       unsigned long  st_ino;
+       unsigned short st_mode;
+       unsigned short st_nlink;
+       unsigned short st_uid;
+       unsigned short st_gid;
+
+       unsigned long  st_rdev;
+       unsigned long  st_size;
+       unsigned long  st_blksize;
+       unsigned long  st_blocks;
+
+       unsigned long  st_atime;
+       unsigned long  st_atime_nsec;
+       unsigned long  st_mtime;
+       unsigned long  st_mtime_nsec;
+
+       unsigned long  st_ctime;
+       unsigned long  st_ctime_nsec;
+       unsigned long  __unused[2];
+};
+
+/* Syscalls for i386:
+ *   - mostly similar to x86_64
+ *   - registers are 32-bit
+ *   - syscall number is passed in eax
+ *   - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
+ *   - all registers are preserved (except eax of course)
+ *   - the system call is performed by calling int $0x80
+ *   - syscall return comes in eax
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't run into issues with register constraints.
+ *   - the syscall number is always specified last in order to allow forcing
+ *     some registers before it (gcc refuses a %-register in the last position).
+ *
+ * Also, i386 supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1),                                                 \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1), "r"(_arg2),                                     \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("esi") = (long)(arg4);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("esi") = (long)(arg4);                   \
+       register long _arg5 __asm__ ("edi") = (long)(arg5);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)   \
+({                                                             \
+       long _eax  = (long)(num);                               \
+       long _arg6 = (long)(arg6); /* Always in memory */       \
+       __asm__ volatile (                                      \
+               "pushl  %[_arg6]\n\t"                           \
+               "pushl  %%ebp\n\t"                              \
+               "movl   4(%%esp),%%ebp\n\t"                     \
+               "int    $0x80\n\t"                              \
+               "popl   %%ebp\n\t"                              \
+               "addl   $4,%%esp\n\t"                           \
+               : "+a"(_eax)            /* %eax */              \
+               : "b"(arg1),            /* %ebx */              \
+                 "c"(arg2),            /* %ecx */              \
+                 "d"(arg3),            /* %edx */              \
+                 "S"(arg4),            /* %esi */              \
+                 "D"(arg5),            /* %edi */              \
+                 [_arg6]"m"(_arg6)     /* memory */            \
+               : "memory", "cc"                                \
+       );                                                      \
+       _eax;                                                   \
+})
+
+/* startup code */
+/*
+ * i386 System V ABI mandates:
+ * 1) the last pushed argument must be 16-byte aligned.
+ * 2) the deepest stack frame should be set to zero.
+ *
+ */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+    "pop %eax\n"                // argc   (first arg, %eax)
+    "mov %esp, %ebx\n"          // argv[] (second arg, %ebx)
+    "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
+    "xor %ebp, %ebp\n"          // zero the stack frame
+    "and $-16, %esp\n"          // x86 ABI : esp must be 16-byte aligned before
+    "sub $4, %esp\n"            // the call instruction (args are aligned)
+    "push %ecx\n"               // push all registers on the stack so that we
+    "push %ebx\n"               // support both regparm and plain stack modes
+    "push %eax\n"
+    "call main\n"               // main() returns the status code in %eax
+    "mov %eax, %ebx\n"          // retrieve exit code (32-bit int)
+    "movl $1, %eax\n"           // NR_exit == 1
+    "int $0x80\n"               // exit now
+    "hlt\n"                     // ensure it does not
+    "");
+
+#endif // _NOLIBC_ARCH_I386_H
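
A minimal sketch of how these macros are typically consumed, here a hypothetical
sys_write() wrapper assuming __NR_write from <asm/unistd.h>:

    /* Sketch only: thin wrapper turning my_syscall3() into a write(2)
     * lookalike. On failure the kernel leaves -errno in the return value.
     */
    static long sys_write(int fd, const void *buf, unsigned long count)
    {
            return my_syscall3(__NR_write, fd, buf, count);
    }
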
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
new file mode 100644 (file)
index 0000000..5fc5b80
--- /dev/null
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * MIPS specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_MIPS_H
+#define _NOLIBC_ARCH_MIPS_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_APPEND       0x0008
+#define O_NONBLOCK     0x0080
+#define O_CREAT        0x0100
+#define O_TRUNC        0x0200
+#define O_EXCL         0x0400
+#define O_NOCTTY       0x0800
+#define O_DIRECTORY   0x10000
+
+/* The struct returned by the stat() syscall. 88 bytes are returned by the
+ * syscall.
+ */
+struct sys_stat_struct {
+       unsigned int  st_dev;
+       long          st_pad1[3];
+       unsigned long st_ino;
+       unsigned int  st_mode;
+       unsigned int  st_nlink;
+       unsigned int  st_uid;
+       unsigned int  st_gid;
+       unsigned int  st_rdev;
+       long          st_pad2[2];
+       long          st_size;
+       long          st_pad3;
+
+       long          st_atime;
+       long          st_atime_nsec;
+       long          st_mtime;
+       long          st_mtime_nsec;
+
+       long          st_ctime;
+       long          st_ctime_nsec;
+       long          st_blksize;
+       long          st_blocks;
+       long          st_pad4[14];
+};
+
+/* Syscalls for MIPS ABI O32 :
+ *   - WARNING! there's always a delay slot!
+ *   - WARNING again, the syntax is different, registers take a '$' and numbers
+ *     do not.
+ *   - registers are 32-bit
+ *   - stack is 8-byte aligned
+ *   - syscall number is passed in v0 (O32 numbers start at 4000, i.e. 0xfa0).
+ *   - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
+ *     leave some room on the stack for the callee to save a0..a3 if needed.
+ *   - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
+ *     preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
+ *     scall32-o32.S in the kernel sources.
+ *   - the system call is performed by calling "syscall"
+ *   - syscall return comes in v0, and register a3 needs to be checked to know
+ *     if an error occurred, in which case errno is in v0.
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't have to experience issues with register constraints.
+ */
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg4 __asm__ ("a3");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r"(_num), "=r"(_arg4)                                     \
+               : "r"(_num)                                                   \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg4 __asm__ ("a3");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r"(_num), "=r"(_arg4)                                     \
+               : "0"(_num),                                                  \
+                 "r"(_arg1)                                                  \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg4 __asm__ ("a3");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r"(_num), "=r"(_arg4)                                     \
+               : "0"(_num),                                                  \
+                 "r"(_arg1), "r"(_arg2)                                      \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       register long _num __asm__ ("v0")  = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r"(_num), "=r"(_arg4)                                     \
+               : "0"(_num),                                                  \
+                 "r"(_arg1), "r"(_arg2), "r"(_arg3)                          \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r" (_num), "=r"(_arg4)                                    \
+               : "0"(_num),                                                  \
+                 "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4)              \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+       register long _arg5 = (long)(arg5);                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "sw %7, 16($sp)\n"                                            \
+               "syscall\n  "                                                 \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r" (_num), "=r"(_arg4)                                    \
+               : "0"(_num),                                                  \
+                 "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5)  \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+/* startup code, note that it's called __start on MIPS */
+__asm__ (".section .text\n"
+    ".weak __start\n"
+    ".set nomips16\n"
+    ".set    noreorder\n"
+    ".option pic0\n"
+    ".ent __start\n"
+    "__start:\n"
+    "lw $a0,($sp)\n"              // argc was in the stack
+    "addiu  $a1, $sp, 4\n"        // argv = sp + 4
+    "sll $a2, $a0, 2\n"           // a2 = argc * 4
+    "add   $a2, $a2, $a1\n"       // envp = argv + 4*argc ...
+    "addiu $a2, $a2, 4\n"         //        ... + 4
+    "li $t0, -8\n"
+    "and $sp, $sp, $t0\n"         // sp must be 8-byte aligned
+    "addiu $sp,$sp,-16\n"         // the callee expects to save a0..a3 there!
+    "jal main\n"                  // main() returns the status code, we'll exit with it.
+    "nop\n"                       // delayed slot
+    "move $a0, $v0\n"             // retrieve 32-bit exit code from v0
+    "li $v0, 4001\n"              // NR_exit == 4001
+    "syscall\n"
+    ".end __start\n"
+    "");
+
+#endif // _NOLIBC_ARCH_MIPS_H
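
Since the "_arg4 ? -_num : _num" tail of each macro above already folds the
out-of-band MIPS error flag (a3) back into the common "negative return carries
-errno" convention, callers can stay architecture-independent. A sketch, with a
hypothetical fd in hand (SET_ERRNO() is provided by errno.h further down):

    /* Sketch: identical error handling as on the other architectures,
     * because the macros negated v0 whenever a3 signalled an error.
     */
    long ret = my_syscall1(__NR_close, fd);
    if (ret < 0) {
            SET_ERRNO(-ret);
            ret = -1;
    }
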
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
new file mode 100644 (file)
index 0000000..95e2b79
--- /dev/null
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * RISCV (32 and 64) specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_RISCV_H
+#define _NOLIBC_ARCH_RISCV_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT         0x100
+#define O_EXCL          0x200
+#define O_NOCTTY        0x400
+#define O_TRUNC        0x1000
+#define O_APPEND       0x2000
+#define O_NONBLOCK     0x4000
+#define O_DIRECTORY  0x200000
+
+struct sys_stat_struct {
+       unsigned long   st_dev;         /* Device.  */
+       unsigned long   st_ino;         /* File serial number.  */
+       unsigned int    st_mode;        /* File mode.  */
+       unsigned int    st_nlink;       /* Link count.  */
+       unsigned int    st_uid;         /* User ID of the file's owner.  */
+       unsigned int    st_gid;         /* Group ID of the file's group. */
+       unsigned long   st_rdev;        /* Device number, if device.  */
+       unsigned long   __pad1;
+       long            st_size;        /* Size of file, in bytes.  */
+       int             st_blksize;     /* Optimal block size for I/O.  */
+       int             __pad2;
+       long            st_blocks;      /* Number of 512-byte blocks allocated. */
+       long            st_atime;       /* Time of last access.  */
+       unsigned long   st_atime_nsec;
+       long            st_mtime;       /* Time of last modification.  */
+       unsigned long   st_mtime_nsec;
+       long            st_ctime;       /* Time of last status change.  */
+       unsigned long   st_ctime_nsec;
+       unsigned int    __unused4;
+       unsigned int    __unused5;
+};
+
+#if   __riscv_xlen == 64
+#define PTRLOG "3"
+#define SZREG  "8"
+#elif __riscv_xlen == 32
+#define PTRLOG "2"
+#define SZREG  "4"
+#endif
+
+/* Syscalls for RISCV :
+ *   - stack is 16-byte aligned
+ *   - syscall number is passed in a7
+ *   - arguments are in a0, a1, a2, a3, a4, a5
+ *   - the system call is performed by calling ecall
+ *   - syscall return comes in a0
+ *   - the arguments are cast to long and assigned into the target
+ *     registers which are then simply passed as registers to the asm code,
+ *     so that we don't have to experience issues with register constraints.
+ *
+ * On riscv, select() is not implemented so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n\t"                                                   \
+               : "=r"(_arg1)                                                 \
+               : "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2),                                                 \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n\t"                                                   \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2), "r"(_arg3),                                     \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("a4") = (long)(arg5);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("a4") = (long)(arg5);                    \
+       register long _arg6 __asm__ ("a5") = (long)(arg6);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+    ".option push\n"
+    ".option norelax\n"
+    "lla   gp, __global_pointer$\n"
+    ".option pop\n"
+    "ld    a0, 0(sp)\n"          // argc (a0) was in the stack
+    "add   a1, sp, "SZREG"\n"    // argv (a1) = sp
+    "slli  a2, a0, "PTRLOG"\n"   // envp (a2) = SZREG*argc ...
+    "add   a2, a2, "SZREG"\n"    //             + SZREG (skip null)
+    "add   a2,a2,a1\n"           //             + argv
+    "andi  sp,a1,-16\n"          // sp must be 16-byte aligned
+    "call  main\n"               // main() returns the status code, we'll exit with it.
+    "li a7, 93\n"                // NR_exit == 93
+    "ecall\n"
+    "");
+
+#endif // _NOLIBC_ARCH_RISCV_H
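
Because __ARCH_WANT_SYS_PSELECT6 is set above, a select() emulation on this
architecture has to route through pselect6() with a converted timeout and a
NULL sigmask. A sketch of what that can look like, with fd_set and the time
structs assumed from the common type headers:

    /* Sketch: select() on top of pselect6(); the timeout is converted
     * from microseconds to nanoseconds and the sigmask is left NULL.
     */
    static int sys_select(int nfds, fd_set *rfds, fd_set *wfds,
                          fd_set *efds, struct timeval *timeout)
    {
            struct timespec t;

            if (timeout) {
                    t.tv_sec  = timeout->tv_sec;
                    t.tv_nsec = timeout->tv_usec * 1000;
            }
            return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds,
                               timeout ? &t : NULL, NULL);
    }
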
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
new file mode 100644 (file)
index 0000000..0e1e9eb
--- /dev/null
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * x86_64 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_X86_64_H
+#define _NOLIBC_ARCH_X86_64_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT          0x40
+#define O_EXCL           0x80
+#define O_NOCTTY        0x100
+#define O_TRUNC         0x200
+#define O_APPEND        0x400
+#define O_NONBLOCK      0x800
+#define O_DIRECTORY   0x10000
+
+/* The struct returned by the stat() syscall, equivalent to stat64(). The
+ * syscall returns 116 bytes and stops in the middle of __unused.
+ */
+struct sys_stat_struct {
+       unsigned long st_dev;
+       unsigned long st_ino;
+       unsigned long st_nlink;
+       unsigned int  st_mode;
+       unsigned int  st_uid;
+
+       unsigned int  st_gid;
+       unsigned int  __pad0;
+       unsigned long st_rdev;
+       long          st_size;
+       long          st_blksize;
+
+       long          st_blocks;
+       unsigned long st_atime;
+       unsigned long st_atime_nsec;
+       unsigned long st_mtime;
+
+       unsigned long st_mtime_nsec;
+       unsigned long st_ctime;
+       unsigned long st_ctime_nsec;
+       long          __unused[3];
+};
+
+/* Syscalls for x86_64 :
+ *   - registers are 64-bit
+ *   - syscall number is passed in rax
+ *   - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
+ *   - the system call is performed by calling the syscall instruction
+ *   - syscall return comes in rax
+ *   - rcx and r11 are clobbered, others are preserved.
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't have to experience issues with register constraints.
+ *   - the syscall number is always specified last so that other registers
+ *     can be forced before it (gcc refuses a %-register at the last position).
+ *   - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
+ *     Calling Conventions.
+ *
+ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home
+ *
+ */
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1),                                                 \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2),                                     \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
+       register long _arg5 __asm__ ("r8")  = (long)(arg5);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
+       register long _arg5 __asm__ ("r8")  = (long)(arg5);                   \
+       register long _arg6 __asm__ ("r9")  = (long)(arg6);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "r"(_arg6), "0"(_num)                                       \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+/* startup code */
+/*
+ * x86-64 System V ABI mandates:
+ * 1) %rsp must be 16-byte aligned right before the function call.
+ * 2) the deepest stack frame should be zero (%rbp set to zero).
+ *
+ */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+    "pop %rdi\n"                // argc   (first arg, %rdi)
+    "mov %rsp, %rsi\n"          // argv[] (second arg, %rsi)
+    "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
+    "xor %ebp, %ebp\n"          // zero the stack frame
+    "and $-16, %rsp\n"          // x86 ABI : esp must be 16-byte aligned before call
+    "call main\n"               // main() returns the status code, we'll exit with it.
+    "mov %eax, %edi\n"          // retrieve exit code (32 bit)
+    "mov $60, %eax\n"           // NR_exit == 60
+    "syscall\n"                 // really exit
+    "hlt\n"                     // ensure it does not return
+    "");
+
+#endif // _NOLIBC_ARCH_X86_64_H
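
With rcx and r11 declared as clobbers, the compiler keeps live values out of
those registers across the syscall instruction. A sketch of a minimal
termination helper built on my_syscall1(), using the same __NR_exit == 60 as
_start above (helper name hypothetical):

    /* Sketch: exit through my_syscall1(); the status is truncated to
     * 8 bits, matching what wait() reports to the parent.
     */
    static void sys_exit_sketch(int status)
    {
            my_syscall1(__NR_exit, status & 255);
            while (1);              /* not reached: exit does not return */
    }
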
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
new file mode 100644 (file)
index 0000000..4c69923
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+/* Below comes the architecture-specific code. For each architecture, we have
+ * the syscall declarations and the _start code definition. This is the only
+ * global part. On all architectures the kernel puts everything on the stack
+ * before jumping to _start just above us, without any return address (_start
+ * is not a function but an entry point). So at the stack pointer we find argc.
+ * Then argv[] begins, and ends at the first NULL. Right after that NULL comes
+ * envp, which is NULL-terminated as well. So envp = argv + argc + 1.
+ */
+
+#ifndef _NOLIBC_ARCH_H
+#define _NOLIBC_ARCH_H
+
+#if defined(__x86_64__)
+#include "arch-x86_64.h"
+#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+#include "arch-i386.h"
+#elif defined(__ARM_EABI__)
+#include "arch-arm.h"
+#elif defined(__aarch64__)
+#include "arch-aarch64.h"
+#elif defined(__mips__) && defined(_ABIO32)
+#include "arch-mips.h"
+#elif defined(__riscv)
+#include "arch-riscv.h"
+#endif
+
+#endif /* _NOLIBC_ARCH_H */
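
Given that layout, the relation envp = argv + argc + 1 can even be checked from
main() itself. A small sketch:

    /* Sketch: with the kernel's initial stack layout described above,
     * the environment array directly follows argv[] and its NULL.
     */
    int main(int argc, char **argv, char **envp)
    {
            return (envp == argv + argc + 1) ? 0 : 1;
    }
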
diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h
new file mode 100644 (file)
index 0000000..e3000b2
--- /dev/null
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * ctype function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_CTYPE_H
+#define _NOLIBC_CTYPE_H
+
+#include "std.h"
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+static __attribute__((unused))
+int isascii(int c)
+{
+       /* 0x00..0x7f */
+       return (unsigned int)c <= 0x7f;
+}
+
+static __attribute__((unused))
+int isblank(int c)
+{
+       return c == '\t' || c == ' ';
+}
+
+static __attribute__((unused))
+int iscntrl(int c)
+{
+       /* 0x00..0x1f, 0x7f */
+       return (unsigned int)c < 0x20 || c == 0x7f;
+}
+
+static __attribute__((unused))
+int isdigit(int c)
+{
+       return (unsigned int)(c - '0') < 10;
+}
+
+static __attribute__((unused))
+int isgraph(int c)
+{
+       /* 0x21..0x7e */
+       return (unsigned int)(c - 0x21) < 0x5e;
+}
+
+static __attribute__((unused))
+int islower(int c)
+{
+       return (unsigned int)(c - 'a') < 26;
+}
+
+static __attribute__((unused))
+int isprint(int c)
+{
+       /* 0x20..0x7e */
+       return (unsigned int)(c - 0x20) < 0x5f;
+}
+
+static __attribute__((unused))
+int isspace(int c)
+{
+       /* \t is 0x9, \n is 0xA, \v is 0xB, \f is 0xC, \r is 0xD */
+       return ((unsigned int)c == ' ') || (unsigned int)(c - 0x09) < 5;
+}
+
+static __attribute__((unused))
+int isupper(int c)
+{
+       return (unsigned int)(c - 'A') < 26;
+}
+
+static __attribute__((unused))
+int isxdigit(int c)
+{
+       return isdigit(c) || (unsigned int)(c - 'A') < 6 || (unsigned int)(c - 'a') < 6;
+}
+
+static __attribute__((unused))
+int isalpha(int c)
+{
+       return islower(c) || isupper(c);
+}
+
+static __attribute__((unused))
+int isalnum(int c)
+{
+       return isalpha(c) || isdigit(c);
+}
+
+static __attribute__((unused))
+int ispunct(int c)
+{
+       return isgraph(c) && !isalnum(c);
+}
+
+#endif /* _NOLIBC_CTYPE_H */
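
The range checks above all rely on one idiom: subtracting the lower bound and
casting to unsigned folds the two comparisons into one, because any value below
the bound wraps around to a huge unsigned number. Spelled out for isdigit():

    /* Sketch: (unsigned int)(c - '0') < 10 is the branch-free form of */
    int isdigit_naive(int c)
    {
            return c >= '0' && c <= '9';
    }
    /* Any c < '0' makes c - '0' negative, which the cast turns into a
     * value near UINT_MAX, so the single "< 10" test still rejects it.
     */
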
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
new file mode 100644 (file)
index 0000000..06893d6
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Minimal errno definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ERRNO_H
+#define _NOLIBC_ERRNO_H
+
+#include <asm/errno.h>
+
+/* this way it will be removed if unused */
+static int errno;
+
+#ifndef NOLIBC_IGNORE_ERRNO
+#define SET_ERRNO(v) do { errno = (v); } while (0)
+#else
+#define SET_ERRNO(v) do { } while (0)
+#endif
+
+
+/* The errno codes are chosen so that they never conflict with a valid pointer,
+ * because they all correspond to the highest addressable memory page.
+ */
+#define MAX_ERRNO 4095
+
+#endif /* _NOLIBC_ERRNO_H */
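
Since valid error codes stay within MAX_ERRNO, a raw syscall result can be
classified without any extra state. A sketch of the usual pattern built on the
two macros above (helper name hypothetical):

    /* Sketch: raw syscall results in [-MAX_ERRNO..-1] are errors;
     * anything else, including pointers, denotes success because no
     * valid pointer lands in the highest addressable page.
     */
    static long __sysret(long ret)
    {
            if (ret < 0 && ret >= -MAX_ERRNO) {
                    SET_ERRNO(-ret);
                    ret = -1;
            }
            return ret;
    }
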
index c1c285fe494aa6ca1f334a1130889a80ec8330b0..b2bc48d3cfe4b9321f5478162dc29a623c1fc497 100644 (file)
  * having to specify anything.
  *
  * Finally some very common libc-level functions are provided. It is the case
- * for a few functions usually found in string.h, ctype.h, or stdlib.h. Nothing
- * is currently provided regarding stdio emulation.
+ * for a few functions usually found in string.h, ctype.h, or stdlib.h.
  *
- * The macro NOLIBC is always defined, so that it is possible for a program to
- * check this macro to know if it is being built against and decide to disable
- * some features or simply not to include some standard libc files.
- *
- * Ideally this file should be split in multiple files for easier long term
- * maintenance, but provided as a single file as it is now, it's quite
- * convenient to use. Maybe some variations involving a set of includes at the
- * top could work.
+ * The nolibc.h file is only a convenient entry point which includes all other
+ * files. It also defines the NOLIBC macro, so that a program can check this
+ * macro to know whether it is being built against nolibc and decide to
+ * disable some features or simply not to include some standard libc files.
  *
  * A simple static executable may be built this way :
  *      $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
  *            -static -include nolibc.h -o hello hello.c -lgcc
  *
+ * Simple programs meant to be reasonably portable across libcs, and which use
+ * only a few common includes, may also be built by simply making the include
+ * path point to the nolibc directory:
+ *      $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ *            -I../nolibc -o hello hello.c -lgcc
+ *
+ * The available standard (but limited) include files are:
+ *   ctype.h, errno.h, signal.h, stdio.h, stdlib.h, string.h, time.h
+ *
+ * In addition, the following ones are expected to be provided by the compiler:
+ *   float.h, stdarg.h, stddef.h
+ *
+ * The following ones, which are part of the C standard, are not provided:
+ *   assert.h, locale.h, math.h, setjmp.h, limits.h
+ *
  * A very useful calling convention table may be found here :
  *      http://man7.org/linux/man-pages/man2/syscall.2.html
  *
  *      https://w3challs.com/syscalls/
  *
  */
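
For reference, a hello.c accepted by either build command above may be as small
as this sketch; the NOLIBC guard works exactly as described earlier (defined by
the -include nolibc.h form, absent when only the include path is redirected):

    /* Sketch: minimal program for the build lines shown above */
    #ifndef NOLIBC
    #include <stdio.h>
    #endif

    int main(void)
    {
            puts("hello");
            return 0;
    }
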
+#ifndef _NOLIBC_H
+#define _NOLIBC_H
 
-#include <asm/unistd.h>
-#include <asm/ioctls.h>
-#include <asm/errno.h>
-#include <linux/fs.h>
-#include <linux/loop.h>
-#include <linux/time.h>
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+#include "ctype.h"
+#include "signal.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "time.h"
+#include "unistd.h"
 
+/* Used by programs to avoid std includes */
 #define NOLIBC
 
-/* this way it will be removed if unused */
-static int errno;
-
-#ifndef NOLIBC_IGNORE_ERRNO
-#define SET_ERRNO(v) do { errno = (v); } while (0)
-#else
-#define SET_ERRNO(v) do { } while (0)
-#endif
-
-/* errno codes all ensure that they will not conflict with a valid pointer
- * because they all correspond to the highest addressable memory page.
- */
-#define MAX_ERRNO 4095
-
-/* Declare a few quite common macros and types that usually are in stdlib.h,
- * stdint.h, ctype.h, unistd.h and a few other common locations.
- */
-
-#define NULL ((void *)0)
-
-/* stdint types */
-typedef unsigned char       uint8_t;
-typedef   signed char        int8_t;
-typedef unsigned short     uint16_t;
-typedef   signed short      int16_t;
-typedef unsigned int       uint32_t;
-typedef   signed int        int32_t;
-typedef unsigned long long uint64_t;
-typedef   signed long long  int64_t;
-typedef unsigned long        size_t;
-typedef   signed long       ssize_t;
-typedef unsigned long     uintptr_t;
-typedef   signed long      intptr_t;
-typedef   signed long     ptrdiff_t;
-
-/* for stat() */
-typedef unsigned int          dev_t;
-typedef unsigned long         ino_t;
-typedef unsigned int         mode_t;
-typedef   signed int          pid_t;
-typedef unsigned int          uid_t;
-typedef unsigned int          gid_t;
-typedef unsigned long       nlink_t;
-typedef   signed long         off_t;
-typedef   signed long     blksize_t;
-typedef   signed long      blkcnt_t;
-typedef   signed long        time_t;
-
-/* for poll() */
-struct pollfd {
-       int fd;
-       short int events;
-       short int revents;
-};
-
-/* for getdents64() */
-struct linux_dirent64 {
-       uint64_t       d_ino;
-       int64_t        d_off;
-       unsigned short d_reclen;
-       unsigned char  d_type;
-       char           d_name[];
-};
-
-/* commonly an fd_set represents 256 FDs */
-#define FD_SETSIZE 256
-typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set;
-
-/* needed by wait4() */
-struct rusage {
-       struct timeval ru_utime;
-       struct timeval ru_stime;
-       long   ru_maxrss;
-       long   ru_ixrss;
-       long   ru_idrss;
-       long   ru_isrss;
-       long   ru_minflt;
-       long   ru_majflt;
-       long   ru_nswap;
-       long   ru_inblock;
-       long   ru_oublock;
-       long   ru_msgsnd;
-       long   ru_msgrcv;
-       long   ru_nsignals;
-       long   ru_nvcsw;
-       long   ru_nivcsw;
-};
-
-/* stat flags (WARNING, octal here) */
-#define S_IFDIR       0040000
-#define S_IFCHR       0020000
-#define S_IFBLK       0060000
-#define S_IFREG       0100000
-#define S_IFIFO       0010000
-#define S_IFLNK       0120000
-#define S_IFSOCK      0140000
-#define S_IFMT        0170000
-
-#define S_ISDIR(mode)  (((mode) & S_IFDIR) == S_IFDIR)
-#define S_ISCHR(mode)  (((mode) & S_IFCHR) == S_IFCHR)
-#define S_ISBLK(mode)  (((mode) & S_IFBLK) == S_IFBLK)
-#define S_ISREG(mode)  (((mode) & S_IFREG) == S_IFREG)
-#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
-#define S_ISLNK(mode)  (((mode) & S_IFLNK) == S_IFLNK)
-#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
-
-#define DT_UNKNOWN 0
-#define DT_FIFO    1
-#define DT_CHR     2
-#define DT_DIR     4
-#define DT_BLK     6
-#define DT_REG     8
-#define DT_LNK    10
-#define DT_SOCK   12
-
-/* all the *at functions */
-#ifndef AT_FDCWD
-#define AT_FDCWD             -100
-#endif
-
-/* lseek */
-#define SEEK_SET        0
-#define SEEK_CUR        1
-#define SEEK_END        2
-
-/* reboot */
-#define LINUX_REBOOT_MAGIC1         0xfee1dead
-#define LINUX_REBOOT_MAGIC2         0x28121969
-#define LINUX_REBOOT_CMD_HALT       0xcdef0123
-#define LINUX_REBOOT_CMD_POWER_OFF  0x4321fedc
-#define LINUX_REBOOT_CMD_RESTART    0x01234567
-#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
-
-
-/* The format of the struct as returned by the libc to the application, which
- * significantly differs from the format returned by the stat() syscall flavours.
- */
-struct stat {
-       dev_t     st_dev;     /* ID of device containing file */
-       ino_t     st_ino;     /* inode number */
-       mode_t    st_mode;    /* protection */
-       nlink_t   st_nlink;   /* number of hard links */
-       uid_t     st_uid;     /* user ID of owner */
-       gid_t     st_gid;     /* group ID of owner */
-       dev_t     st_rdev;    /* device ID (if special file) */
-       off_t     st_size;    /* total size, in bytes */
-       blksize_t st_blksize; /* blocksize for file system I/O */
-       blkcnt_t  st_blocks;  /* number of 512B blocks allocated */
-       time_t    st_atime;   /* time of last access */
-       time_t    st_mtime;   /* time of last modification */
-       time_t    st_ctime;   /* time of last status change */
-};
-
-#define WEXITSTATUS(status)   (((status) & 0xff00) >> 8)
-#define WIFEXITED(status)     (((status) & 0x7f) == 0)
-
-/* for SIGCHLD */
-#include <asm/signal.h>
-
-/* Below comes the architecture-specific code. For each architecture, we have
- * the syscall declarations and the _start code definition. This is the only
- * global part. On all architectures the kernel puts everything in the stack
- * before jumping to _start just above us, without any return address (_start
- * is not a function but an entry pint). So at the stack pointer we find argc.
- * Then argv[] begins, and ends at the first NULL. Then we have envp which
- * starts and ends with a NULL as well. So envp=argv+argc+1.
- */
-
-#if defined(__x86_64__)
-/* Syscalls for x86_64 :
- *   - registers are 64-bit
- *   - syscall number is passed in rax
- *   - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
- *   - the system call is performed by calling the syscall instruction
- *   - syscall return comes in rax
- *   - rcx and r11 are clobbered, others are preserved.
- *   - the arguments are cast to long and assigned into the target registers
- *     which are then simply passed as registers to the asm code, so that we
- *     don't have to experience issues with register constraints.
- *   - the syscall number is always specified last in order to allow to force
- *     some registers before (gcc refuses a %-register at the last position).
- *   - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
- *     Calling Conventions.
- *
- * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
- *
- */
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1),                                                 \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2),                                     \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-       register long _arg3 asm("rdx") = (long)(arg3);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-       register long _arg3 asm("rdx") = (long)(arg3);                        \
-       register long _arg4 asm("r10") = (long)(arg4);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-       register long _arg3 asm("rdx") = (long)(arg3);                        \
-       register long _arg4 asm("r10") = (long)(arg4);                        \
-       register long _arg5 asm("r8")  = (long)(arg5);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-       register long _arg3 asm("rdx") = (long)(arg3);                        \
-       register long _arg4 asm("r10") = (long)(arg4);                        \
-       register long _arg5 asm("r8")  = (long)(arg5);                        \
-       register long _arg6 asm("r9")  = (long)(arg6);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "r"(_arg6), "0"(_num)                                       \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
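For illustration only, here is a minimal usage sketch (the my_write helper is
hypothetical, not part of the header): wrapping write(2) with my_syscall3,
assuming __NR_write == 1 on x86-64. As with raw syscalls, a negative return
value encodes -errno.

#define __NR_write 1    /* x86-64 syscall number for write(2) */

static long my_write(int fd, const void *buf, long count)
{
	/* my_syscall3 yields the raw kernel return value:
	 * >= 0 on success, -errno on failure.
	 */
	return my_syscall3(__NR_write, fd, buf, count);
}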
-/* startup code */
-/*
- * x86-64 System V ABI mandates:
- * 1) %rsp must be 16-byte aligned right before the function call.
- * 2) The frame pointer (%rbp) must be zero in the deepest stack frame, marking
- *    the end of the frame-pointer chain.
- *
- */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-    "pop %rdi\n"                // argc   (first arg, %rdi)
-    "mov %rsp, %rsi\n"          // argv[] (second arg, %rsi)
-    "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
-    "xor %ebp, %ebp\n"          // zero the stack frame
-    "and $-16, %rsp\n"          // x86-64 ABI: %rsp must be 16-byte aligned before call
-    "call main\n"               // main() returns the status code, we'll exit with it.
-    "mov %eax, %edi\n"          // retrieve exit code (32 bit)
-    "mov $60, %eax\n"           // NR_exit == 60
-    "syscall\n"                 // really exit
-    "hlt\n"                     // ensure it does not return
-    "");
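To make the register setup above concrete, this is the entry stack layout the
code relies on, and the three-argument main() it effectively calls (the
three-argument form is a convention of this startup code, not of ISO C):

/* Stack on entry to _start (System V AMD64):
 *   [rsp]       argc
 *   [rsp + 8]   argv[0] .. argv[argc - 1], then a NULL
 *   above that  envp[0] .. NULL
 * hence envp == argv + argc + 1, i.e. the lea 8(%rsi,%rdi,8),%rdx above.
 */
int main(int argc, char **argv, char **envp);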
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT          0x40
-#define O_EXCL           0x80
-#define O_NOCTTY        0x100
-#define O_TRUNC         0x200
-#define O_APPEND        0x400
-#define O_NONBLOCK      0x800
-#define O_DIRECTORY   0x10000
-
-/* The struct returned by the stat() syscall, equivalent to stat64(). The
- * syscall returns 116 bytes and stops in the middle of __unused.
- */
-struct sys_stat_struct {
-       unsigned long st_dev;
-       unsigned long st_ino;
-       unsigned long st_nlink;
-       unsigned int  st_mode;
-       unsigned int  st_uid;
-
-       unsigned int  st_gid;
-       unsigned int  __pad0;
-       unsigned long st_rdev;
-       long          st_size;
-       long          st_blksize;
-
-       long          st_blocks;
-       unsigned long st_atime;
-       unsigned long st_atime_nsec;
-       unsigned long st_mtime;
-
-       unsigned long st_mtime_nsec;
-       unsigned long st_ctime;
-       unsigned long st_ctime_nsec;
-       long          __unused[3];
-};
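As a hedged usage sketch (the my_stat helper is hypothetical), the struct above
would be filled by invoking the stat syscall directly, assuming __NR_stat == 4
on x86-64:

#define __NR_stat 4     /* x86-64 */

static long my_stat(const char *path, struct sys_stat_struct *st)
{
	/* returns 0 on success, -errno on failure */
	return my_syscall2(__NR_stat, path, st);
}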
-
-#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
-/* Syscalls for i386 :
- *   - mostly similar to x86_64
- *   - registers are 32-bit
- *   - syscall number is passed in eax
- *   - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
- *   - all registers are preserved (except eax of course)
- *   - the system call is performed by calling int $0x80
- *   - syscall return comes in eax
- *   - the arguments are cast to long and assigned into the target registers,
- *     which are then simply passed as registers to the asm code, so that we
- *     don't run into issues with register constraints.
- *   - the syscall number is always specified last so that the other registers
- *     can be forced first (gcc refuses a %-register in the last position).
- *
- * Also, i386 supports the old_select syscall if newselect is not available
- */
-#define __ARCH_WANT_SYS_OLD_SELECT
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1),                                                 \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-       register long _arg2 asm("ecx") = (long)(arg2);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1), "r"(_arg2),                                     \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-       register long _arg2 asm("ecx") = (long)(arg2);                        \
-       register long _arg3 asm("edx") = (long)(arg3);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-       register long _arg2 asm("ecx") = (long)(arg2);                        \
-       register long _arg3 asm("edx") = (long)(arg3);                        \
-       register long _arg4 asm("esi") = (long)(arg4);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-       register long _arg2 asm("ecx") = (long)(arg2);                        \
-       register long _arg3 asm("edx") = (long)(arg3);                        \
-       register long _arg4 asm("esi") = (long)(arg4);                        \
-       register long _arg5 asm("edi") = (long)(arg5);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
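As a small sketch of the i386 convention (the my_exit helper is hypothetical),
exiting through int $0x80 uses syscall number 1, matching the NR_exit comment
in the startup code below:

static void my_exit(int status)
{
	my_syscall1(1, status);  /* __NR_exit == 1 on i386 */
	while (1)
		;                /* not reached */
}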
-/* startup code */
-/*
- * i386 System V ABI mandates:
- * 1) the last pushed argument must be 16-byte aligned.
- * 2) the frame pointer (%ebp) must be zero in the deepest stack frame.
- *
- */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-    "pop %eax\n"                // argc   (first arg, %eax)
-    "mov %esp, %ebx\n"          // argv[] (second arg, %ebx)
-    "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
-    "xor %ebp, %ebp\n"          // zero the stack frame
-    "and $-16, %esp\n"          // x86 ABI: %esp must be 16-byte aligned before
-    "sub $4, %esp\n"            // the call instruction (args are aligned)
-    "push %ecx\n"               // push all registers on the stack so that we
-    "push %ebx\n"               // support both regparm and plain stack modes
-    "push %eax\n"
-    "call main\n"               // main() returns the status code in %eax
-    "mov %eax, %ebx\n"          // retrieve exit code (32-bit int)
-    "movl $1, %eax\n"           // NR_exit == 1
-    "int $0x80\n"               // exit now
-    "hlt\n"                     // ensure it does not return
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT          0x40
-#define O_EXCL           0x80
-#define O_NOCTTY        0x100
-#define O_TRUNC         0x200
-#define O_APPEND        0x400
-#define O_NONBLOCK      0x800
-#define O_DIRECTORY   0x10000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array).
- */
-struct sys_stat_struct {
-       unsigned long  st_dev;
-       unsigned long  st_ino;
-       unsigned short st_mode;
-       unsigned short st_nlink;
-       unsigned short st_uid;
-       unsigned short st_gid;
-
-       unsigned long  st_rdev;
-       unsigned long  st_size;
-       unsigned long  st_blksize;
-       unsigned long  st_blocks;
-
-       unsigned long  st_atime;
-       unsigned long  st_atime_nsec;
-       unsigned long  st_mtime;
-       unsigned long  st_mtime_nsec;
-
-       unsigned long  st_ctime;
-       unsigned long  st_ctime_nsec;
-       unsigned long  __unused[2];
-};
-
-#elif defined(__ARM_EABI__)
-/* Syscalls for ARM in ARM or Thumb modes :
- *   - registers are 32-bit
- *   - stack is 8-byte aligned
- *     (http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
- *   - syscall number is passed in r7
- *   - arguments are in r0, r1, r2, r3, r4, r5
- *   - the system call is performed by calling svc #0
- *   - syscall return comes in r0.
- *   - only lr is clobbered.
- *   - the arguments are cast to long and assigned into the target registers,
- *     which are then simply passed as registers to the asm code, so that we
- *     don't run into issues with register constraints.
- *   - the syscall number is always specified last so that the other registers
- *     can be forced first (gcc refuses a %-register in the last position).
- *
- * Also, ARM supports the old_select syscall if newselect is not available
- */
-#define __ARCH_WANT_SYS_OLD_SELECT
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1),                                                 \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-       register long _arg2 asm("r1") = (long)(arg2);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2),                                     \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-       register long _arg2 asm("r1") = (long)(arg2);                         \
-       register long _arg3 asm("r2") = (long)(arg3);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-       register long _arg2 asm("r1") = (long)(arg2);                         \
-       register long _arg3 asm("r2") = (long)(arg3);                         \
-       register long _arg4 asm("r3") = (long)(arg4);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-       register long _arg2 asm("r1") = (long)(arg2);                         \
-       register long _arg3 asm("r2") = (long)(arg3);                         \
-       register long _arg4 asm("r3") = (long)(arg4);                         \
-       register long _arg5 asm("r4") = (long)(arg5);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r" (_arg1)                                                \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
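A quick usage sketch for the ARM variant (the my_getpid helper is
hypothetical, and the syscall number is an assumption based on the classic
ARM EABI table, where getpid is traditionally 20):

static long my_getpid(void)
{
	return my_syscall0(20);  /* assumed __NR_getpid on ARM EABI */
}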
-/* startup code */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-#if defined(__THUMBEB__) || defined(__THUMBEL__)
-    /* We enter here in 32-bit mode but if some previous functions were in
-     * 16-bit mode, the assembler cannot know, so we need to tell it we're in
-     * 32-bit now, then switch to 16-bit (is there a better way to do it than
-     * adding 1 by hand?) and tell the asm we're now in 16-bit mode so that
-     * it generates correct instructions. Note that we do not support thumb1.
-     */
-    ".code 32\n"
-    "add     r0, pc, #1\n"
-    "bx      r0\n"
-    ".code 16\n"
-#endif
-    "pop {%r0}\n"                 // argc was in the stack
-    "mov %r1, %sp\n"              // argv = sp
-    "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
-    "add %r2, %r2, $4\n"          //        ... + 4
-    "and %r3, %r1, $-8\n"         // AAPCS: sp must be 8-byte aligned in the
-    "mov %sp, %r3\n"              //        callee, and bl doesn't push (lr=pc)
-    "bl main\n"                   // main() returns the status code, we'll exit with it.
-    "movs r7, $1\n"               // NR_exit == 1
-    "svc $0x00\n"
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT          0x40
-#define O_EXCL           0x80
-#define O_NOCTTY        0x100
-#define O_TRUNC         0x200
-#define O_APPEND        0x400
-#define O_NONBLOCK      0x800
-#define O_DIRECTORY    0x4000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array). On big-endian, the layout
- * differs: device numbers are returned as 16-bit shorts only.
- */
-struct sys_stat_struct {
-#if defined(__ARMEB__)
-       unsigned short st_dev;
-       unsigned short __pad1;
-#else
-       unsigned long  st_dev;
-#endif
-       unsigned long  st_ino;
-       unsigned short st_mode;
-       unsigned short st_nlink;
-       unsigned short st_uid;
-       unsigned short st_gid;
-#if defined(__ARMEB__)
-       unsigned short st_rdev;
-       unsigned short __pad2;
-#else
-       unsigned long  st_rdev;
-#endif
-       unsigned long  st_size;
-       unsigned long  st_blksize;
-       unsigned long  st_blocks;
-       unsigned long  st_atime;
-       unsigned long  st_atime_nsec;
-       unsigned long  st_mtime;
-       unsigned long  st_mtime_nsec;
-       unsigned long  st_ctime;
-       unsigned long  st_ctime_nsec;
-       unsigned long  __unused[2];
-};
-
-#elif defined(__aarch64__)
-/* Syscalls for AARCH64 :
- *   - registers are 64-bit
- *   - stack is 16-byte aligned
- *   - syscall number is passed in x8
- *   - arguments are in x0, x1, x2, x3, x4, x5
- *   - the system call is performed by calling svc 0
- *   - syscall return comes in x0.
- *   - the arguments are cast to long and assigned into the target registers,
- *     which are then simply passed as registers to the asm code, so that we
- *     don't run into issues with register constraints.
- *
- * On aarch64, select() is not implemented so we have to use pselect6().
- */
-#define __ARCH_WANT_SYS_PSELECT6
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1),                                                 \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2),                                     \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-       register long _arg3 asm("x2") = (long)(arg3);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-       register long _arg3 asm("x2") = (long)(arg3);                         \
-       register long _arg4 asm("x3") = (long)(arg4);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-       register long _arg3 asm("x2") = (long)(arg3);                         \
-       register long _arg4 asm("x3") = (long)(arg4);                         \
-       register long _arg5 asm("x4") = (long)(arg5);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r" (_arg1)                                                \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-       register long _arg3 asm("x2") = (long)(arg3);                         \
-       register long _arg4 asm("x3") = (long)(arg4);                         \
-       register long _arg5 asm("x4") = (long)(arg5);                         \
-       register long _arg6 asm("x5") = (long)(arg6);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r" (_arg1)                                                \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "r"(_arg6), "r"(_num)                                       \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
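Since select() does not exist on aarch64, the __ARCH_WANT_SYS_PSELECT6 path
emulates it on top of pselect6. A hedged sketch (assuming __NR_pselect6 == 72
on arm64; timeval-to-timespec conversion and error handling omitted, and the
NULL last argument means "no signal mask"):

static long my_select(int nfds, void *rfds, void *wfds, void *efds,
                      void *timeout_ts /* struct timespec * */)
{
	return my_syscall6(72, nfds, rfds, wfds, efds, timeout_ts, (void *)0);
}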
-/* startup code */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-    "ldr x0, [sp]\n"              // argc (x0) was in the stack
-    "add x1, sp, 8\n"             // argv (x1) = sp
-    "lsl x2, x0, 3\n"             // envp (x2) = 8*argc ...
-    "add x2, x2, 8\n"             //           + 8 (skip null)
-    "add x2, x2, x1\n"            //           + argv
-    "and sp, x1, -16\n"           // sp must be 16-byte aligned in the callee
-    "bl main\n"                   // main() returns the status code, we'll exit with it.
-    "mov x8, 93\n"                // NR_exit == 93
-    "svc #0\n"
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT          0x40
-#define O_EXCL           0x80
-#define O_NOCTTY        0x100
-#define O_TRUNC         0x200
-#define O_APPEND        0x400
-#define O_NONBLOCK      0x800
-#define O_DIRECTORY    0x4000
-
-/* The struct returned by the newfstatat() syscall. It differs slightly from
- * x86_64's stat struct in field ordering, so be careful.
- */
-struct sys_stat_struct {
-       unsigned long   st_dev;
-       unsigned long   st_ino;
-       unsigned int    st_mode;
-       unsigned int    st_nlink;
-       unsigned int    st_uid;
-       unsigned int    st_gid;
-
-       unsigned long   st_rdev;
-       unsigned long   __pad1;
-       long            st_size;
-       int             st_blksize;
-       int             __pad2;
-
-       long            st_blocks;
-       long            st_atime;
-       unsigned long   st_atime_nsec;
-       long            st_mtime;
-
-       unsigned long   st_mtime_nsec;
-       long            st_ctime;
-       unsigned long   st_ctime_nsec;
-       unsigned int    __unused[2];
-};
-
-#elif defined(__mips__) && defined(_ABIO32)
-/* Syscalls for MIPS ABI O32 :
- *   - WARNING! there's always a delay slot after branch instructions!
- *   - WARNING again, the syntax is different, registers take a '$' and numbers
- *     do not.
- *   - registers are 32-bit
- *   - stack is 8-byte aligned
- *   - syscall number is passed in v0 (O32 numbers start at 4000, i.e. 0xfa0).
- *   - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
- *     leave some room on the stack for the callee to save a0..a3 if needed.
- *   - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
- *     preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
- *     scall32-o32.S in the kernel sources.
- *   - the system call is performed by calling "syscall"
- *   - syscall return comes in v0, and register a3 needs to be checked to know
- *     if an error occurred, in which case errno is in v0.
- *   - the arguments are cast to long and assigned into the target registers,
- *     which are then simply passed as registers to the asm code, so that we
- *     don't run into issues with register constraints.
- */
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg4 asm("a3");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r"(_num), "=r"(_arg4)                                     \
-               : "r"(_num)                                                   \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg4 asm("a3");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r"(_num), "=r"(_arg4)                                     \
-               : "0"(_num),                                                  \
-                 "r"(_arg1)                                                  \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg4 asm("a3");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r"(_num), "=r"(_arg4)                                     \
-               : "0"(_num),                                                  \
-                 "r"(_arg1), "r"(_arg2)                                      \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       register long _num asm("v0")  = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r"(_num), "=r"(_arg4)                                     \
-               : "0"(_num),                                                  \
-                 "r"(_arg1), "r"(_arg2), "r"(_arg3)                          \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r" (_num), "=r"(_arg4)                                    \
-               : "0"(_num),                                                  \
-                 "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4)              \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-       register long _arg5 = (long)(arg5);                                   \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "sw %7, 16($sp)\n"                                            \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r" (_num), "=r"(_arg4)                                    \
-               : "0"(_num),                                                  \
-                 "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5)  \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
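Because a3 flags errors on MIPS, the macros above already fold it into a
negative return value. A sketch of consuming that result (the my_close helper
is hypothetical, and __NR_close == 4006 on O32 is an assumption):

static long my_close(int fd)
{
	long ret = my_syscall1(4006, fd);  /* assumed __NR_close on MIPS O32 */

	/* the macro returned -errno when a3 was set; a libc-style
	 * wrapper would do: if (ret < 0) { errno = -ret; ret = -1; }
	 */
	return ret;
}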
-/* startup code, note that it's called __start on MIPS */
-asm(".section .text\n"
-    ".set nomips16\n"
-    ".global __start\n"
-    ".set    noreorder\n"
-    ".option pic0\n"
-    ".ent __start\n"
-    "__start:\n"
-    "lw $a0,($sp)\n"              // argc was in the stack
-    "addiu  $a1, $sp, 4\n"        // argv = sp + 4
-    "sll $a2, $a0, 2\n"           // a2 = argc * 4
-    "add   $a2, $a2, $a1\n"       // envp = argv + 4*argc ...
-    "addiu $a2, $a2, 4\n"         //        ... + 4
-    "li $t0, -8\n"
-    "and $sp, $sp, $t0\n"         // sp must be 8-byte aligned
-    "addiu $sp,$sp,-16\n"         // the callee expects to save a0..a3 there!
-    "jal main\n"                  // main() returns the status code, we'll exit with it.
-    "nop\n"                       // branch delay slot
-    "move $a0, $v0\n"             // retrieve 32-bit exit code from v0
-    "li $v0, 4001\n"              // NR_exit == 4001
-    "syscall\n"
-    ".end __start\n"
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_APPEND       0x0008
-#define O_NONBLOCK     0x0080
-#define O_CREAT        0x0100
-#define O_TRUNC        0x0200
-#define O_EXCL         0x0400
-#define O_NOCTTY       0x0800
-#define O_DIRECTORY   0x10000
-
-/* The struct returned by the stat() syscall. 88 bytes are returned by the
- * syscall.
- */
-struct sys_stat_struct {
-       unsigned int  st_dev;
-       long          st_pad1[3];
-       unsigned long st_ino;
-       unsigned int  st_mode;
-       unsigned int  st_nlink;
-       unsigned int  st_uid;
-       unsigned int  st_gid;
-       unsigned int  st_rdev;
-       long          st_pad2[2];
-       long          st_size;
-       long          st_pad3;
-       long          st_atime;
-       long          st_atime_nsec;
-       long          st_mtime;
-       long          st_mtime_nsec;
-       long          st_ctime;
-       long          st_ctime_nsec;
-       long          st_blksize;
-       long          st_blocks;
-       long          st_pad4[14];
-};
-
-#elif defined(__riscv)
-
-#if   __riscv_xlen == 64
-#define PTRLOG "3"
-#define SZREG  "8"
-#elif __riscv_xlen == 32
-#define PTRLOG "2"
-#define SZREG  "4"
-#endif
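PTRLOG and SZREG parameterize the startup assembly over XLEN, so the same
string constants work for both RV32 and RV64. An illustrative sketch of how
they compose via string concatenation (not a verbatim quote of the startup
code below):

asm("add  a1, sp, " SZREG "\n"    /* argv = sp + one register slot   */
    "slli a2, a0, " PTRLOG "\n"   /* a2   = argc * sizeof(void *)    */
    "add  a2, a2, a1\n"           /* a2  += argv                     */
    "add  a2, a2, " SZREG "\n");  /* envp = argv + argc + 1 (skip 0) */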
-
-/* Syscalls for RISCV :
- *   - stack is 16-byte aligned
- *   - syscall number is passed in a7
- *   - arguments are in a0, a1, a2, a3, a4, a5
- *   - the system call is performed by calling ecall
- *   - syscall return comes in a0
- *   - the arguments are cast to long and assigned into the target
- *     registers, which are then simply passed as registers to the asm code,
- *     so that we don't run into issues with register constraints.
- */
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n\t"                                                   \
-               : "=r"(_arg1)                                                 \
-               : "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2),                                                 \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n\t"                                                   \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2), "r"(_arg3),                                     \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-       register long _arg5 asm("a4") = (long)(arg5);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-       register long _arg5 asm("a4") = (long)(arg5);                         \
-       register long _arg6 asm("a5") = (long)(arg6);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-/* startup code */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-    ".option push\n"
-    ".option norelax\n"
-    "lla   gp, __global_pointer$\n"
-    ".option pop\n"
-    "ld    a0, 0(sp)\n"          // argc (a0) was in the stack
-    "add   a1, sp, "SZREG"\n"    // argv (a1) = sp
-    "slli  a2, a0, "PTRLOG"\n"   // envp (a2) = SZREG*argc ...
-    "add   a2, a2, "SZREG"\n"    //             + SZREG (skip null)
-    "add   a2,a2,a1\n"           //             + argv
-    "andi  sp,a1,-16\n"          // sp must be 16-byte aligned
-    "call  main\n"               // main() returns the status code, we'll exit with it.
-    "li a7, 93\n"                // NR_exit == 93
-    "ecall\n"
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT         0x100
-#define O_EXCL          0x200
-#define O_NOCTTY        0x400
-#define O_TRUNC        0x1000
-#define O_APPEND       0x2000
-#define O_NONBLOCK     0x4000
-#define O_DIRECTORY  0x200000
-
-struct sys_stat_struct {
-       unsigned long   st_dev;         /* Device.  */
-       unsigned long   st_ino;         /* File serial number.  */
-       unsigned int    st_mode;        /* File mode.  */
-       unsigned int    st_nlink;       /* Link count.  */
-       unsigned int    st_uid;         /* User ID of the file's owner.  */
-       unsigned int    st_gid;         /* Group ID of the file's group. */
-       unsigned long   st_rdev;        /* Device number, if device.  */
-       unsigned long   __pad1;
-       long            st_size;        /* Size of file, in bytes.  */
-       int             st_blksize;     /* Optimal block size for I/O.  */
-       int             __pad2;
-       long            st_blocks;      /* Number of 512-byte blocks allocated. */
-       long            st_atime;       /* Time of last access.  */
-       unsigned long   st_atime_nsec;
-       long            st_mtime;       /* Time of last modification.  */
-       unsigned long   st_mtime_nsec;
-       long            st_ctime;       /* Time of last status change.  */
-       unsigned long   st_ctime_nsec;
-       unsigned int    __unused4;
-       unsigned int    __unused5;
-};
-
-#endif
-
-
-/* Below are the C functions used to declare the raw syscalls. They try to be
- * architecture-agnostic, and return either a success or -errno. Declaring them
- * static will lead to them being inlined in most cases, but it's still possible
- * to reference them by a pointer if needed.
- */
-static __attribute__((unused))
-void *sys_brk(void *addr)
-{
-       return (void *)my_syscall1(__NR_brk, addr);
-}
-
-static __attribute__((noreturn,unused))
-void sys_exit(int status)
-{
-       my_syscall1(__NR_exit, status & 255);
-       while (1); // silence the "noreturn" warnings.
-}
-
-static __attribute__((unused))
-int sys_chdir(const char *path)
-{
-       return my_syscall1(__NR_chdir, path);
-}
-
-static __attribute__((unused))
-int sys_chmod(const char *path, mode_t mode)
-{
-#ifdef __NR_fchmodat
-       return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
-#elif defined(__NR_chmod)
-       return my_syscall2(__NR_chmod, path, mode);
-#else
-#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
-#endif
-}
-
-static __attribute__((unused))
-int sys_chown(const char *path, uid_t owner, gid_t group)
-{
-#ifdef __NR_fchownat
-       return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
-#elif defined(__NR_chown)
-       return my_syscall3(__NR_chown, path, owner, group);
-#else
-#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
-#endif
-}
-
-static __attribute__((unused))
-int sys_chroot(const char *path)
-{
-       return my_syscall1(__NR_chroot, path);
-}
-
-static __attribute__((unused))
-int sys_close(int fd)
-{
-       return my_syscall1(__NR_close, fd);
-}
-
-static __attribute__((unused))
-int sys_dup(int fd)
-{
-       return my_syscall1(__NR_dup, fd);
-}
-
-#ifdef __NR_dup3
-static __attribute__((unused))
-int sys_dup3(int old, int new, int flags)
-{
-       return my_syscall3(__NR_dup3, old, new, flags);
-}
-#endif
-
-static __attribute__((unused))
-int sys_dup2(int old, int new)
-{
-#ifdef __NR_dup3
-       return my_syscall3(__NR_dup3, old, new, 0);
-#elif defined(__NR_dup2)
-       return my_syscall2(__NR_dup2, old, new);
-#else
-#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
-#endif
-}
-
-static __attribute__((unused))
-int sys_execve(const char *filename, char *const argv[], char *const envp[])
-{
-       return my_syscall3(__NR_execve, filename, argv, envp);
-}
-
-static __attribute__((unused))
-pid_t sys_fork(void)
-{
-#ifdef __NR_clone
-       /* note: some archs only have clone() and not fork(). The clone() API
-        * differs between archs, but most of them take the flags in the first
-        * argument and ignore the remaining arguments when no other flag is
-        * set.
-        */
-       return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
-#elif defined(__NR_fork)
-       return my_syscall0(__NR_fork);
-#else
-#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
-#endif
-}
-
-static __attribute__((unused))
-int sys_fsync(int fd)
-{
-       return my_syscall1(__NR_fsync, fd);
-}
-
-static __attribute__((unused))
-int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
-{
-       return my_syscall3(__NR_getdents64, fd, dirp, count);
-}
-
-static __attribute__((unused))
-pid_t sys_getpgid(pid_t pid)
-{
-       return my_syscall1(__NR_getpgid, pid);
-}
-
-static __attribute__((unused))
-pid_t sys_getpgrp(void)
-{
-       return sys_getpgid(0);
-}
-
-static __attribute__((unused))
-pid_t sys_getpid(void)
-{
-       return my_syscall0(__NR_getpid);
-}
-
-static __attribute__((unused))
-pid_t sys_gettid(void)
-{
-       return my_syscall0(__NR_gettid);
-}
-
-static __attribute__((unused))
-int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-       return my_syscall2(__NR_gettimeofday, tv, tz);
-}
-
-static __attribute__((unused))
-int sys_ioctl(int fd, unsigned long req, void *value)
-{
-       return my_syscall3(__NR_ioctl, fd, req, value);
-}
-
-static __attribute__((unused))
-int sys_kill(pid_t pid, int signal)
-{
-       return my_syscall2(__NR_kill, pid, signal);
-}
-
-static __attribute__((unused))
-int sys_link(const char *old, const char *new)
-{
-#ifdef __NR_linkat
-       return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
-#elif defined(__NR_link)
-       return my_syscall2(__NR_link, old, new);
-#else
-#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
-#endif
-}
-
-static __attribute__((unused))
-off_t sys_lseek(int fd, off_t offset, int whence)
-{
-       return my_syscall3(__NR_lseek, fd, offset, whence);
-}
-
-static __attribute__((unused))
-int sys_mkdir(const char *path, mode_t mode)
-{
-#ifdef __NR_mkdirat
-       return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
-#elif defined(__NR_mkdir)
-       return my_syscall2(__NR_mkdir, path, mode);
-#else
-#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
-#endif
-}
-
-static __attribute__((unused))
-long sys_mknod(const char *path, mode_t mode, dev_t dev)
-{
-#ifdef __NR_mknodat
-       return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
-#elif defined(__NR_mknod)
-       return my_syscall3(__NR_mknod, path, mode, dev);
-#else
-#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
-#endif
-}
-
-static __attribute__((unused))
-int sys_mount(const char *src, const char *tgt, const char *fst,
-             unsigned long flags, const void *data)
-{
-       return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
-}
-
-static __attribute__((unused))
-int sys_open(const char *path, int flags, mode_t mode)
-{
-#ifdef __NR_openat
-       return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-#elif defined(__NR_open)
-       return my_syscall3(__NR_open, path, flags, mode);
-#else
-#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
-#endif
-}
-
-static __attribute__((unused))
-int sys_pivot_root(const char *new, const char *old)
-{
-       return my_syscall2(__NR_pivot_root, new, old);
-}
-
-static __attribute__((unused))
-int sys_poll(struct pollfd *fds, int nfds, int timeout)
-{
-#if defined(__NR_ppoll)
-       struct timespec t;
-
-       if (timeout >= 0) {
-               t.tv_sec  = timeout / 1000;
-               t.tv_nsec = (timeout % 1000) * 1000000;
-       }
-       return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
-#elif defined(__NR_poll)
-       return my_syscall3(__NR_poll, fds, nfds, timeout);
-#else
-#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
-#endif
-}
-
-static __attribute__((unused))
-ssize_t sys_read(int fd, void *buf, size_t count)
-{
-       return my_syscall3(__NR_read, fd, buf, count);
-}
-
-static __attribute__((unused))
-ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
-{
-       return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
-}
-
-static __attribute__((unused))
-int sys_sched_yield(void)
-{
-       return my_syscall0(__NR_sched_yield);
-}
-
-static __attribute__((unused))
-int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
-       struct sel_arg_struct {
-               unsigned long n;
-               fd_set *r, *w, *e;
-               struct timeval *t;
-       } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
-       return my_syscall1(__NR_select, &arg);
-#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
-       struct timespec t;
-
-       if (timeout) {
-               t.tv_sec  = timeout->tv_sec;
-               t.tv_nsec = timeout->tv_usec * 1000;
-       }
-       return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#elif defined(__NR__newselect) || defined(__NR_select)
-#ifndef __NR__newselect
-#define __NR__newselect __NR_select
-#endif
-       return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
-#else
-#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
-#endif
-}
-
-static __attribute__((unused))
-int sys_setpgid(pid_t pid, pid_t pgid)
-{
-       return my_syscall2(__NR_setpgid, pid, pgid);
-}
-
-static __attribute__((unused))
-pid_t sys_setsid(void)
-{
-       return my_syscall0(__NR_setsid);
-}
-
-static __attribute__((unused))
-int sys_stat(const char *path, struct stat *buf)
-{
-       struct sys_stat_struct stat;
-       long ret;
-
-#ifdef __NR_newfstatat
-       /* only solution for arm64 */
-       ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
-#elif defined(__NR_stat)
-       ret = my_syscall2(__NR_stat, path, &stat);
-#else
-#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
-#endif
-       buf->st_dev     = stat.st_dev;
-       buf->st_ino     = stat.st_ino;
-       buf->st_mode    = stat.st_mode;
-       buf->st_nlink   = stat.st_nlink;
-       buf->st_uid     = stat.st_uid;
-       buf->st_gid     = stat.st_gid;
-       buf->st_rdev    = stat.st_rdev;
-       buf->st_size    = stat.st_size;
-       buf->st_blksize = stat.st_blksize;
-       buf->st_blocks  = stat.st_blocks;
-       buf->st_atime   = stat.st_atime;
-       buf->st_mtime   = stat.st_mtime;
-       buf->st_ctime   = stat.st_ctime;
-       return ret;
-}
-
-
-static __attribute__((unused))
-int sys_symlink(const char *old, const char *new)
-{
-#ifdef __NR_symlinkat
-       return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
-#elif defined(__NR_symlink)
-       return my_syscall2(__NR_symlink, old, new);
-#else
-#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
-#endif
-}
-
-static __attribute__((unused))
-mode_t sys_umask(mode_t mode)
-{
-       return my_syscall1(__NR_umask, mode);
-}
-
-static __attribute__((unused))
-int sys_umount2(const char *path, int flags)
-{
-       return my_syscall2(__NR_umount2, path, flags);
-}
-
-static __attribute__((unused))
-int sys_unlink(const char *path)
-{
-#ifdef __NR_unlinkat
-       return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
-#elif defined(__NR_unlink)
-       return my_syscall1(__NR_unlink, path);
-#else
-#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
-#endif
-}
-
-static __attribute__((unused))
-pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
-       return my_syscall4(__NR_wait4, pid, status, options, rusage);
-}
-
-static __attribute__((unused))
-pid_t sys_waitpid(pid_t pid, int *status, int options)
-{
-       return sys_wait4(pid, status, options, 0);
-}
-
-static __attribute__((unused))
-pid_t sys_wait(int *status)
-{
-       return sys_waitpid(-1, status, 0);
-}
-
-static __attribute__((unused))
-ssize_t sys_write(int fd, const void *buf, size_t count)
-{
-       return my_syscall3(__NR_write, fd, buf, count);
-}
-
-
-/* Below are the libc-compatible syscalls which return x or -1 and set errno.
- * They rely on the functions above. Similarly they're marked static so that it
- * is possible to assign pointers to them if needed.
- */
-
-static __attribute__((unused))
-int brk(void *addr)
-{
-       void *ret = sys_brk(addr);
-
-       if (!ret) {
-               SET_ERRNO(ENOMEM);
-               return -1;
-       }
-       return 0;
-}
-
-static __attribute__((noreturn,unused))
-void exit(int status)
-{
-       sys_exit(status);
-}
-
-static __attribute__((unused))
-int chdir(const char *path)
-{
-       int ret = sys_chdir(path);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int chmod(const char *path, mode_t mode)
-{
-       int ret = sys_chmod(path, mode);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int chown(const char *path, uid_t owner, gid_t group)
-{
-       int ret = sys_chown(path, owner, group);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int chroot(const char *path)
-{
-       int ret = sys_chroot(path);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int close(int fd)
-{
-       int ret = sys_close(fd);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int dup(int fd)
-{
-       int ret = sys_dup(fd);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int dup2(int old, int new)
-{
-       int ret = sys_dup2(old, new);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-#ifdef __NR_dup3
-static __attribute__((unused))
-int dup3(int old, int new, int flags)
-{
-       int ret = sys_dup3(old, new, flags);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-#endif
-
-static __attribute__((unused))
-int execve(const char *filename, char *const argv[], char *const envp[])
-{
-       int ret = sys_execve(filename, argv, envp);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t fork(void)
-{
-       pid_t ret = sys_fork();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int fsync(int fd)
-{
-       int ret = sys_fsync(fd);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int getdents64(int fd, struct linux_dirent64 *dirp, int count)
-{
-       int ret = sys_getdents64(fd, dirp, count);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t getpgid(pid_t pid)
-{
-       pid_t ret = sys_getpgid(pid);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t getpgrp(void)
-{
-       pid_t ret = sys_getpgrp();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t getpid(void)
-{
-       pid_t ret = sys_getpid();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t gettid(void)
-{
-       pid_t ret = sys_gettid();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-       int ret = sys_gettimeofday(tv, tz);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int ioctl(int fd, unsigned long req, void *value)
-{
-       int ret = sys_ioctl(fd, req, value);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int kill(pid_t pid, int signal)
-{
-       int ret = sys_kill(pid, signal);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int link(const char *old, const char *new)
-{
-       int ret = sys_link(old, new);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-off_t lseek(int fd, off_t offset, int whence)
-{
-       off_t ret = sys_lseek(fd, offset, whence);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int mkdir(const char *path, mode_t mode)
-{
-       int ret = sys_mkdir(path, mode);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int mknod(const char *path, mode_t mode, dev_t dev)
-{
-       int ret = sys_mknod(path, mode, dev);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int mount(const char *src, const char *tgt,
-         const char *fst, unsigned long flags,
-         const void *data)
-{
-       int ret = sys_mount(src, tgt, fst, flags, data);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int open(const char *path, int flags, mode_t mode)
-{
-       int ret = sys_open(path, flags, mode);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int pivot_root(const char *new, const char *old)
-{
-       int ret = sys_pivot_root(new, old);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int poll(struct pollfd *fds, int nfds, int timeout)
-{
-       int ret = sys_poll(fds, nfds, timeout);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-ssize_t read(int fd, void *buf, size_t count)
-{
-       ssize_t ret = sys_read(fd, buf, count);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int reboot(int cmd)
-{
-       int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-void *sbrk(intptr_t inc)
-{
-       void *ret;
-
-       /* first call to find current end */
-       if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
-               return ret + inc;
-
-       SET_ERRNO(ENOMEM);
-       return (void *)-1;
-}
-
-static __attribute__((unused))
-int sched_yield(void)
-{
-       int ret = sys_sched_yield();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-       int ret = sys_select(nfds, rfds, wfds, efds, timeout);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int setpgid(pid_t pid, pid_t pgid)
-{
-       int ret = sys_setpgid(pid, pgid);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t setsid(void)
-{
-       pid_t ret = sys_setsid();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-unsigned int sleep(unsigned int seconds)
-{
-       struct timeval my_timeval = { seconds, 0 };
-
-       if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
-               return my_timeval.tv_sec + !!my_timeval.tv_usec;
-       else
-               return 0;
-}
-
-static __attribute__((unused))
-int msleep(unsigned int msecs)
-{
-       struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
-
-       if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
-               return (my_timeval.tv_sec * 1000) +
-                       (my_timeval.tv_usec / 1000) +
-                       !!(my_timeval.tv_usec % 1000);
-       else
-               return 0;
-}
-
-static __attribute__((unused))
-int stat(const char *path, struct stat *buf)
-{
-       int ret = sys_stat(path, buf);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int symlink(const char *old, const char *new)
-{
-       int ret = sys_symlink(old, new);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int tcsetpgrp(int fd, pid_t pid)
-{
-       return ioctl(fd, TIOCSPGRP, &pid);
-}
-
-static __attribute__((unused))
-mode_t umask(mode_t mode)
-{
-       return sys_umask(mode);
-}
-
-static __attribute__((unused))
-int umount2(const char *path, int flags)
-{
-       int ret = sys_umount2(path, flags);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int unlink(const char *path)
-{
-       int ret = sys_unlink(path);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
-       pid_t ret = sys_wait4(pid, status, options, rusage);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t waitpid(pid_t pid, int *status, int options)
-{
-       pid_t ret = sys_waitpid(pid, status, options);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t wait(int *status)
-{
-       pid_t ret = sys_wait(status);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-ssize_t write(int fd, const void *buf, size_t count)
-{
-       ssize_t ret = sys_write(fd, buf, count);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-/* some size-optimized reimplementations of a few common str* and mem*
- * functions. They're marked static, except memcpy() and raise() which are used
- * by libgcc on ARM, so they are marked weak instead in order not to cause an
- * error when building a program made of multiple files (not recommended).
- */
-
-static __attribute__((unused))
-void *memmove(void *dst, const void *src, size_t len)
-{
-       ssize_t pos = (dst <= src) ? -1 : (long)len;
-       void *ret = dst;
-
-       while (len--) {
-               pos += (dst <= src) ? 1 : -1;
-               ((char *)dst)[pos] = ((char *)src)[pos];
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-void *memset(void *dst, int b, size_t len)
-{
-       char *p = dst;
-
-       while (len--)
-               *(p++) = b;
-       return dst;
-}
-
-static __attribute__((unused))
-int memcmp(const void *s1, const void *s2, size_t n)
-{
-       size_t ofs = 0;
-       char c1 = 0;
-
-       while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) {
-               ofs++;
-       }
-       return c1;
-}
-
-static __attribute__((unused))
-char *strcpy(char *dst, const char *src)
-{
-       char *ret = dst;
-
-       while ((*dst++ = *src++));
-       return ret;
-}
-
-static __attribute__((unused))
-char *strchr(const char *s, int c)
-{
-       while (*s) {
-               if (*s == (char)c)
-                       return (char *)s;
-               s++;
-       }
-       return NULL;
-}
-
-static __attribute__((unused))
-char *strrchr(const char *s, int c)
-{
-       const char *ret = NULL;
-
-       while (*s) {
-               if (*s == (char)c)
-                       ret = s;
-               s++;
-       }
-       return (char *)ret;
-}
-
-static __attribute__((unused))
-size_t nolibc_strlen(const char *str)
-{
-       size_t len;
-
-       for (len = 0; str[len]; len++);
-       return len;
-}
-
-#define strlen(str) ({                          \
-       __builtin_constant_p((str)) ?           \
-               __builtin_strlen((str)) :       \
-               nolibc_strlen((str));           \
-})
-
-static __attribute__((unused))
-int isdigit(int c)
-{
-       return (unsigned int)(c - '0') <= 9;
-}
-
-static __attribute__((unused))
-long atol(const char *s)
-{
-       unsigned long ret = 0;
-       unsigned long d;
-       int neg = 0;
-
-       if (*s == '-') {
-               neg = 1;
-               s++;
-       }
-
-       while (1) {
-               d = (*s++) - '0';
-               if (d > 9)
-                       break;
-               ret *= 10;
-               ret += d;
-       }
-
-       return neg ? -ret : ret;
-}
-
-static __attribute__((unused))
-int atoi(const char *s)
-{
-       return atol(s);
-}
-
-static __attribute__((unused))
-const char *ltoa(long in)
-{
-       /* large enough for -9223372036854775808 */
-       static char buffer[21];
-       char       *pos = buffer + sizeof(buffer) - 1;
-       int         neg = in < 0;
-       unsigned long n = neg ? -in : in;
-
-       *pos-- = '\0';
-       do {
-               *pos-- = '0' + n % 10;
-               n /= 10;
-               if (pos < buffer)
-                       return pos + 1;
-       } while (n);
-
-       if (neg)
-               *pos-- = '-';
-       return pos + 1;
-}
-
-__attribute__((weak,unused))
-void *memcpy(void *dst, const void *src, size_t len)
-{
-       return memmove(dst, src, len);
-}
-
-/* needed by libgcc for divide by zero */
-__attribute__((weak,unused))
-int raise(int signal)
-{
-       return kill(getpid(), signal);
-}
-
-/* Here come a few helper functions */
-
-static __attribute__((unused))
-void FD_ZERO(fd_set *set)
-{
-       memset(set, 0, sizeof(*set));
-}
-
-static __attribute__((unused))
-void FD_SET(int fd, fd_set *set)
-{
-       if (fd < 0 || fd >= FD_SETSIZE)
-               return;
-       set->fd32[fd / 32] |= 1 << (fd & 31);
-}
-
-/* WARNING: it only handles the first 4096 majors and the first 256 minors */
-static __attribute__((unused))
-dev_t makedev(unsigned int major, unsigned int minor)
-{
-       return ((major & 0xfff) << 8) | (minor & 0xff);
-}
+#endif /* _NOLIBC_H */
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
new file mode 100644 (file)
index 0000000..ef47e71
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * signal function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_SIGNAL_H
+#define _NOLIBC_SIGNAL_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+/* This one is not marked static as it's needed by libgcc for divide by zero */
+__attribute__((weak,unused,section(".text.nolibc_raise")))
+int raise(int signal)
+{
+       return sys_kill(sys_getpid(), signal);
+}
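+
+/* Usage sketch, illustration only (not part of this commit): libgcc's
+ * division helpers call raise() on integer division by zero; doing the same
+ * by hand looks like this. SIGFPE is assumed to be visible through the
+ * includes above.
+ */
+static __attribute__((unused))
+int nolibc_raise_example(void)
+{
+       return raise(SIGFPE);   /* same as sys_kill(sys_getpid(), SIGFPE) */
+}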
+
+#endif /* _NOLIBC_SIGNAL_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
new file mode 100644 (file)
index 0000000..1747ae1
--- /dev/null
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Standard definitions and types for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STD_H
+#define _NOLIBC_STD_H
+
+/* Declare a few quite common macros and types that usually are in stdlib.h,
+ * stdint.h, ctype.h, unistd.h and a few other common locations. Please place
+ * integer type definitions and generic macros here, but avoid OS-specific and
+ * syscall-specific stuff, as this file is expected to be included very early.
+ */
+
+/* note: may already be defined */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+/* stdint types */
+typedef unsigned char       uint8_t;
+typedef   signed char        int8_t;
+typedef unsigned short     uint16_t;
+typedef   signed short      int16_t;
+typedef unsigned int       uint32_t;
+typedef   signed int        int32_t;
+typedef unsigned long long uint64_t;
+typedef   signed long long  int64_t;
+typedef unsigned long        size_t;
+typedef   signed long       ssize_t;
+typedef unsigned long     uintptr_t;
+typedef   signed long      intptr_t;
+typedef   signed long     ptrdiff_t;
+
+/* those are commonly provided by sys/types.h */
+typedef unsigned int          dev_t;
+typedef unsigned long         ino_t;
+typedef unsigned int         mode_t;
+typedef   signed int          pid_t;
+typedef unsigned int          uid_t;
+typedef unsigned int          gid_t;
+typedef unsigned long       nlink_t;
+typedef   signed long         off_t;
+typedef   signed long     blksize_t;
+typedef   signed long      blkcnt_t;
+typedef   signed long        time_t;
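+
+/* Illustration only, not part of this commit: the fixed-width types above
+ * assume the usual Linux ILP32/LP64 data models; a C89-style compile-time
+ * check of that assumption could look like this:
+ */
+typedef char nolibc_example_type_check[(sizeof(uint32_t) == 4 &&
+                                        sizeof(uint64_t) == 8 &&
+                                        sizeof(void *) == sizeof(size_t)) ? 1 : -1];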
+
+#endif /* _NOLIBC_STD_H */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
new file mode 100644 (file)
index 0000000..15dedf8
--- /dev/null
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * minimal stdio function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STDIO_H
+#define _NOLIBC_STDIO_H
+
+#include <stdarg.h>
+
+#include "std.h"
+#include "arch.h"
+#include "errno.h"
+#include "types.h"
+#include "sys.h"
+#include "stdlib.h"
+#include "string.h"
+
+#ifndef EOF
+#define EOF (-1)
+#endif
+
+/* just define FILE as a non-empty type */
+typedef struct FILE {
+       char dummy[1];
+} FILE;
+
+/* We define the 3 common stdio files as constant invalid pointers that
+ * are easily recognized.
+ */
+static __attribute__((unused)) FILE* const stdin  = (FILE*)-3;
+static __attribute__((unused)) FILE* const stdout = (FILE*)-2;
+static __attribute__((unused)) FILE* const stderr = (FILE*)-1;
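+
+/* Illustration only, not part of this commit: the functions below recover
+ * the underlying file descriptor as fd = 3 + (long)stream, mapping stdin
+ * (-3) to 0, stdout (-2) to 1 and stderr (-1) to 2. A hypothetical helper:
+ */
+static __attribute__((unused))
+int nolibc_example_fileno(FILE *stream)
+{
+       if (stream < stdin || stream > stderr)
+               return -1;
+       return 3 + (long)stream;
+}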
+
+/* getc(), fgetc(), getchar() */
+
+#define getc(stream) fgetc(stream)
+
+static __attribute__((unused))
+int fgetc(FILE* stream)
+{
+       unsigned char ch;
+       int fd;
+
+       if (stream < stdin || stream > stderr)
+               return EOF;
+
+       fd = 3 + (long)stream;
+
+       if (read(fd, &ch, 1) <= 0)
+               return EOF;
+       return ch;
+}
+
+static __attribute__((unused))
+int getchar(void)
+{
+       return fgetc(stdin);
+}
+
+
+/* putc(), fputc(), putchar() */
+
+#define putc(c, stream) fputc(c, stream)
+
+static __attribute__((unused))
+int fputc(int c, FILE* stream)
+{
+       unsigned char ch = c;
+       int fd;
+
+       if (stream < stdin || stream > stderr)
+               return EOF;
+
+       fd = 3 + (long)stream;
+
+       if (write(fd, &ch, 1) <= 0)
+               return EOF;
+       return ch;
+}
+
+static __attribute__((unused))
+int putchar(int c)
+{
+       return fputc(c, stdout);
+}
+
+
+/* fwrite(), puts(), fputs(). Note that puts() appends '\n' while fputs() does not. */
+
+/* internal fwrite()-like function which only takes a size and returns 0 on
+ * success or EOF on error. It automatically retries on short writes.
+ */
+static __attribute__((unused))
+int _fwrite(const void *buf, size_t size, FILE *stream)
+{
+       ssize_t ret;
+       int fd;
+
+       if (stream < stdin || stream > stderr)
+               return EOF;
+
+       fd = 3 + (long)stream;
+
+       while (size) {
+               ret = write(fd, buf, size);
+               if (ret <= 0)
+                       return EOF;
+               size -= ret;
+               buf += ret;
+       }
+       return 0;
+}
+
+static __attribute__((unused))
+size_t fwrite(const void *s, size_t size, size_t nmemb, FILE *stream)
+{
+       size_t written;
+
+       for (written = 0; written < nmemb; written++) {
+               if (_fwrite(s, size, stream) != 0)
+                       break;
+               s += size;
+       }
+       return written;
+}
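+
+/* Usage sketch, illustration only (not part of this commit): fwrite()
+ * returns the number of complete <size>-byte members written, so writing
+ * one 6-byte record returns 1 on success:
+ */
+static __attribute__((unused))
+size_t nolibc_fwrite_example(void)
+{
+       static const char msg[] = "hello\n";
+
+       return fwrite(msg, sizeof(msg) - 1, 1, stdout);
+}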
+
+static __attribute__((unused))
+int fputs(const char *s, FILE *stream)
+{
+       return _fwrite(s, strlen(s), stream);
+}
+
+static __attribute__((unused))
+int puts(const char *s)
+{
+       if (fputs(s, stdout) == EOF)
+               return EOF;
+       return putchar('\n');
+}
+
+
+/* fgets() */
+static __attribute__((unused))
+char *fgets(char *s, int size, FILE *stream)
+{
+       int ofs;
+       int c;
+
+       for (ofs = 0; ofs + 1 < size;) {
+               c = fgetc(stream);
+               if (c == EOF)
+                       break;
+               s[ofs++] = c;
+               if (c == '\n')
+                       break;
+       }
+       if (ofs < size)
+               s[ofs] = 0;
+       return ofs ? s : NULL;
+}
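+
+/* Usage sketch, illustration only (not part of this commit): echoing stdin
+ * to stdout line by line until EOF:
+ */
+static __attribute__((unused))
+void nolibc_fgets_example(void)
+{
+       char line[128];
+
+       while (fgets(line, sizeof(line), stdin))
+               fputs(line, stdout);
+}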
+
+
+/* minimal vfprintf(). It supports the following formats:
+ *  - %[l*]{d,u,c,x,p}
+ *  - %s
+ *  - unknown modifiers are ignored.
+ */
+static __attribute__((unused))
+int vfprintf(FILE *stream, const char *fmt, va_list args)
+{
+       char escape, lpref, c;
+       unsigned long long v;
+       unsigned int written;
+       size_t len, ofs;
+       char tmpbuf[21];
+       const char *outstr;
+
+       written = ofs = escape = lpref = 0;
+       while (1) {
+               c = fmt[ofs++];
+
+               if (escape) {
+                       /* we're in an escape sequence, ofs == 1 */
+                       escape = 0;
+                       if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') {
+                               char *out = tmpbuf;
+
+                               if (c == 'p')
+                                       v = va_arg(args, unsigned long);
+                               else if (lpref) {
+                                       if (lpref > 1)
+                                               v = va_arg(args, unsigned long long);
+                                       else
+                                               v = va_arg(args, unsigned long);
+                               } else
+                                       v = va_arg(args, unsigned int);
+
+                               if (c == 'd') {
+                                       /* sign-extend the value */
+                                       if (lpref == 0)
+                                               v = (long long)(int)v;
+                                       else if (lpref == 1)
+                                               v = (long long)(long)v;
+                               }
+
+                               switch (c) {
+                               case 'c':
+                                       out[0] = v;
+                                       out[1] = 0;
+                                       break;
+                               case 'd':
+                                       i64toa_r(v, out);
+                                       break;
+                               case 'u':
+                                       u64toa_r(v, out);
+                                       break;
+                               case 'p':
+                                       *(out++) = '0';
+                                       *(out++) = 'x';
+                                       /* fall through */
+                               default: /* 'x' and 'p' above */
+                                       u64toh_r(v, out);
+                                       break;
+                               }
+                               outstr = tmpbuf;
+                       }
+                       else if (c == 's') {
+                               outstr = va_arg(args, char *);
+                               if (!outstr)
+                                       outstr = "(null)";
+                       }
+                       else if (c == '%') {
+                               /* queue it verbatim */
+                               continue;
+                       }
+                       else {
+                               /* modifiers or final 0 */
+                               if (c == 'l') {
+                                       /* long format prefix, maintain the escape */
+                                       lpref++;
+                               }
+                               escape = 1;
+                               goto do_escape;
+                       }
+                       len = strlen(outstr);
+                       goto flush_str;
+               }
+
+               /* not an escape sequence */
+               if (c == 0 || c == '%') {
+                       /* flush pending data on escape or end */
+                       escape = 1;
+                       lpref = 0;
+                       outstr = fmt;
+                       len = ofs - 1;
+               flush_str:
+                       if (_fwrite(outstr, len, stream) != 0)
+                               break;
+
+                       written += len;
+               do_escape:
+                       if (c == 0)
+                               break;
+                       fmt += ofs;
+                       ofs = 0;
+                       continue;
+               }
+
+               /* literal char, just queue it */
+       }
+       return written;
+}
+
+static __attribute__((unused))
+int fprintf(FILE *stream, const char *fmt, ...)
+{
+       va_list args;
+       int ret;
+
+       va_start(args, fmt);
+       ret = vfprintf(stream, fmt, args);
+       va_end(args);
+       return ret;
+}
+
+static __attribute__((unused))
+int printf(const char *fmt, ...)
+{
+       va_list args;
+       int ret;
+
+       va_start(args, fmt);
+       ret = vfprintf(stdout, fmt, args);
+       va_end(args);
+       return ret;
+}
+
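+/* Usage sketch, illustration only (not part of this commit): only the
+ * conversions listed in the comment above vfprintf() are understood, e.g.
+ * the call below prints: demo pid=42 mask=dead ptr=0x0
+ */
+static __attribute__((unused))
+void nolibc_printf_example(void)
+{
+       printf("%s pid=%d mask=%x ptr=%p\n", "demo", 42, 0xdeadU, (void *)0);
+}
+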
+static __attribute__((unused))
+void perror(const char *msg)
+{
+       fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
+}
+
+#endif /* _NOLIBC_STDIO_H */
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
new file mode 100644 (file)
index 0000000..8fd32ea
--- /dev/null
@@ -0,0 +1,423 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * stdlib function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STDLIB_H
+#define _NOLIBC_STDLIB_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+#include "string.h"
+
+struct nolibc_heap {
+       size_t  len;
+       char    user_p[] __attribute__((__aligned__));
+};
+
+/* Buffer used to store int-to-ASCII conversions. It is only emitted if any
+ * of the related functions is actually used. The area is large enough to
+ * store "18446744073709551615" or "-9223372036854775808" and the final zero.
+ */
+static __attribute__((unused)) char itoa_buffer[21];
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+/* must be exported, as it's used by libgcc for various divide functions */
+__attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
+void abort(void)
+{
+       sys_kill(sys_getpid(), SIGABRT);
+       for (;;);
+}
+
+static __attribute__((unused))
+long atol(const char *s)
+{
+       unsigned long ret = 0;
+       unsigned long d;
+       int neg = 0;
+
+       if (*s == '-') {
+               neg = 1;
+               s++;
+       }
+
+       while (1) {
+               d = (*s++) - '0';
+               if (d > 9)
+                       break;
+               ret *= 10;
+               ret += d;
+       }
+
+       return neg ? -ret : ret;
+}
+
+static __attribute__((unused))
+int atoi(const char *s)
+{
+       return atol(s);
+}
+
+static __attribute__((unused))
+void free(void *ptr)
+{
+       struct nolibc_heap *heap;
+
+       if (!ptr)
+               return;
+
+       heap = container_of(ptr, struct nolibc_heap, user_p);
+       munmap(heap, heap->len);
+}
+
+/* getenv() tries to find the environment variable named <name> in the
+ * environment array pointed to by the global variable "environ", which must
+ * be declared as a char ** and be terminated by a NULL (it is recommended to
+ * set this variable to the "envp" argument of main()). If the requested
+ * environment variable exists, its value is returned; otherwise NULL is
+ * returned. getenv() is forcefully inlined so that the reference to "environ"
+ * will be dropped if unused, even at -O0.
+ */
+static __attribute__((unused))
+char *_getenv(const char *name, char **environ)
+{
+       int idx, i;
+
+       if (environ) {
+               for (idx = 0; environ[idx]; idx++) {
+                       for (i = 0; name[i] && name[i] == environ[idx][i];)
+                               i++;
+                       if (!name[i] && environ[idx][i] == '=')
+                               return &environ[idx][i+1];
+               }
+       }
+       return NULL;
+}
+
+static inline __attribute__((unused,always_inline))
+char *getenv(const char *name)
+{
+       extern char **environ;
+       return _getenv(name, environ);
+}
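+
+/* Usage sketch, illustration only (not part of this commit): with environ
+ * set from main()'s envp as recommended above, a lookup with a fallback
+ * looks like this:
+ */
+static __attribute__((unused))
+const char *nolibc_getenv_example(void)
+{
+       const char *p = getenv("PATH");
+
+       return p ? p : "/bin:/usr/bin";
+}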
+
+static __attribute__((unused))
+void *malloc(size_t len)
+{
+       struct nolibc_heap *heap;
+
+       /* Always allocate memory with size multiple of 4096. */
+       len  = sizeof(*heap) + len;
+       len  = (len + 4095UL) & -4096UL;
+       heap = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
+                   -1, 0);
+       if (__builtin_expect(heap == MAP_FAILED, 0))
+               return NULL;
+
+       heap->len = len;
+       return heap->user_p;
+}
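+
+/* Illustration only, not part of this commit: every allocation is a private
+ * anonymous mapping rounded up to a multiple of 4096 bytes, released as a
+ * whole by free():
+ */
+static __attribute__((unused))
+void nolibc_malloc_example(void)
+{
+       char *p = malloc(10);   /* actually maps 4096 bytes */
+
+       if (p)
+               free(p);        /* munmap()s the whole block */
+}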
+
+static __attribute__((unused))
+void *calloc(size_t size, size_t nmemb)
+{
+       void *orig;
+       size_t res = 0;
+
+       if (__builtin_expect(__builtin_mul_overflow(nmemb, size, &res), 0)) {
+               SET_ERRNO(ENOMEM);
+               return NULL;
+       }
+
+       /*
+        * No need to zero the heap, the MAP_ANONYMOUS in malloc()
+        * already does it.
+        */
+       return malloc(res);
+}
+
+static __attribute__((unused))
+void *realloc(void *old_ptr, size_t new_size)
+{
+       struct nolibc_heap *heap;
+       size_t user_p_len;
+       void *ret;
+
+       if (!old_ptr)
+               return malloc(new_size);
+
+       heap = container_of(old_ptr, struct nolibc_heap, user_p);
+       user_p_len = heap->len - sizeof(*heap);
+       /*
+        * Don't reallocate if @user_p_len >= @new_size: the current block
+        * is already large enough to hold @new_size bytes, so just return
+        * the same pointer.
+        */
+       if (user_p_len >= new_size)
+               return old_ptr;
+
+       ret = malloc(new_size);
+       if (__builtin_expect(!ret, 0))
+               return NULL;
+
+       memcpy(ret, heap->user_p, user_p_len); /* copy only the old user area, not the whole mapping */
+       munmap(heap, heap->len);
+       return ret;
+}
+
+/* Converts the unsigned long integer <in> to its hex representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The
+ * buffer is filled from the first byte, and the number of characters emitted
+ * (not counting the trailing zero) is returned. The function is constructed
+ * in a way to optimize the code size and avoid any divide that could add a
+ * dependency on large external functions.
+ */
+static __attribute__((unused))
+int utoh_r(unsigned long in, char *buffer)
+{
+       signed char pos = (~0UL > 0xfffffffful) ? 60 : 28;
+       int digits = 0;
+       int dig;
+
+       do {
+               dig = in >> pos;
+               in -= (uint64_t)dig << pos;
+               pos -= 4;
+               if (dig || digits || pos < 0) {
+                       if (dig > 9)
+                               dig += 'a' - '0' - 10;
+                       buffer[digits++] = '0' + dig;
+               }
+       } while (pos >= 0);
+
+       buffer[digits] = 0;
+       return digits;
+}
+
+/* converts unsigned long <in> to a hex string using the static itoa_buffer
+ * and returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *utoh(unsigned long in)
+{
+       utoh_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* Converts the unsigned long integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for 18446744073709551615 in 64-bit, 11 for
+ * 4294967295 in 32-bit). The buffer is filled from the first byte, and the
+ * number of characters emitted (not counting the trailing zero) is returned.
+ * The function is constructed in a way to optimize the code size and avoid
+ * any divide that could add a dependency on large external functions.
+ */
+static __attribute__((unused))
+int utoa_r(unsigned long in, char *buffer)
+{
+       unsigned long lim;
+       int digits = 0;
+       int pos = (~0UL > 0xfffffffful) ? 19 : 9;
+       int dig;
+
+       do {
+               for (dig = 0, lim = 1; dig < pos; dig++)
+                       lim *= 10;
+
+               if (digits || in >= lim || !pos) {
+                       for (dig = 0; in >= lim; dig++)
+                               in -= lim;
+                       buffer[digits++] = '0' + dig;
+               }
+       } while (pos--);
+
+       buffer[digits] = 0;
+       return digits;
+}
+
+/* Converts the signed long integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for -9223372036854775808 in 64-bit, 12 for
+ * -2147483648 in 32-bit). The buffer is filled from the first byte, and the
+ * number of characters emitted (not counting the trailing zero) is returned.
+ */
+static __attribute__((unused))
+int itoa_r(long in, char *buffer)
+{
+       char *ptr = buffer;
+       int len = 0;
+
+       if (in < 0) {
+               in = -in;
+               *(ptr++) = '-';
+               len++;
+       }
+       len += utoa_r(in, ptr);
+       return len;
+}
+
+/* for historical compatibility, same as above but returns the pointer to the
+ * buffer.
+ */
+static inline __attribute__((unused))
+char *ltoa_r(long in, char *buffer)
+{
+       itoa_r(in, buffer);
+       return buffer;
+}
+
+/* converts long integer <in> to a string using the static itoa_buffer and
+ * returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *itoa(long in)
+{
+       itoa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* converts long integer <in> to a string using the static itoa_buffer and
+ * returns the pointer to that string. Same as above, for compatibility.
+ */
+static inline __attribute__((unused))
+char *ltoa(long in)
+{
+       itoa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* converts unsigned long integer <in> to a string using the static itoa_buffer
+ * and returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *utoa(unsigned long in)
+{
+       utoa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
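+
+/* Illustration only, not part of this commit: all converters above that
+ * return a pointer share the single static itoa_buffer, so a second call
+ * overwrites the previous result:
+ */
+static __attribute__((unused))
+void nolibc_itoa_example(void)
+{
+       const char *a = itoa(-123);     /* "-123", stored in itoa_buffer */
+       const char *b = utoh(255);      /* "ff", reuses the same buffer */
+
+       (void)a;        /* a and b now both point to "ff"; copy the first */
+       (void)b;        /* result before the second call if it is needed  */
+}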
+
+/* Converts the unsigned 64-bit integer <in> to its hex representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (17 bytes for "ffffffffffffffff"). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned. The function is constructed in a way to optimize
+ * the code size and avoid any divide that could add a dependency on large
+ * external functions.
+ */
+static __attribute__((unused))
+int u64toh_r(uint64_t in, char *buffer)
+{
+       signed char pos = 60;
+       int digits = 0;
+       int dig;
+
+       do {
+               if (sizeof(long) >= 8) {
+                       dig = (in >> pos) & 0xF;
+               } else {
+                       /* 32-bit platforms: avoid a 64-bit shift */
+                       uint32_t d = (pos >= 32) ? (in >> 32) : in;
+                       dig = (d >> (pos & 31)) & 0xF;
+               }
+               if (dig > 9)
+                       dig += 'a' - '0' - 10;
+               pos -= 4;
+               if (dig || digits || pos < 0)
+                       buffer[digits++] = '0' + dig;
+       } while (pos >= 0);
+
+       buffer[digits] = 0;
+       return digits;
+}
+
+/* converts uint64_t <in> to a hex string using the static itoa_buffer and
+ * returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *u64toh(uint64_t in)
+{
+       u64toh_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* Converts the unsigned 64-bit integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for 18446744073709551615). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned. The function is constructed in a way to optimize
+ * the code size and avoid any divide that could add a dependency on large
+ * external functions.
+ */
+static __attribute__((unused))
+int u64toa_r(uint64_t in, char *buffer)
+{
+       unsigned long long lim;
+       int digits = 0;
+       int pos = 19; /* start with the highest possible digit */
+       int dig;
+
+       do {
+               for (dig = 0, lim = 1; dig < pos; dig++)
+                       lim *= 10;
+
+               if (digits || in >= lim || !pos) {
+                       for (dig = 0; in >= lim; dig++)
+                               in -= lim;
+                       buffer[digits++] = '0' + dig;
+               }
+       } while (pos--);
+
+       buffer[digits] = 0;
+       return digits;
+}
+
+/* Converts the signed 64-bit integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for -9223372036854775808). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned.
+ */
+static __attribute__((unused))
+int i64toa_r(int64_t in, char *buffer)
+{
+       char *ptr = buffer;
+       int len = 0;
+
+       if (in < 0) {
+               in = -in;
+               *(ptr++) = '-';
+               len++;
+       }
+       len += u64toa_r(in, ptr);
+       return len;
+}
+
+/* converts int64_t <in> to a string using the static itoa_buffer and returns
+ * the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *i64toa(int64_t in)
+{
+       i64toa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* converts uint64_t <in> to a string using the static itoa_buffer and returns
+ * the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *u64toa(uint64_t in)
+{
+       u64toa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+#endif /* _NOLIBC_STDLIB_H */
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
new file mode 100644 (file)
index 0000000..bef35be
--- /dev/null
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * string function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STRING_H
+#define _NOLIBC_STRING_H
+
+#include "std.h"
+
+static void *malloc(size_t len);
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+static __attribute__((unused))
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+       size_t ofs = 0;
+       int c1 = 0;
+
+       /* use unsigned chars to get a correct sign on the result even for
+        * bytes above 0x7f
+        */
+       while (ofs < n &&
+              !(c1 = ((const unsigned char *)s1)[ofs] - ((const unsigned char *)s2)[ofs])) {
+               ofs++;
+       }
+       return c1;
+}
+
+static __attribute__((unused))
+void *_nolibc_memcpy_up(void *dst, const void *src, size_t len)
+{
+       size_t pos = 0;
+
+       while (pos < len) {
+               ((char *)dst)[pos] = ((const char *)src)[pos];
+               pos++;
+       }
+       return dst;
+}
+
+static __attribute__((unused))
+void *_nolibc_memcpy_down(void *dst, const void *src, size_t len)
+{
+       while (len) {
+               len--;
+               ((char *)dst)[len] = ((const char *)src)[len];
+       }
+       return dst;
+}
+
+/* without -ffreestanding, the compiler may ignore this definition and emit
+ * calls to an external memmove(), which would then be reported as missing
+ * at link time.
+ */
+__attribute__((weak,unused,section(".text.nolibc_memmove")))
+void *memmove(void *dst, const void *src, size_t len)
+{
+       size_t dir, pos;
+
+       pos = len;
+       dir = -1;
+
+       if (dst < src) {
+               pos = -1;
+               dir = 1;
+       }
+
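+       /* copy from the end when dst is above src, from the start otherwise,
+        * so that overlapping areas are copied correctly
+        */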
+       while (len) {
+               pos += dir;
+               ((char *)dst)[pos] = ((const char *)src)[pos];
+               len--;
+       }
+       return dst;
+}
+
+/* must be exported, as it's used by libgcc on ARM */
+__attribute__((weak,unused,section(".text.nolibc_memcpy")))
+void *memcpy(void *dst, const void *src, size_t len)
+{
+       return _nolibc_memcpy_up(dst, src, len);
+}
+
+/* without -ffreestanding, the compiler may ignore this definition and emit
+ * calls to an external memset(), which would then be reported as missing
+ * at link time.
+ */
+__attribute__((weak,unused,section(".text.nolibc_memset")))
+void *memset(void *dst, int b, size_t len)
+{
+       char *p = dst;
+
+       while (len--)
+               *(p++) = b;
+       return dst;
+}
+
+static __attribute__((unused))
+char *strchr(const char *s, int c)
+{
+       while (*s) {
+               if (*s == (char)c)
+                       return (char *)s;
+               s++;
+       }
+       return NULL;
+}
+
+static __attribute__((unused))
+int strcmp(const char *a, const char *b)
+{
+       unsigned int c;
+       int diff;
+
+       while (!(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
+               ;
+       return diff;
+}
+
+static __attribute__((unused))
+char *strcpy(char *dst, const char *src)
+{
+       char *ret = dst;
+
+       while ((*dst++ = *src++));
+       return ret;
+}
+
+/* this function is only used when the argument is not a constant, or when
+ * the compiler cannot tell because optimizations are disabled.
+ */
+static __attribute__((unused))
+size_t nolibc_strlen(const char *str)
+{
+       size_t len;
+
+       for (len = 0; str[len]; len++);
+       return len;
+}
+
+/* do not trust __builtin_constant_p() at -O0, as clang will emit a test and
+ * the two branches, then will rely on an external definition of strlen().
+ */
+#if defined(__OPTIMIZE__)
+#define strlen(str) ({                          \
+       __builtin_constant_p((str)) ?           \
+               __builtin_strlen((str)) :       \
+               nolibc_strlen((str));           \
+})
+#else
+#define strlen(str) nolibc_strlen((str))
+#endif
+
+static __attribute__((unused))
+size_t strnlen(const char *str, size_t maxlen)
+{
+       size_t len;
+
+       for (len = 0; (len < maxlen) && str[len]; len++);
+       return len;
+}
+
+static __attribute__((unused))
+char *strdup(const char *str)
+{
+       size_t len;
+       char *ret;
+
+       len = strlen(str);
+       ret = malloc(len + 1);
+       if (__builtin_expect(ret != NULL, 1))
+               memcpy(ret, str, len + 1);
+
+       return ret;
+}
+
+static __attribute__((unused))
+char *strndup(const char *str, size_t maxlen)
+{
+       size_t len;
+       char *ret;
+
+       len = strnlen(str, maxlen);
+       ret = malloc(len + 1);
+       if (__builtin_expect(ret != NULL, 1)) {
+               memcpy(ret, str, len);
+               ret[len] = '\0';
+       }
+
+       return ret;
+}
+
+static __attribute__((unused))
+size_t strlcat(char *dst, const char *src, size_t size)
+{
+       size_t len;
+
+       for (len = 0; len < size && dst[len]; len++)
+               ;
+
+       /* copy only what fits while always leaving room for the trailing
+        * zero, then terminate
+        */
+       while (len + 1 < size && *src)
+               dst[len++] = *src++;
+       if (len < size)
+               dst[len] = '\0';
+
+       /* the returned length is the total length the concatenated string
+        * would have had without truncation
+        */
+       while (*src++)
+               len++;
+
+       return len;
+}
+
+static __attribute__((unused))
+size_t strlcpy(char *dst, const char *src, size_t size)
+{
+       size_t len;
+
+       for (len = 0; len < size; len++) {
+               dst[len] = src[len];
+               if (!dst[len])
+                       return len;
+       }
+
+       /* truncated: terminate the output and keep measuring the source,
+        * as the returned length is always strlen(src)
+        */
+       if (size)
+               dst[size - 1] = '\0';
+
+       while (src[len])
+               len++;
+
+       return len;
+}
+
+static __attribute__((unused))
+char *strncat(char *dst, const char *src, size_t size)
+{
+       char *orig = dst;
+
+       while (*dst)
+               dst++;
+
+       while (size && (*dst = *src)) {
+               src++;
+               dst++;
+               size--;
+       }
+
+       *dst = 0;
+       return orig;
+}
+
+static __attribute__((unused))
+int strncmp(const char *a, const char *b, size_t size)
+{
+       unsigned int c;
+       int diff = 0;
+
+       while (size-- &&
+              !(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
+               ;
+
+       return diff;
+}
+
+static __attribute__((unused))
+char *strncpy(char *dst, const char *src, size_t size)
+{
+       size_t len;
+
+       for (len = 0; len < size; len++)
+               if ((dst[len] = *src))
+                       src++;
+       return dst;
+}
+
+static __attribute__((unused))
+char *strrchr(const char *s, int c)
+{
+       const char *ret = NULL;
+
+       while (*s) {
+               if (*s == (char)c)
+                       ret = s;
+               s++;
+       }
+       return (char *)ret;
+}
+
+#endif /* _NOLIBC_STRING_H */
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
new file mode 100644 (file)
index 0000000..0849107
--- /dev/null
@@ -0,0 +1,1247 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Syscall definitions for NOLIBC (those in man(2))
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_SYS_H
+#define _NOLIBC_SYS_H
+
+#include <stdarg.h>
+#include "std.h"
+
+/* system includes */
+#include <asm/unistd.h>
+#include <asm/signal.h>  // for SIGCHLD
+#include <asm/ioctls.h>
+#include <asm/mman.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <linux/time.h>
+
+#include "arch.h"
+#include "errno.h"
+#include "types.h"
+
+
+/* Functions in this file only describe syscalls. They're declared static so
+ * that the compiler usually decides to inline them while still being allowed
+ * to pass a pointer to one of their instances. Each syscall exists in two
+ * versions:
+ *   - the "internal" ones, which match the raw syscall interface at the
+ *     kernel level, which may sometimes slightly differ from the documented
+ *     libc-level ones. For example most of them return either a valid value
+ *     or -errno. All of these are prefixed with "sys_". They may be called
+ *     by non-portable applications if desired.
+ *
+ *   - the "exported" ones, whose interface must closely match the one
+ *     documented in man(2), that applications are supposed to expect. These
+ *     ones rely on the internal ones, and set errno.
+ *
+ * Each syscall is defined with both functions, sorted in alphabetical order
+ * of the exported names.
+ *
+ * When in doubt about whether a function belongs here: only those which set
+ * errno should be defined in this file. Wrappers like those appearing in
+ * man(3) should not be placed here.
+ */
+
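+/* A minimal caller-side sketch (not compiled) of the convention above:
+ *
+ *    int fd = sys_open("/foo", O_RDONLY, 0); // returns -ENOENT on failure
+ *    int fd2 = open("/foo", O_RDONLY);       // returns -1, errno = ENOENT
+ */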
+
+/*
+ * int brk(void *addr);
+ * void *sbrk(intptr_t inc)
+ */
+
+static __attribute__((unused))
+void *sys_brk(void *addr)
+{
+       return (void *)my_syscall1(__NR_brk, addr);
+}
+
+static __attribute__((unused))
+int brk(void *addr)
+{
+       void *ret = sys_brk(addr);
+
+       if (!ret) {
+               SET_ERRNO(ENOMEM);
+               return -1;
+       }
+       return 0;
+}
+
+static __attribute__((unused))
+void *sbrk(intptr_t inc)
+{
+       void *ret;
+
+       /* first call to find current end */
+       if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
+               return ret + inc;
+
+       SET_ERRNO(ENOMEM);
+       return (void *)-1;
+}
+
+
+/*
+ * int chdir(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_chdir(const char *path)
+{
+       return my_syscall1(__NR_chdir, path);
+}
+
+static __attribute__((unused))
+int chdir(const char *path)
+{
+       int ret = sys_chdir(path);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int chmod(const char *path, mode_t mode);
+ */
+
+static __attribute__((unused))
+int sys_chmod(const char *path, mode_t mode)
+{
+#ifdef __NR_fchmodat
+       return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
+#elif defined(__NR_chmod)
+       return my_syscall2(__NR_chmod, path, mode);
+#else
+#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
+#endif
+}
+
+static __attribute__((unused))
+int chmod(const char *path, mode_t mode)
+{
+       int ret = sys_chmod(path, mode);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int chown(const char *path, uid_t owner, gid_t group);
+ */
+
+static __attribute__((unused))
+int sys_chown(const char *path, uid_t owner, gid_t group)
+{
+#ifdef __NR_fchownat
+       return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
+#elif defined(__NR_chown)
+       return my_syscall3(__NR_chown, path, owner, group);
+#else
+#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
+#endif
+}
+
+static __attribute__((unused))
+int chown(const char *path, uid_t owner, gid_t group)
+{
+       int ret = sys_chown(path, owner, group);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int chroot(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_chroot(const char *path)
+{
+       return my_syscall1(__NR_chroot, path);
+}
+
+static __attribute__((unused))
+int chroot(const char *path)
+{
+       int ret = sys_chroot(path);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int close(int fd);
+ */
+
+static __attribute__((unused))
+int sys_close(int fd)
+{
+       return my_syscall1(__NR_close, fd);
+}
+
+static __attribute__((unused))
+int close(int fd)
+{
+       int ret = sys_close(fd);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int dup(int fd);
+ */
+
+static __attribute__((unused))
+int sys_dup(int fd)
+{
+       return my_syscall1(__NR_dup, fd);
+}
+
+static __attribute__((unused))
+int dup(int fd)
+{
+       int ret = sys_dup(fd);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int dup2(int old, int new);
+ */
+
+static __attribute__((unused))
+int sys_dup2(int old, int new)
+{
+#ifdef __NR_dup3
+       return my_syscall3(__NR_dup3, old, new, 0);
+#elif defined(__NR_dup2)
+       return my_syscall2(__NR_dup2, old, new);
+#else
+#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
+#endif
+}
+
+static __attribute__((unused))
+int dup2(int old, int new)
+{
+       int ret = sys_dup2(old, new);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int dup3(int old, int new, int flags);
+ */
+
+#ifdef __NR_dup3
+static __attribute__((unused))
+int sys_dup3(int old, int new, int flags)
+{
+       return my_syscall3(__NR_dup3, old, new, flags);
+}
+
+static __attribute__((unused))
+int dup3(int old, int new, int flags)
+{
+       int ret = sys_dup3(old, new, flags);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+#endif
+
+
+/*
+ * int execve(const char *filename, char *const argv[], char *const envp[]);
+ */
+
+static __attribute__((unused))
+int sys_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       return my_syscall3(__NR_execve, filename, argv, envp);
+}
+
+static __attribute__((unused))
+int execve(const char *filename, char *const argv[], char *const envp[])
+{
+       int ret = sys_execve(filename, argv, envp);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * void exit(int status);
+ */
+
+static __attribute__((noreturn,unused))
+void sys_exit(int status)
+{
+       my_syscall1(__NR_exit, status & 255);
+       while (1); // silence the "noreturn" warning.
+}
+
+static __attribute__((noreturn,unused))
+void exit(int status)
+{
+       sys_exit(status);
+}
+
+
+/*
+ * pid_t fork(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+#ifdef __NR_clone
+       /* note: some archs only have clone() and not fork(). The clone() API
+        * differs between archs, but most of them take the flags as the first
+        * argument and ignore the remaining ones when no other flag is set.
+        */
+       return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
+#elif defined(__NR_fork)
+       return my_syscall0(__NR_fork);
+#else
+#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
+#endif
+}
+
+static __attribute__((unused))
+pid_t fork(void)
+{
+       pid_t ret = sys_fork();
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int fsync(int fd);
+ */
+
+static __attribute__((unused))
+int sys_fsync(int fd)
+{
+       return my_syscall1(__NR_fsync, fd);
+}
+
+static __attribute__((unused))
+int fsync(int fd)
+{
+       int ret = sys_fsync(fd);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int getdents64(int fd, struct linux_dirent64 *dirp, int count);
+ */
+
+static __attribute__((unused))
+int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+       return my_syscall3(__NR_getdents64, fd, dirp, count);
+}
+
+static __attribute__((unused))
+int getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+       int ret = sys_getdents64(fd, dirp, count);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * pid_t getpgid(pid_t pid);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpgid(pid_t pid)
+{
+       return my_syscall1(__NR_getpgid, pid);
+}
+
+static __attribute__((unused))
+pid_t getpgid(pid_t pid)
+{
+       pid_t ret = sys_getpgid(pid);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * pid_t getpgrp(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpgrp(void)
+{
+       return sys_getpgid(0);
+}
+
+static __attribute__((unused))
+pid_t getpgrp(void)
+{
+       return sys_getpgrp();
+}
+
+
+/*
+ * pid_t getpid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpid(void)
+{
+       return my_syscall0(__NR_getpid);
+}
+
+static __attribute__((unused))
+pid_t getpid(void)
+{
+       return sys_getpid();
+}
+
+
+/*
+ * pid_t getppid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getppid(void)
+{
+       return my_syscall0(__NR_getppid);
+}
+
+static __attribute__((unused))
+pid_t getppid(void)
+{
+       return sys_getppid();
+}
+
+
+/*
+ * pid_t gettid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_gettid(void)
+{
+       return my_syscall0(__NR_gettid);
+}
+
+static __attribute__((unused))
+pid_t gettid(void)
+{
+       return sys_gettid();
+}
+
+
+/*
+ * int gettimeofday(struct timeval *tv, struct timezone *tz);
+ */
+
+static __attribute__((unused))
+int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+       return my_syscall2(__NR_gettimeofday, tv, tz);
+}
+
+static __attribute__((unused))
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+       int ret = sys_gettimeofday(tv, tz);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int ioctl(int fd, unsigned long req, void *value);
+ */
+
+static __attribute__((unused))
+int sys_ioctl(int fd, unsigned long req, void *value)
+{
+       return my_syscall3(__NR_ioctl, fd, req, value);
+}
+
+static __attribute__((unused))
+int ioctl(int fd, unsigned long req, void *value)
+{
+       int ret = sys_ioctl(fd, req, value);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+/*
+ * int kill(pid_t pid, int signal);
+ */
+
+static __attribute__((unused))
+int sys_kill(pid_t pid, int signal)
+{
+       return my_syscall2(__NR_kill, pid, signal);
+}
+
+static __attribute__((unused))
+int kill(pid_t pid, int signal)
+{
+       int ret = sys_kill(pid, signal);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int link(const char *old, const char *new);
+ */
+
+static __attribute__((unused))
+int sys_link(const char *old, const char *new)
+{
+#ifdef __NR_linkat
+       return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
+#elif defined(__NR_link)
+       return my_syscall2(__NR_link, old, new);
+#else
+#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
+#endif
+}
+
+static __attribute__((unused))
+int link(const char *old, const char *new)
+{
+       int ret = sys_link(old, new);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * off_t lseek(int fd, off_t offset, int whence);
+ */
+
+static __attribute__((unused))
+off_t sys_lseek(int fd, off_t offset, int whence)
+{
+       return my_syscall3(__NR_lseek, fd, offset, whence);
+}
+
+static __attribute__((unused))
+off_t lseek(int fd, off_t offset, int whence)
+{
+       off_t ret = sys_lseek(fd, offset, whence);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int mkdir(const char *path, mode_t mode);
+ */
+
+static __attribute__((unused))
+int sys_mkdir(const char *path, mode_t mode)
+{
+#ifdef __NR_mkdirat
+       return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
+#elif defined(__NR_mkdir)
+       return my_syscall2(__NR_mkdir, path, mode);
+#else
+#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
+#endif
+}
+
+static __attribute__((unused))
+int mkdir(const char *path, mode_t mode)
+{
+       int ret = sys_mkdir(path, mode);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int mknod(const char *path, mode_t mode, dev_t dev);
+ */
+
+static __attribute__((unused))
+long sys_mknod(const char *path, mode_t mode, dev_t dev)
+{
+#ifdef __NR_mknodat
+       return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
+#elif defined(__NR_mknod)
+       return my_syscall3(__NR_mknod, path, mode, dev);
+#else
+#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
+#endif
+}
+
+static __attribute__((unused))
+int mknod(const char *path, mode_t mode, dev_t dev)
+{
+       int ret = sys_mknod(path, mode, dev);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+#ifndef MAP_SHARED
+#define MAP_SHARED             0x01    /* Share changes */
+#define MAP_PRIVATE            0x02    /* Changes are private */
+#define MAP_SHARED_VALIDATE    0x03    /* share + validate extension flags */
+#endif
+
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)-1)
+#endif
+
+static __attribute__((unused))
+void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
+              off_t offset)
+{
+#ifndef my_syscall6
+       /* Function not implemented. */
+       return (void *)-ENOSYS;
+#else
+
+       int n;
+
+#if defined(__i386__)
+       n = __NR_mmap2;
+       offset >>= 12;
+#else
+       n = __NR_mmap;
+#endif
+
+       return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
+#endif
+}
+
+static __attribute__((unused))
+void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+       void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
+
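+       /* the kernel encodes mmap() errors as addresses in the [-4095, -1]
+        * range, i.e. within the last page of the address space
+        */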
+       if ((unsigned long)ret >= -4095UL) {
+               SET_ERRNO(-(long)ret);
+               ret = MAP_FAILED;
+       }
+       return ret;
+}
+
+static __attribute__((unused))
+int sys_munmap(void *addr, size_t length)
+{
+       return my_syscall2(__NR_munmap, addr, length);
+}
+
+static __attribute__((unused))
+int munmap(void *addr, size_t length)
+{
+       int ret = sys_munmap(addr, length);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+/*
+ * int mount(const char *source, const char *target,
+ *           const char *fstype, unsigned long flags,
+ *           const void *data);
+ */
+static __attribute__((unused))
+int sys_mount(const char *src, const char *tgt, const char *fst,
+                     unsigned long flags, const void *data)
+{
+       return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
+}
+
+static __attribute__((unused))
+int mount(const char *src, const char *tgt,
+          const char *fst, unsigned long flags,
+          const void *data)
+{
+       int ret = sys_mount(src, tgt, fst, flags, data);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int open(const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_open(const char *path, int flags, mode_t mode)
+{
+#ifdef __NR_openat
+       return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
+#elif defined(__NR_open)
+       return my_syscall3(__NR_open, path, flags, mode);
+#else
+#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
+#endif
+}
+
+static __attribute__((unused))
+int open(const char *path, int flags, ...)
+{
+       mode_t mode = 0;
+       int ret;
+
+       if (flags & O_CREAT) {
+               va_list args;
+
+               va_start(args, flags);
+               mode = va_arg(args, mode_t);
+               va_end(args);
+       }
+
+       ret = sys_open(path, flags, mode);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
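+
+/* A minimal usage sketch (not compiled): the variadic mode argument is only
+ * consumed when O_CREAT is present in flags:
+ *
+ *    int fd = open("/tmp/log", O_WRONLY | O_CREAT, 0644);
+ */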
+
+
+/*
+ * int pivot_root(const char *new, const char *old);
+ */
+
+static __attribute__((unused))
+int sys_pivot_root(const char *new, const char *old)
+{
+       return my_syscall2(__NR_pivot_root, new, old);
+}
+
+static __attribute__((unused))
+int pivot_root(const char *new, const char *old)
+{
+       int ret = sys_pivot_root(new, old);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int poll(struct pollfd *fds, int nfds, int timeout);
+ */
+
+static __attribute__((unused))
+int sys_poll(struct pollfd *fds, int nfds, int timeout)
+{
+#if defined(__NR_ppoll)
+       struct timespec t;
+
+       if (timeout >= 0) {
+               t.tv_sec  = timeout / 1000;
+               t.tv_nsec = (timeout % 1000) * 1000000;
+       }
+       return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
+#elif defined(__NR_poll)
+       return my_syscall3(__NR_poll, fds, nfds, timeout);
+#else
+#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
+#endif
+}
+
+static __attribute__((unused))
+int poll(struct pollfd *fds, int nfds, int timeout)
+{
+       int ret = sys_poll(fds, nfds, timeout);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * ssize_t read(int fd, void *buf, size_t count);
+ */
+
+static __attribute__((unused))
+ssize_t sys_read(int fd, void *buf, size_t count)
+{
+       return my_syscall3(__NR_read, fd, buf, count);
+}
+
+static __attribute__((unused))
+ssize_t read(int fd, void *buf, size_t count)
+{
+       ssize_t ret = sys_read(fd, buf, count);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int reboot(int cmd);
+ * <cmd> is among LINUX_REBOOT_CMD_*
+ */
+
+static __attribute__((unused))
+ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
+{
+       return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
+}
+
+static __attribute__((unused))
+int reboot(int cmd)
+{
+       int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int sched_yield(void);
+ */
+
+static __attribute__((unused))
+int sys_sched_yield(void)
+{
+       return my_syscall0(__NR_sched_yield);
+}
+
+static __attribute__((unused))
+int sched_yield(void)
+{
+       int ret = sys_sched_yield();
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
+ *            fd_set *except_fds, struct timeval *timeout);
+ */
+
+static __attribute__((unused))
+int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
+       struct sel_arg_struct {
+               unsigned long n;
+               fd_set *r, *w, *e;
+               struct timeval *t;
+       } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
+       return my_syscall1(__NR_select, &arg);
+#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
+       struct timespec t;
+
+       if (timeout) {
+               t.tv_sec  = timeout->tv_sec;
+               t.tv_nsec = timeout->tv_usec * 1000;
+       }
+       return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#elif defined(__NR__newselect) || defined(__NR_select)
+#ifndef __NR__newselect
+#define __NR__newselect __NR_select
+#endif
+       return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#else
+#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
+#endif
+}
+
+static __attribute__((unused))
+int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+       int ret = sys_select(nfds, rfds, wfds, efds, timeout);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int setpgid(pid_t pid, pid_t pgid);
+ */
+
+static __attribute__((unused))
+int sys_setpgid(pid_t pid, pid_t pgid)
+{
+       return my_syscall2(__NR_setpgid, pid, pgid);
+}
+
+static __attribute__((unused))
+int setpgid(pid_t pid, pid_t pgid)
+{
+       int ret = sys_setpgid(pid, pgid);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * pid_t setsid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_setsid(void)
+{
+       return my_syscall0(__NR_setsid);
+}
+
+static __attribute__((unused))
+pid_t setsid(void)
+{
+       pid_t ret = sys_setsid();
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int stat(const char *path, struct stat *buf);
+ * Warning: the struct stat's layout is arch-dependent.
+ */
+
+static __attribute__((unused))
+int sys_stat(const char *path, struct stat *buf)
+{
+       struct sys_stat_struct stat;
+       long ret;
+
+#ifdef __NR_newfstatat
+       /* only solution for arm64 */
+       ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
+#elif defined(__NR_stat)
+       ret = my_syscall2(__NR_stat, path, &stat);
+#else
+#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
+#endif
+       buf->st_dev     = stat.st_dev;
+       buf->st_ino     = stat.st_ino;
+       buf->st_mode    = stat.st_mode;
+       buf->st_nlink   = stat.st_nlink;
+       buf->st_uid     = stat.st_uid;
+       buf->st_gid     = stat.st_gid;
+       buf->st_rdev    = stat.st_rdev;
+       buf->st_size    = stat.st_size;
+       buf->st_blksize = stat.st_blksize;
+       buf->st_blocks  = stat.st_blocks;
+       buf->st_atime   = stat.st_atime;
+       buf->st_mtime   = stat.st_mtime;
+       buf->st_ctime   = stat.st_ctime;
+       return ret;
+}
+
+static __attribute__((unused))
+int stat(const char *path, struct stat *buf)
+{
+       int ret = sys_stat(path, buf);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int symlink(const char *old, const char *new);
+ */
+
+static __attribute__((unused))
+int sys_symlink(const char *old, const char *new)
+{
+#ifdef __NR_symlinkat
+       return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
+#elif defined(__NR_symlink)
+       return my_syscall2(__NR_symlink, old, new);
+#else
+#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
+#endif
+}
+
+static __attribute__((unused))
+int symlink(const char *old, const char *new)
+{
+       int ret = sys_symlink(old, new);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * mode_t umask(mode_t mode);
+ */
+
+static __attribute__((unused))
+mode_t sys_umask(mode_t mode)
+{
+       return my_syscall1(__NR_umask, mode);
+}
+
+static __attribute__((unused))
+mode_t umask(mode_t mode)
+{
+       return sys_umask(mode);
+}
+
+
+/*
+ * int umount2(const char *path, int flags);
+ */
+
+static __attribute__((unused))
+int sys_umount2(const char *path, int flags)
+{
+       return my_syscall2(__NR_umount2, path, flags);
+}
+
+static __attribute__((unused))
+int umount2(const char *path, int flags)
+{
+       int ret = sys_umount2(path, flags);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int unlink(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_unlink(const char *path)
+{
+#ifdef __NR_unlinkat
+       return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
+#elif defined(__NR_unlink)
+       return my_syscall1(__NR_unlink, path);
+#else
+#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
+#endif
+}
+
+static __attribute__((unused))
+int unlink(const char *path)
+{
+       int ret = sys_unlink(path);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * pid_t wait(int *status);
+ * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
+ * pid_t waitpid(pid_t pid, int *status, int options);
+ */
+
+static __attribute__((unused))
+pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+       return my_syscall4(__NR_wait4, pid, status, options, rusage);
+}
+
+static __attribute__((unused))
+pid_t wait(int *status)
+{
+       pid_t ret = sys_wait4(-1, status, 0, NULL);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+static __attribute__((unused))
+pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+       pid_t ret = sys_wait4(pid, status, options, rusage);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+static __attribute__((unused))
+pid_t waitpid(pid_t pid, int *status, int options)
+{
+       pid_t ret = sys_wait4(pid, status, options, NULL);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * ssize_t write(int fd, const void *buf, size_t count);
+ */
+
+static __attribute__((unused))
+ssize_t sys_write(int fd, const void *buf, size_t count)
+{
+       return my_syscall3(__NR_write, fd, buf, count);
+}
+
+static __attribute__((unused))
+ssize_t write(int fd, const void *buf, size_t count)
+{
+       ssize_t ret = sys_write(fd, buf, count);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+#endif /* _NOLIBC_SYS_H */
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
new file mode 100644 (file)
index 0000000..d18b766
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * time function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_TIME_H
+#define _NOLIBC_TIME_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+static __attribute__((unused))
+time_t time(time_t *tptr)
+{
+       struct timeval tv;
+
+       /* note, cannot fail here */
+       sys_gettimeofday(&tv, NULL);
+
+       if (tptr)
+               *tptr = tv.tv_sec;
+       return tv.tv_sec;
+}
+
+#endif /* _NOLIBC_TIME_H */
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
new file mode 100644 (file)
index 0000000..9599970
--- /dev/null
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Special types used by various syscalls for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_TYPES_H
+#define _NOLIBC_TYPES_H
+
+#include "std.h"
+#include <linux/time.h>
+
+
+/* Only the generic macros and types may be defined here. The arch-specific
+ * ones such as the O_RDONLY and related macros used by fcntl() and open(), or
+ * the layout of sys_stat_struct must not be defined here.
+ */
+
+/* stat flags (WARNING, octal here) */
+#define S_IFDIR        0040000
+#define S_IFCHR        0020000
+#define S_IFBLK        0060000
+#define S_IFREG        0100000
+#define S_IFIFO        0010000
+#define S_IFLNK        0120000
+#define S_IFSOCK       0140000
+#define S_IFMT         0170000
+
+#define S_ISDIR(mode)  (((mode) & S_IFDIR)  == S_IFDIR)
+#define S_ISCHR(mode)  (((mode) & S_IFCHR)  == S_IFCHR)
+#define S_ISBLK(mode)  (((mode) & S_IFBLK)  == S_IFBLK)
+#define S_ISREG(mode)  (((mode) & S_IFREG)  == S_IFREG)
+#define S_ISFIFO(mode) (((mode) & S_IFIFO)  == S_IFIFO)
+#define S_ISLNK(mode)  (((mode) & S_IFLNK)  == S_IFLNK)
+#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
+
+/* dirent types */
+#define DT_UNKNOWN     0x0
+#define DT_FIFO        0x1
+#define DT_CHR         0x2
+#define DT_DIR         0x4
+#define DT_BLK         0x6
+#define DT_REG         0x8
+#define DT_LNK         0xa
+#define DT_SOCK        0xc
+
+/* commonly an fd_set represents 256 FDs */
+#ifndef FD_SETSIZE
+#define FD_SETSIZE     256
+#endif
+
+/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different
+ * values.
+ */
+#ifndef PATH_MAX
+#define PATH_MAX       4096
+#endif
+
+#ifndef MAXPATHLEN
+#define MAXPATHLEN     (PATH_MAX)
+#endif
+
+/* Special FD used by all the *at functions */
+#ifndef AT_FDCWD
+#define AT_FDCWD       (-100)
+#endif
+
+/* whence values for lseek() */
+#define SEEK_SET       0
+#define SEEK_CUR       1
+#define SEEK_END       2
+
+/* cmd for reboot() */
+#define LINUX_REBOOT_MAGIC1         0xfee1dead
+#define LINUX_REBOOT_MAGIC2         0x28121969
+#define LINUX_REBOOT_CMD_HALT       0xcdef0123
+#define LINUX_REBOOT_CMD_POWER_OFF  0x4321fedc
+#define LINUX_REBOOT_CMD_RESTART    0x01234567
+#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
+
+/* Macros used on waitpid()'s return status */
+#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
+#define WIFEXITED(status)   (((status) & 0x7f) == 0)
+
+/* waitpid() flags */
+#define WNOHANG      1
+
+/* standard exit() codes */
+#define EXIT_SUCCESS 0
+#define EXIT_FAILURE 1
+
+/* for select() */
+typedef struct {
+       uint32_t fd32[(FD_SETSIZE + 31) / 32];
+} fd_set;
+
+#define FD_CLR(fd, set) do {                                            \
+               fd_set *__set = (set);                                  \
+               int __fd = (fd);                                        \
+               if (__fd >= 0)                                          \
+                       __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \
+       } while (0)
+
+#define FD_SET(fd, set) do {                                            \
+               fd_set *__set = (set);                                  \
+               int __fd = (fd);                                        \
+               if (__fd >= 0)                                          \
+                       __set->fd32[__fd / 32] |= 1U << (__fd & 31);    \
+       } while (0)
+
+#define FD_ISSET(fd, set) ({                                                  \
+               fd_set *__set = (set);                                        \
+               int __fd = (fd);                                              \
+               int __r = 0;                                                  \
+               if (__fd >= 0)                                                \
+                       __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \
+               __r;                                                          \
+       })
+
+#define FD_ZERO(set) do {                                               \
+               fd_set *__set = (set);                                  \
+               int __idx;                                              \
+               for (__idx = 0; __idx < (FD_SETSIZE+31) / 32; __idx ++) \
+                       __set->fd32[__idx] = 0;                         \
+       } while (0)
+
+/* for poll() */
+#define POLLIN          0x0001
+#define POLLPRI         0x0002
+#define POLLOUT         0x0004
+#define POLLERR         0x0008
+#define POLLHUP         0x0010
+#define POLLNVAL        0x0020
+
+struct pollfd {
+       int fd;
+       short int events;
+       short int revents;
+};
+
+/* for getdents64() */
+struct linux_dirent64 {
+       uint64_t       d_ino;
+       int64_t        d_off;
+       unsigned short d_reclen;
+       unsigned char  d_type;
+       char           d_name[];
+};
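+
+/* A minimal sketch (not compiled) of walking a getdents64() buffer, assuming
+ * <fd> is an open directory:
+ *
+ *    char buf[4096];
+ *    int off, ret = getdents64(fd, (struct linux_dirent64 *)buf, sizeof(buf));
+ *
+ *    for (off = 0; off < ret; ) {
+ *            struct linux_dirent64 *d = (void *)(buf + off);
+ *            // d->d_name is the entry name, d->d_type its DT_* type
+ *            off += d->d_reclen;
+ *    }
+ */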
+
+/* needed by wait4() */
+struct rusage {
+       struct timeval ru_utime;
+       struct timeval ru_stime;
+       long   ru_maxrss;
+       long   ru_ixrss;
+       long   ru_idrss;
+       long   ru_isrss;
+       long   ru_minflt;
+       long   ru_majflt;
+       long   ru_nswap;
+       long   ru_inblock;
+       long   ru_oublock;
+       long   ru_msgsnd;
+       long   ru_msgrcv;
+       long   ru_nsignals;
+       long   ru_nvcsw;
+       long   ru_nivcsw;
+};
+
+/* This is the format of the struct as returned by the libc to the
+ * application, which significantly differs from the format returned by the
+ * stat() syscall flavours.
+ */
+struct stat {
+       dev_t     st_dev;     /* ID of device containing file */
+       ino_t     st_ino;     /* inode number */
+       mode_t    st_mode;    /* protection */
+       nlink_t   st_nlink;   /* number of hard links */
+       uid_t     st_uid;     /* user ID of owner */
+       gid_t     st_gid;     /* group ID of owner */
+       dev_t     st_rdev;    /* device ID (if special file) */
+       off_t     st_size;    /* total size, in bytes */
+       blksize_t st_blksize; /* blocksize for file system I/O */
+       blkcnt_t  st_blocks;  /* number of 512B blocks allocated */
+       time_t    st_atime;   /* time of last access */
+       time_t    st_mtime;   /* time of last modification */
+       time_t    st_ctime;   /* time of last status change */
+};
+
+/* WARNING: these only deal with the first 4096 majors and first 256 minors */
+#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
+#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
+#define minor(dev) ((unsigned int)((dev) & 0xff))
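+/* e.g. makedev(8, 1) == 0x0801, the traditional dev_t of /dev/sda1 */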
+
+#ifndef offsetof
+#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
+#endif
+
+#ifndef container_of
+#define container_of(PTR, TYPE, FIELD) ({                      \
+       __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR);    \
+       (TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD)); \
+})
+#endif
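+
+/* A minimal usage sketch (not compiled): recover a pointer to the enclosing
+ * structure from a pointer to one of its members:
+ *
+ *    struct foo { int a; int b; } f;
+ *    int *pb = &f.b;
+ *    struct foo *pf = container_of(pb, struct foo, b);  // pf == &f
+ */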
+
+#endif /* _NOLIBC_TYPES_H */
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
new file mode 100644 (file)
index 0000000..1c25e20
--- /dev/null
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * unistd function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_UNISTD_H
+#define _NOLIBC_UNISTD_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+
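+/* The sleep functions below are built on select(). On Linux, select()
+ * updates the timeout with the remaining time when it is interrupted,
+ * which is what lets msleep() and sleep() report the time left.
+ */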
+static __attribute__((unused))
+int msleep(unsigned int msecs)
+{
+       struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
+
+       if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+               return (my_timeval.tv_sec * 1000) +
+                       (my_timeval.tv_usec / 1000) +
+                       !!(my_timeval.tv_usec % 1000);
+       else
+               return 0;
+}
+
+static __attribute__((unused))
+unsigned int sleep(unsigned int seconds)
+{
+       struct timeval my_timeval = { seconds, 0 };
+
+       if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+               return my_timeval.tv_sec + !!my_timeval.tv_usec;
+       else
+               return 0;
+}
+
+static __attribute__((unused))
+int usleep(unsigned int usecs)
+{
+       struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 };
+
+       return sys_select(0, 0, 0, 0, &my_timeval);
+}
+
+static __attribute__((unused))
+int tcsetpgrp(int fd, pid_t pid)
+{
+       return ioctl(fd, TIOCSPGRP, &pid);
+}
+
+#endif /* _NOLIBC_UNISTD_H */
index 91a6fe4e02c08c4b6ac6f1fb91f8f9e3fce85c0f..6a184d260c7f2e17d05831e702410175b18e550e 100644 (file)
@@ -445,7 +445,13 @@ struct kvm_run {
 #define KVM_SYSTEM_EVENT_RESET          2
 #define KVM_SYSTEM_EVENT_CRASH          3
                        __u32 type;
-                       __u64 flags;
+                       __u32 ndata;
+                       union {
+#ifndef __KERNEL__
+                               __u64 flags;
+#endif
+                               __u64 data[16];
+                       };
                } system_event;
                /* KVM_EXIT_S390_STSI */
                struct {
@@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_MEM_OP_EXTENSION 211
 #define KVM_CAP_PMU_CAPABILITY 212
 #define KVM_CAP_DISABLE_QUIRKS2 213
+/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_SYSTEM_EVENT_DATA 215
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
index 39ebf6192016d2671c79c890f0807f2b5aec6d60..9fa75943f2ed177243f29b91629782bcb6a3059e 100644 (file)
@@ -806,9 +806,9 @@ static int option__cmp(const void *va, const void *vb)
 
 static struct option *options__order(const struct option *opts)
 {
-       int nr_opts = 0, len;
+       int nr_opts = 0, nr_group = 0, len;
        const struct option *o = opts;
-       struct option *ordered;
+       struct option *opt, *ordered, *group;
 
        for (o = opts; o->type != OPTION_END; o++)
                ++nr_opts;
@@ -819,7 +819,18 @@ static struct option *options__order(const struct option *opts)
                goto out;
        memcpy(ordered, opts, len);
 
-       qsort(ordered, nr_opts, sizeof(*o), option__cmp);
+       /* sort each option group individually */
+       for (opt = group = ordered; opt->type != OPTION_END; opt++) {
+               if (opt->type == OPTION_GROUP) {
+                       qsort(group, nr_group, sizeof(*opt), option__cmp);
+                       group = opt + 1;
+                       nr_group = 0;
+                       continue;
+               }
+               nr_group++;
+       }
+       qsort(group, nr_group, sizeof(*opt), option__cmp);
+
 out:
        return ordered;
 }
diff --git a/tools/lib/thermal/.gitignore b/tools/lib/thermal/.gitignore
new file mode 100644 (file)
index 0000000..5d2aeda
--- /dev/null
@@ -0,0 +1,2 @@
+libthermal.so*
+libthermal.pc
diff --git a/tools/lib/thermal/Build b/tools/lib/thermal/Build
new file mode 100644 (file)
index 0000000..4a892d9
--- /dev/null
@@ -0,0 +1,5 @@
+libthermal-y += commands.o
+libthermal-y += events.o
+libthermal-y += thermal_nl.o
+libthermal-y += sampling.o
+libthermal-y += thermal.o
diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile
new file mode 100644 (file)
index 0000000..2d0d255
--- /dev/null
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Most of this file is copied from tools/lib/perf/Makefile
+
+LIBTHERMAL_VERSION = 0
+LIBTHERMAL_PATCHLEVEL = 0
+LIBTHERMAL_EXTRAVERSION = 1
+
+MAKEFLAGS += --no-print-directory
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+# $(info Determined 'srctree' to be $(srctree))
+endif
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+include $(srctree)/tools/scripts/Makefile.include
+include $(srctree)/tools/scripts/Makefile.arch
+
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+
+ifeq ("$(origin V)", "command line")
+  VERBOSE = $(V)
+endif
+ifndef VERBOSE
+  VERBOSE = 0
+endif
+
+ifeq ($(VERBOSE),1)
+  Q =
+else
+  Q = @
+endif
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -Wall
+endif
+
+INCLUDES = \
+-I/usr/include/libnl3 \
+-I$(srctree)/tools/lib/thermal/include \
+-I$(srctree)/tools/lib/ \
+-I$(srctree)/tools/include \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \
+-I$(srctree)/tools/include/uapi
+
+# Append required CFLAGS
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += -Werror -Wall
+override CFLAGS += -fPIC
+override CFLAGS += $(INCLUDES)
+override CFLAGS += -fvisibility=hidden
+override CFLAGS += -Wl,-L.
+override CFLAGS += -Wl,-lthermal
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+export DESTDIR DESTDIR_SQ
+
+include $(srctree)/tools/build/Makefile.include
+
+VERSION_SCRIPT := libthermal.map
+
+PATCHLEVEL    = $(LIBTHERMAL_PATCHLEVEL)
+EXTRAVERSION  = $(LIBTHERMAL_EXTRAVERSION)
+VERSION       = $(LIBTHERMAL_VERSION).$(LIBTHERMAL_PATCHLEVEL).$(LIBTHERMAL_EXTRAVERSION)
+
+LIBTHERMAL_SO := $(OUTPUT)libthermal.so.$(VERSION)
+LIBTHERMAL_A  := $(OUTPUT)libthermal.a
+LIBTHERMAL_IN := $(OUTPUT)libthermal-in.o
+LIBTHERMAL_PC := $(OUTPUT)libthermal.pc
+LIBTHERMAL_ALL := $(LIBTHERMAL_A) $(OUTPUT)libthermal.so*
+
+THERMAL_UAPI := include/uapi/linux/thermal.h
+
+$(THERMAL_UAPI): FORCE
+       ln -sf $(srctree)/$@ $(srctree)/tools/$@
+
+$(LIBTHERMAL_IN): FORCE
+       $(Q)$(MAKE) $(build)=libthermal
+
+$(LIBTHERMAL_A): $(LIBTHERMAL_IN)
+       $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_IN)
+
+$(LIBTHERMAL_SO): $(LIBTHERMAL_IN)
+       $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal.so \
+                                    -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@
+       @ln -sf $(@F) $(OUTPUT)libthermal.so
+       @ln -sf $(@F) $(OUTPUT)libthermal.so.$(LIBTHERMAL_VERSION)
+
+
+libs: $(THERMAL_UAPI) $(LIBTHERMAL_A) $(LIBTHERMAL_SO) $(LIBTHERMAL_PC)
+
+all: fixdep
+       $(Q)$(MAKE) libs
+
+clean:
+       $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \
+                *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC)
+
+$(LIBTHERMAL_PC):
+       $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+               -e "s|@LIBDIR@|$(libdir_SQ)|" \
+               -e "s|@VERSION@|$(VERSION)|" \
+               < libthermal.pc.template > $@
+
+define do_install_mkdir
+       if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
+               $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+       fi
+endef
+
+define do_install
+       if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
+               $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+       fi;                                             \
+       $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: libs
+       $(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \
+               $(call do_install_mkdir,$(libdir_SQ)); \
+               cp -fpR $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+       $(call QUIET_INSTALL, headers) \
+               $(call do_install,include/thermal.h,$(prefix)/include/thermal,644)
+
+install_pkgconfig: $(LIBTHERMAL_PC)
+       $(call QUIET_INSTALL, $(LIBTHERMAL_PC)) \
+               $(call do_install,$(LIBTHERMAL_PC),$(libdir_SQ)/pkgconfig,644)
+
+install_doc:
+       $(Q)$(MAKE) -C Documentation install-man install-html install-examples
+
+install: install_lib install_headers install_pkgconfig
+
+FORCE:
+
+.PHONY: all install clean FORCE
diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c
new file mode 100644 (file)
index 0000000..73d4d4e
--- /dev/null
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = {
+       /* Thermal zone */
+       [THERMAL_GENL_ATTR_TZ]                  = { .type = NLA_NESTED },
+       [THERMAL_GENL_ATTR_TZ_ID]               = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_TZ_TEMP]             = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_TZ_TRIP]             = { .type = NLA_NESTED },
+       [THERMAL_GENL_ATTR_TZ_TRIP_ID]          = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_TZ_TRIP_TEMP]        = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_TZ_TRIP_TYPE]        = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_TZ_TRIP_HYST]        = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_TZ_MODE]             = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT]      = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_TZ_NAME]             = { .type = NLA_STRING },
+
+       /* Governor(s) */
+       [THERMAL_GENL_ATTR_TZ_GOV]              = { .type = NLA_NESTED },
+       [THERMAL_GENL_ATTR_TZ_GOV_NAME]         = { .type = NLA_STRING },
+
+       /* Cooling devices */
+       [THERMAL_GENL_ATTR_CDEV]                = { .type = NLA_NESTED },
+       [THERMAL_GENL_ATTR_CDEV_ID]             = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_CDEV_CUR_STATE]      = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_CDEV_MAX_STATE]      = { .type = NLA_U32 },
+       [THERMAL_GENL_ATTR_CDEV_NAME]           = { .type = NLA_STRING },
+};
+
+static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz)
+{
+       struct nlattr *attr;
+       struct thermal_zone *__tz = NULL;
+       size_t size = 0;
+       int rem;
+
+       nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ], rem) {
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_ID) {
+
+                       size++;
+
+                       struct thermal_zone *tmp;
+
+                       tmp = realloc(__tz, sizeof(*__tz) * (size + 2));
+                       if (!tmp) {
+                               free(__tz);
+                               return THERMAL_ERROR;
+                       }
+                       __tz = tmp;
+
+                       __tz[size - 1].id = nla_get_u32(attr);
+               }
+
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_NAME)
+                       nla_strlcpy(__tz[size - 1].name, attr,
+                                   THERMAL_NAME_LENGTH);
+       }
+
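+       /* terminate the array with an invalid id so callers can iterate it */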
+       if (__tz)
+               __tz[size].id = -1;
+
+       *tz = __tz;
+
+       return THERMAL_SUCCESS;
+}
+
+static int parse_cdev_get(struct genl_info *info, struct thermal_cdev **cdev)
+{
+       struct nlattr *attr;
+       struct thermal_cdev *__cdev = NULL;
+       size_t size = 0;
+       int rem;
+
+       nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_CDEV], rem) {
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_ID) {
+
+                       size++;
+
+                       struct thermal_cdev *tmp;
+
+                       tmp = realloc(__cdev, sizeof(*__cdev) * (size + 2));
+                       if (!tmp) {
+                               free(__cdev);
+                               return THERMAL_ERROR;
+                       }
+                       __cdev = tmp;
+
+                       __cdev[size - 1].id = nla_get_u32(attr);
+               }
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_NAME) {
+                       nla_strlcpy(__cdev[size - 1].name, attr,
+                                   THERMAL_NAME_LENGTH);
+               }
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_CUR_STATE)
+                       __cdev[size - 1].cur_state = nla_get_u32(attr);
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_MAX_STATE)
+                       __cdev[size - 1].max_state = nla_get_u32(attr);
+       }
+
+       if (__cdev)
+               __cdev[size].id = -1;
+
+       *cdev = __cdev;
+
+       return THERMAL_SUCCESS;
+}
+
+static int parse_tz_get_trip(struct genl_info *info, struct thermal_zone *tz)
+{
+       struct nlattr *attr;
+       struct thermal_trip *__tt = NULL;
+       size_t size = 0;
+       int rem;
+
+       nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ_TRIP], rem) {
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_ID) {
+
+                       size++;
+
+                       struct thermal_trip *tmp;
+
+                       tmp = realloc(__tt, sizeof(*__tt) * (size + 2));
+                       if (!tmp) {
+                               free(__tt);
+                               return THERMAL_ERROR;
+                       }
+                       __tt = tmp;
+
+                       __tt[size - 1].id = nla_get_u32(attr);
+               }
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TYPE)
+                       __tt[size - 1].type = nla_get_u32(attr);
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TEMP)
+                       __tt[size - 1].temp = nla_get_u32(attr);
+
+               if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_HYST)
+                       __tt[size - 1].hyst = nla_get_u32(attr);
+       }
+
+       if (__tt)
+               __tt[size].id = -1;
+
+       tz->trip = __tt;
+
+       return THERMAL_SUCCESS;
+}
+
+static int parse_tz_get_temp(struct genl_info *info, struct thermal_zone *tz)
+{
+       int id = -1;
+
+       if (info->attrs[THERMAL_GENL_ATTR_TZ_ID])
+               id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]);
+
+       if (tz->id != id)
+               return THERMAL_ERROR;
+
+       if (info->attrs[THERMAL_GENL_ATTR_TZ_TEMP])
+               tz->temp = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]);
+
+       return THERMAL_SUCCESS;
+}
+
+static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz)
+{
+       int id = -1;
+
+       if (info->attrs[THERMAL_GENL_ATTR_TZ_ID])
+               id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]);
+
+       if (tz->id != id)
+               return THERMAL_ERROR;
+
+       if (info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME]) {
+               nla_strlcpy(tz->governor,
+                           info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME],
+                           THERMAL_NAME_LENGTH);
+       }
+
+       return THERMAL_SUCCESS;
+}
+
+static int handle_netlink(struct nl_cache_ops *unused,
+                         struct genl_cmd *cmd,
+                         struct genl_info *info, void *arg)
+{
+       int ret;
+
+       switch (cmd->c_id) {
+
+       case THERMAL_GENL_CMD_TZ_GET_ID:
+               ret = parse_tz_get(info, arg);
+               break;
+
+       case THERMAL_GENL_CMD_CDEV_GET:
+               ret = parse_cdev_get(info, arg);
+               break;
+
+       case THERMAL_GENL_CMD_TZ_GET_TEMP:
+               ret = parse_tz_get_temp(info, arg);
+               break;
+
+       case THERMAL_GENL_CMD_TZ_GET_TRIP:
+               ret = parse_tz_get_trip(info, arg);
+               break;
+
+       case THERMAL_GENL_CMD_TZ_GET_GOV:
+               ret = parse_tz_get_gov(info, arg);
+               break;
+
+       default:
+               return THERMAL_ERROR;
+       }
+
+       return ret;
+}
+
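+/*
+ * Command table for libnl's generic netlink management layer: every
+ * reply is funneled through handle_netlink(), which dispatches on the
+ * command id.
+ */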
+static struct genl_cmd thermal_cmds[] = {
+       {
+               .c_id           = THERMAL_GENL_CMD_TZ_GET_ID,
+               .c_name         = (char *)"List thermal zones",
+               .c_msg_parser   = handle_netlink,
+               .c_maxattr      = THERMAL_GENL_ATTR_MAX,
+               .c_attr_policy  = thermal_genl_policy,
+       },
+       {
+               .c_id           = THERMAL_GENL_CMD_TZ_GET_GOV,
+               .c_name         = (char *)"Get governor",
+               .c_msg_parser   = handle_netlink,
+               .c_maxattr      = THERMAL_GENL_ATTR_MAX,
+               .c_attr_policy  = thermal_genl_policy,
+       },
+       {
+               .c_id           = THERMAL_GENL_CMD_TZ_GET_TEMP,
+               .c_name         = (char *)"Get thermal zone temperature",
+               .c_msg_parser   = handle_netlink,
+               .c_maxattr      = THERMAL_GENL_ATTR_MAX,
+               .c_attr_policy  = thermal_genl_policy,
+       },
+       {
+               .c_id           = THERMAL_GENL_CMD_TZ_GET_TRIP,
+               .c_name         = (char *)"Get thermal zone trip points",
+               .c_msg_parser   = handle_netlink,
+               .c_maxattr      = THERMAL_GENL_ATTR_MAX,
+               .c_attr_policy  = thermal_genl_policy,
+       },
+       {
+               .c_id           = THERMAL_GENL_CMD_CDEV_GET,
+               .c_name         = (char *)"Get cooling devices",
+               .c_msg_parser   = handle_netlink,
+               .c_maxattr      = THERMAL_GENL_ATTR_MAX,
+               .c_attr_policy  = thermal_genl_policy,
+       },
+};
+
+static struct genl_ops thermal_cmd_ops = {
+       .o_name         = (char *)"thermal",
+       .o_cmds         = thermal_cmds,
+       .o_ncmds        = ARRAY_SIZE(thermal_cmds),
+};
+
+static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int cmd,
+                                        int flags, void *arg)
+{
+       struct nl_msg *msg;
+       void *hdr;
+
+       msg = nlmsg_alloc();
+       if (!msg)
+               return THERMAL_ERROR;
+
+       hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id,
+                         0, flags, cmd, THERMAL_GENL_VERSION);
+       if (!hdr)
+               goto out_free_msg;
+
+       if (id >= 0 && nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id))
+               goto out_free_msg;
+
+       if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg))
+               goto out_free_msg;
+
+       nlmsg_free(msg);
+
+       return THERMAL_SUCCESS;
+
+out_free_msg:
+       /* Don't leak the message on the error paths. */
+       nlmsg_free(msg);
+       return THERMAL_ERROR;
+}
+
+thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz)
+{
+       return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_TZ_GET_ID,
+                                NLM_F_DUMP | NLM_F_ACK, tz);
+}
+
+thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc)
+{
+       return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_CDEV_GET,
+                                NLM_F_DUMP | NLM_F_ACK, tc);
+}
+
+thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz)
+{
+       return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TRIP,
+                                0, tz);
+}
+
+thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz)
+{
+       return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz);
+}
+
+thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz)
+{
+       return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz);
+}
+
+thermal_error_t thermal_cmd_exit(struct thermal_handler *th)
+{
+       if (genl_unregister_family(&thermal_cmd_ops))
+               return THERMAL_ERROR;
+
+       nl_thermal_disconnect(th->sk_cmd, th->cb_cmd);
+
+       return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_cmd_init(struct thermal_handler *th)
+{
+       int ret;
+       int family;
+
+       if (nl_thermal_connect(&th->sk_cmd, &th->cb_cmd))
+               return THERMAL_ERROR;
+
+       ret = genl_register_family(&thermal_cmd_ops);
+       if (ret)
+               return THERMAL_ERROR;
+
+       ret = genl_ops_resolve(th->sk_cmd, &thermal_cmd_ops);
+       if (ret)
+               return THERMAL_ERROR;
+
+       family = genl_ctrl_resolve(th->sk_cmd, "nlctrl");
+       if (family != GENL_ID_CTRL)
+               return THERMAL_ERROR;
+
+       return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c
new file mode 100644 (file)
index 0000000..a7a55d1
--- /dev/null
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <linux/netlink.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+/*
+ * Optimization: fill this array to tell which events we want to pay
+ * attention to. That happens at init time from the ops structure.
+ * Each ops enables its event, and the generic handler can discard
+ * any event that has no ops associated with it.
+ */
+static int enabled_ops[__THERMAL_GENL_EVENT_MAX];
+
+static int handle_thermal_event(struct nl_msg *n, void *arg)
+{
+       struct nlmsghdr *nlh = nlmsg_hdr(n);
+       struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+       struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+       struct thermal_handler_param *thp = arg;
+       struct thermal_events_ops *ops = &thp->th->ops->events;
+
+       genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+       arg = thp->arg;
+
+       /*
+        * This is an event we don't care about, bail out.
+        */
+       if (!enabled_ops[genlhdr->cmd])
+               return THERMAL_SUCCESS;
+
+       switch (genlhdr->cmd) {
+
+       case THERMAL_GENL_EVENT_TZ_CREATE:
+               return ops->tz_create(nla_get_string(attrs[THERMAL_GENL_ATTR_TZ_NAME]),
+                                     nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+       case THERMAL_GENL_EVENT_TZ_DELETE:
+               return ops->tz_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+       case THERMAL_GENL_EVENT_TZ_ENABLE:
+               return ops->tz_enable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+       case THERMAL_GENL_EVENT_TZ_DISABLE:
+               return ops->tz_disable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg);
+
+       case THERMAL_GENL_EVENT_TZ_TRIP_CHANGE:
+               return ops->trip_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+                                       nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+                                       nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]),
+                                       nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]),
+                                       nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg);
+
+       case THERMAL_GENL_EVENT_TZ_TRIP_ADD:
+               return ops->trip_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+                                    nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+                                    nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]),
+                                    nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]),
+                                    nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg);
+
+       case THERMAL_GENL_EVENT_TZ_TRIP_DELETE:
+               return ops->trip_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+                                       nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), arg);
+
+       case THERMAL_GENL_EVENT_TZ_TRIP_UP:
+               return ops->trip_high(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+                                     nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+                                     nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg);
+
+       case THERMAL_GENL_EVENT_TZ_TRIP_DOWN:
+               return ops->trip_low(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+                                    nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]),
+                                    nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg);
+
+       case THERMAL_GENL_EVENT_CDEV_ADD:
+               return ops->cdev_add(nla_get_string(attrs[THERMAL_GENL_ATTR_CDEV_NAME]),
+                                    nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]),
+                                    nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_MAX_STATE]), arg);
+
+       case THERMAL_GENL_EVENT_CDEV_DELETE:
+               return ops->cdev_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), arg);
+
+       case THERMAL_GENL_EVENT_CDEV_STATE_UPDATE:
+               return ops->cdev_update(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]),
+                                       nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_CUR_STATE]), arg);
+
+       case THERMAL_GENL_EVENT_TZ_GOV_CHANGE:
+               return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+                                      nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg);
+       default:
+               return THERMAL_ERROR;
+       }
+}
+
+static void thermal_events_ops_init(struct thermal_events_ops *ops)
+{
+       enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE]       = !!ops->tz_create;
+       enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE]       = !!ops->tz_delete;
+       enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE]      = !!ops->tz_disable;
+       enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE]       = !!ops->tz_enable;
+       enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP]      = !!ops->trip_high;
+       enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN]    = !!ops->trip_low;
+       enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE]  = !!ops->trip_change;
+       enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD]     = !!ops->trip_add;
+       enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE]  = !!ops->trip_delete;
+       enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD]        = !!ops->cdev_add;
+       enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE]     = !!ops->cdev_delete;
+       enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update;
+       enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE]   = !!ops->gov_change;
+}
+
+thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg)
+{
+       struct thermal_handler_param thp = { .th = th, .arg = arg };
+
+       if (!th)
+               return THERMAL_ERROR;
+
+       if (nl_cb_set(th->cb_event, NL_CB_VALID, NL_CB_CUSTOM,
+                     handle_thermal_event, &thp))
+               return THERMAL_ERROR;
+
+       return nl_recvmsgs(th->sk_event, th->cb_event);
+}
+
+int thermal_events_fd(struct thermal_handler *th)
+{
+       if (!th)
+               return -1;
+
+       return nl_socket_get_fd(th->sk_event);
+}
+
+thermal_error_t thermal_events_exit(struct thermal_handler *th)
+{
+       if (nl_unsubscribe_thermal(th->sk_event, th->cb_event,
+                                  THERMAL_GENL_EVENT_GROUP_NAME))
+               return THERMAL_ERROR;
+
+       nl_thermal_disconnect(th->sk_event, th->cb_event);
+
+       return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_events_init(struct thermal_handler *th)
+{
+       thermal_events_ops_init(&th->ops->events);
+
+       if (nl_thermal_connect(&th->sk_event, &th->cb_event))
+               return THERMAL_ERROR;
+
+       if (nl_subscribe_thermal(th->sk_event, th->cb_event,
+                                THERMAL_GENL_EVENT_GROUP_NAME))
+               return THERMAL_ERROR;
+
+       return THERMAL_SUCCESS;
+}
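
A minimal consumer sketch (illustrative, not part of this patch),
assuming the public thermal.h API added later in this series and an
application linked against this library and libnl: register one event
callback, then service the event socket from a poll(2) loop.

    #include <poll.h>
    #include <stdio.h>
    #include <thermal.h>

    static int on_trip_high(int tz_id, int trip_id, int temp, void *arg)
    {
            printf("zone %d crossed trip %d at %d\n", tz_id, trip_id, temp);
            return 0;
    }

    int main(void)
    {
            /* Only trip_high is set; all other events are discarded. */
            struct thermal_ops ops = { .events.trip_high = on_trip_high };
            struct thermal_handler *th = thermal_init(&ops);
            struct pollfd pfd;

            if (!th)
                    return 1;

            pfd.fd = thermal_events_fd(th);
            pfd.events = POLLIN;

            while (poll(&pfd, 1, -1) > 0)
                    thermal_events_handle(th, NULL);

            thermal_exit(th);
            return 0;
    }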
diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h
new file mode 100644 (file)
index 0000000..1abc560
--- /dev/null
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __LIBTHERMAL_H
+#define __LIBTHERMAL_H
+
+#include <linux/thermal.h>
+
+#ifndef LIBTHERMAL_API
+#define LIBTHERMAL_API __attribute__((visibility("default")))
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct thermal_sampling_ops {
+       int (*tz_temp)(int tz_id, int temp, void *arg);
+};
+
+struct thermal_events_ops {
+       int (*tz_create)(const char *name, int tz_id, void *arg);
+       int (*tz_delete)(int tz_id, void *arg);
+       int (*tz_enable)(int tz_id, void *arg);
+       int (*tz_disable)(int tz_id, void *arg);
+       int (*trip_high)(int tz_id, int trip_id, int temp, void *arg);
+       int (*trip_low)(int tz_id, int trip_id, int temp, void *arg);
+       int (*trip_add)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg);
+       int (*trip_change)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg);
+       int (*trip_delete)(int tz_id, int trip_id, void *arg);
+       int (*cdev_add)(const char *name, int cdev_id, int max_state, void *arg);
+       int (*cdev_delete)(int cdev_id, void *arg);
+       int (*cdev_update)(int cdev_id, int cur_state, void *arg);
+       int (*gov_change)(int tz_id, const char *gov_name, void *arg);
+};
+
+struct thermal_ops {
+       struct thermal_sampling_ops sampling;
+       struct thermal_events_ops events;
+};
+
+struct thermal_trip {
+       int id;
+       int type;
+       int temp;
+       int hyst;
+};
+
+struct thermal_zone {
+       int id;
+       int temp;
+       char name[THERMAL_NAME_LENGTH];
+       char governor[THERMAL_NAME_LENGTH];
+       struct thermal_trip *trip;
+};
+
+struct thermal_cdev {
+       int id;
+       char name[THERMAL_NAME_LENGTH];
+       int max_state;
+       int min_state;
+       int cur_state;
+};
+
+typedef enum {
+       THERMAL_ERROR = -1,
+       THERMAL_SUCCESS = 0,
+} thermal_error_t;
+
+struct thermal_handler;
+
+typedef int (*cb_tz_t)(struct thermal_zone *, void *);
+
+typedef int (*cb_tt_t)(struct thermal_trip *, void *);
+
+typedef int (*cb_tc_t)(struct thermal_cdev *, void *);
+
+LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg);
+
+LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg);
+
+LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg);
+
+LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz,
+                                                             const char *name);
+
+LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id);
+
+LIBTHERMAL_API struct thermal_zone *thermal_zone_discover(struct thermal_handler *th);
+
+LIBTHERMAL_API struct thermal_handler *thermal_init(struct thermal_ops *ops);
+
+LIBTHERMAL_API void thermal_exit(struct thermal_handler *th);
+
+/*
+ * Netlink thermal events
+ */
+LIBTHERMAL_API thermal_error_t thermal_events_exit(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_events_init(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg);
+
+LIBTHERMAL_API int thermal_events_fd(struct thermal_handler *th);
+
+/*
+ * Netlink thermal commands
+ */
+LIBTHERMAL_API thermal_error_t thermal_cmd_exit(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_init(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th,
+                                                 struct thermal_zone **tz);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th,
+                                                   struct thermal_cdev **tc);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th,
+                                                   struct thermal_zone *tz);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th,
+                                                       struct thermal_zone *tz);
+
+LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th,
+                                                   struct thermal_zone *tz);
+
+/*
+ * Netlink thermal samples
+ */
+LIBTHERMAL_API thermal_error_t thermal_sampling_exit(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_sampling_init(struct thermal_handler *th);
+
+LIBTHERMAL_API thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg);
+
+LIBTHERMAL_API int thermal_sampling_fd(struct thermal_handler *th);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __LIBTHERMAL_H */
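
Putting the command API together, a sketch of a consumer (illustrative,
not part of this patch): discover the zones over netlink and walk each
zone's trip points with the iterators declared above.

    #include <stdio.h>
    #include <thermal.h>

    static int show_trip(struct thermal_trip *tt, void *arg)
    {
            printf("  trip id=%d type=%d temp=%d hyst=%d\n",
                   tt->id, tt->type, tt->temp, tt->hyst);
            return 0;
    }

    static int show_tz(struct thermal_zone *tz, void *arg)
    {
            printf("zone %d '%s', governor '%s'\n",
                   tz->id, tz->name, tz->governor);
            return for_each_thermal_trip(tz->trip, show_trip, NULL);
    }

    int main(void)
    {
            /* No event/sampling callbacks: commands only. */
            struct thermal_ops ops = { 0 };
            struct thermal_handler *th = thermal_init(&ops);
            struct thermal_zone *tz;

            if (!th)
                    return 1;

            tz = thermal_zone_discover(th);
            if (!tz)
                    return 1;

            for_each_thermal_zone(tz, show_tz, NULL);

            thermal_exit(th);
            return 0;
    }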
diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map
new file mode 100644 (file)
index 0000000..d5e7773
--- /dev/null
@@ -0,0 +1,25 @@
+LIBTHERMAL_0.0.1 {
+       global:
+               thermal_init;
+               for_each_thermal_zone;
+               for_each_thermal_trip;
+               for_each_thermal_cdev;
+               thermal_zone_find_by_name;
+               thermal_zone_find_by_id;
+               thermal_zone_discover;
+               thermal_exit;
+               thermal_events_exit;
+               thermal_cmd_exit;
+               thermal_sampling_exit;
+               thermal_events_init;
+               thermal_events_handle;
+               thermal_events_fd;
+               thermal_cmd_init;
+               thermal_cmd_get_tz;
+               thermal_cmd_get_cdev;
+               thermal_cmd_get_trip;
+               thermal_cmd_get_governor;
+               thermal_cmd_get_temp;
+               thermal_sampling_init;
+               thermal_sampling_handle;
+               thermal_sampling_fd;
+       local:
+               *;
+};
diff --git a/tools/lib/thermal/libthermal.pc.template b/tools/lib/thermal/libthermal.pc.template
new file mode 100644 (file)
index 0000000..6f37697
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libthermal
+Description: thermal library
+Requires: libnl-3.0 libnl-genl-3.0
+Version: @VERSION@
+Libs: -L${libdir} -lthermal -lnl-genl-3 -lnl-3
+Cflags: -I${includedir} -I${includedir}/libnl3
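
With the template variables filled in at install time, a consumer would
build against the library through pkg-config in the usual way, e.g.
(assuming the library and header have been installed):

    cc $(pkg-config --cflags libthermal) -o app app.c \
            $(pkg-config --libs libthermal)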
diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c
new file mode 100644 (file)
index 0000000..ee818f4
--- /dev/null
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+static int handle_thermal_sample(struct nl_msg *n, void *arg)
+{
+       struct nlmsghdr *nlh = nlmsg_hdr(n);
+       struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+       struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+       struct thermal_handler_param *thp = arg;
+       struct thermal_handler *th = thp->th;
+
+       genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+       switch (genlhdr->cmd) {
+
+       case THERMAL_GENL_SAMPLING_TEMP:
+               return th->ops->sampling.tz_temp(
+                       nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]),
+                       nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg);
+       default:
+               return THERMAL_ERROR;
+       }
+}
+
+thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg)
+{
+       struct thermal_handler_param thp = { .th = th, .arg = arg };
+
+       if (!th)
+               return THERMAL_ERROR;
+
+       if (nl_cb_set(th->cb_sampling, NL_CB_VALID, NL_CB_CUSTOM,
+                     handle_thermal_sample, &thp))
+               return THERMAL_ERROR;
+
+       return nl_recvmsgs(th->sk_sampling, th->cb_sampling);
+}
+
+int thermal_sampling_fd(struct thermal_handler *th)
+{
+       if (!th)
+               return -1;
+
+       return nl_socket_get_fd(th->sk_sampling);
+}
+
+thermal_error_t thermal_sampling_exit(struct thermal_handler *th)
+{
+       if (nl_unsubscribe_thermal(th->sk_sampling, th->cb_sampling,
+                                  THERMAL_GENL_EVENT_GROUP_NAME))
+               return THERMAL_ERROR;
+
+       nl_thermal_disconnect(th->sk_sampling, th->cb_sampling);
+
+       return THERMAL_SUCCESS;
+}
+
+thermal_error_t thermal_sampling_init(struct thermal_handler *th)
+{
+       if (nl_thermal_connect(&th->sk_sampling, &th->cb_sampling))
+               return THERMAL_ERROR;
+
+       if (nl_subscribe_thermal(th->sk_sampling, th->cb_sampling,
+                                THERMAL_GENL_SAMPLING_GROUP_NAME))
+               return THERMAL_ERROR;
+
+       return THERMAL_SUCCESS;
+}
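
As with events, a minimal sketch of the intended usage (illustrative,
not part of this patch): register the tz_temp callback and print every
temperature sample broadcast on the sampling multicast group.

    #include <stdio.h>
    #include <thermal.h>

    static int on_temp(int tz_id, int temp, void *arg)
    {
            printf("zone %d: %d\n", tz_id, temp);
            return 0;
    }

    int main(void)
    {
            struct thermal_ops ops = { .sampling.tz_temp = on_temp };
            struct thermal_handler *th = thermal_init(&ops);

            if (!th)
                    return 1;

            /* nl_recvmsgs() blocks until a sample arrives. */
            for (;;)
                    thermal_sampling_handle(th, NULL);
    }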
diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c
new file mode 100644 (file)
index 0000000..72a76dc
--- /dev/null
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <thermal.h>
+
+#include "thermal_nl.h"
+
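+/*
+ * The zone, trip and cooling device arrays built by the netlink
+ * command handlers end with an entry whose id is -1; the iterators
+ * below walk until they hit that sentinel.
+ */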
+int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg)
+{
+       int i, ret = 0;
+
+       if (!cdev)
+               return 0;
+
+       for (i = 0; cdev[i].id != -1; i++)
+               ret |= cb(&cdev[i], arg);
+
+       return ret;
+}
+
+int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg)
+{
+       int i, ret = 0;
+
+       if (!tt)
+               return 0;
+
+       for (i = 0; tt[i].id != -1; i++)
+               ret |= cb(&tt[i], arg);
+
+       return ret;
+}
+
+int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg)
+{
+       int i, ret = 0;
+
+       if (!tz)
+               return 0;
+
+       for (i = 0; tz[i].id != -1; i++)
+               ret |= cb(&tz[i], arg);
+
+       return ret;
+}
+
+struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz,
+                                              const char *name)
+{
+       int i;
+
+       if (!tz || !name)
+               return NULL;
+
+       for (i = 0; tz[i].id != -1; i++) {
+               if (!strcmp(tz[i].name, name))
+                       return &tz[i];
+       }
+
+       return NULL;
+}
+
+struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id)
+{
+       int i;
+
+       if (!tz || id < 0)
+               return NULL;
+
+       for (i = 0; tz[i].id != -1; i++) {
+               if (tz[i].id == id)
+                       return &tz[i];
+       }
+
+       return NULL;
+}
+
+static int __thermal_zone_discover(struct thermal_zone *tz, void *th)
+{
+       if (thermal_cmd_get_trip(th, tz) < 0)
+               return -1;
+
+       if (thermal_cmd_get_governor(th, tz))
+               return -1;
+
+       return 0;
+}
+
+struct thermal_zone *thermal_zone_discover(struct thermal_handler *th)
+{
+       struct thermal_zone *tz;
+
+       if (thermal_cmd_get_tz(th, &tz) < 0)
+               return NULL;
+
+       if (for_each_thermal_zone(tz, __thermal_zone_discover, th))
+               return NULL;
+
+       return tz;
+}
+
+void thermal_exit(struct thermal_handler *th)
+{
+       thermal_cmd_exit(th);
+       thermal_events_exit(th);
+       thermal_sampling_exit(th);
+
+       free(th);
+}
+
+struct thermal_handler *thermal_init(struct thermal_ops *ops)
+{
+       struct thermal_handler *th;
+
+       th = malloc(sizeof(*th));
+       if (!th)
+               return NULL;
+       th->ops = ops;
+
+       if (thermal_events_init(th))
+               goto out_free;
+
+       if (thermal_sampling_init(th))
+               goto out_free;
+
+       if (thermal_cmd_init(th))
+               goto out_free;
+
+       return th;
+
+out_free:
+       free(th);
+
+       return NULL;
+}
diff --git a/tools/lib/thermal/thermal_nl.c b/tools/lib/thermal/thermal_nl.c
new file mode 100644 (file)
index 0000000..b05cf95
--- /dev/null
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <thermal.h>
+#include "thermal_nl.h"
+
+struct handler_args {
+       const char *group;
+       int id;
+};
+
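+/*
+ * 'err' and 'done' are set by the error/finish/ack handlers below so
+ * that nl_send_msg() can keep calling nl_recvmsgs() until the kernel
+ * has acked the request or finished the dump.
+ */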
+static __thread int err;
+static __thread int done;
+
+static int nl_seq_check_handler(struct nl_msg *msg, void *arg)
+{
+       return NL_OK;
+}
+
+static int nl_error_handler(struct sockaddr_nl *nla, struct nlmsgerr *nl_err,
+                           void *arg)
+{
+       int *ret = arg;
+
+       if (ret)
+               *ret = nl_err->error;
+
+       return NL_STOP;
+}
+
+static int nl_finish_handler(struct nl_msg *msg, void *arg)
+{
+       int *ret = arg;
+
+       if (ret)
+               *ret = 1;
+
+       return NL_OK;
+}
+
+static int nl_ack_handler(struct nl_msg *msg, void *arg)
+{
+       int *ret = arg;
+
+       if (ret)
+               *ret = 1;
+
+       return NL_OK;
+}
+
+int nl_send_msg(struct nl_sock *sock, struct nl_cb *cb, struct nl_msg *msg,
+               int (*rx_handler)(struct nl_msg *, void *), void *data)
+{
+       if (!rx_handler)
+               return THERMAL_ERROR;
+
+       err = nl_send_auto_complete(sock, msg);
+       if (err < 0)
+               return err;
+
+       nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, rx_handler, data);
+
+       err = done = 0;
+
+       while (err == 0 && done == 0)
+               nl_recvmsgs(sock, cb);
+
+       return err;
+}
+
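+/*
+ * Parse a CTRL_CMD_GETFAMILY reply: walk the family's multicast
+ * groups and pick up the id of the group we are looking for.
+ */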
+static int nl_family_handler(struct nl_msg *msg, void *arg)
+{
+       struct handler_args *grp = arg;
+       struct nlattr *tb[CTRL_ATTR_MAX + 1];
+       struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+       struct nlattr *mcgrp;
+       int rem_mcgrp;
+
+       nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+                 genlmsg_attrlen(gnlh, 0), NULL);
+
+       if (!tb[CTRL_ATTR_MCAST_GROUPS])
+               return THERMAL_ERROR;
+
+       nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) {
+
+               struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1];
+
+               nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX,
+                         nla_data(mcgrp), nla_len(mcgrp), NULL);
+
+               if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] ||
+                   !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID])
+                       continue;
+
+               if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]),
+                           grp->group,
+                           nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME])))
+                       continue;
+
+               grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]);
+
+               break;
+       }
+
+       return THERMAL_SUCCESS;
+}
+
+static int nl_get_multicast_id(struct nl_sock *sock, struct nl_cb *cb,
+                              const char *family, const char *group)
+{
+       struct nl_msg *msg;
+       int ret = 0, ctrlid;
+       struct handler_args grp = {
+               .group = group,
+               .id = -ENOENT,
+       };
+
+       msg = nlmsg_alloc();
+       if (!msg)
+               return THERMAL_ERROR;
+
+       ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+
+       genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
+
+       nla_put_string(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+       ret = nl_send_msg(sock, cb, msg, nl_family_handler, &grp);
+       if (ret)
+               goto nla_put_failure;
+
+       ret = grp.id;
+
+nla_put_failure:
+       nlmsg_free(msg);
+       return ret;
+}
+
+int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb)
+{
+       struct nl_cb *cb;
+       struct nl_sock *sock;
+
+       cb = nl_cb_alloc(NL_CB_DEFAULT);
+       if (!cb)
+               return THERMAL_ERROR;
+
+       sock = nl_socket_alloc();
+       if (!sock)
+               goto out_cb_free;
+
+       if (genl_connect(sock))
+               goto out_socket_free;
+
+       if (nl_cb_err(cb, NL_CB_CUSTOM, nl_error_handler, &err) ||
+           nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, nl_finish_handler, &done) ||
+           nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, nl_ack_handler, &done) ||
+           nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check_handler, &done))
+               goto out_socket_free;
+
+       *nl_sock = sock;
+       *nl_cb = cb;
+
+       return THERMAL_SUCCESS;
+
+out_socket_free:
+       nl_socket_free(sock);
+out_cb_free:
+       nl_cb_put(cb);
+       return THERMAL_ERROR;
+}
+
+void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb)
+{
+       nl_close(nl_sock);
+       nl_socket_free(nl_sock);
+       nl_cb_put(nl_cb);
+}
+
+int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+                          const char *group)
+{
+       int mcid;
+
+       mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME,
+                                  group);
+       if (mcid < 0)
+               return THERMAL_ERROR;
+
+       if (nl_socket_drop_membership(nl_sock, mcid))
+               return THERMAL_ERROR;
+
+       return THERMAL_SUCCESS;
+}
+
+int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+                        const char *group)
+{
+       int mcid;
+
+       mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME,
+                                  group);
+       if (mcid < 0)
+               return THERMAL_ERROR;
+
+       if (nl_socket_add_membership(nl_sock, mcid))
+               return THERMAL_ERROR;
+
+       return THERMAL_SUCCESS;
+}
diff --git a/tools/lib/thermal/thermal_nl.h b/tools/lib/thermal/thermal_nl.h
new file mode 100644 (file)
index 0000000..ddf6356
--- /dev/null
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_H
+#define __THERMAL_H
+
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/mngt.h>
+#include <netlink/genl/ctrl.h>
+
+struct thermal_handler {
+       int done;
+       int error;
+       struct thermal_ops *ops;
+       struct nl_msg *msg;
+       struct nl_sock *sk_event;
+       struct nl_sock *sk_sampling;
+       struct nl_sock *sk_cmd;
+       struct nl_cb *cb_cmd;
+       struct nl_cb *cb_event;
+       struct nl_cb *cb_sampling;
+};
+
+struct thermal_handler_param {
+       struct thermal_handler *th;
+       void *arg;
+};
+
+/*
+ * Low level netlink
+ */
+extern int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+                               const char *group);
+
+extern int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb,
+                                 const char *group);
+
+extern int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb);
+
+extern void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb);
+
+extern int nl_send_msg(struct nl_sock *sock, struct nl_cb *nl_cb, struct nl_msg *msg,
+                      int (*rx_handler)(struct nl_msg *, void *),
+                      void *data);
+
+#endif /* __THERMAL_H */
index 9edd402704c4f2870d448a1d3f4c3b9f15d7962d..dab38904206a0ba0fea7ccd10469443fec1f396c 100644 (file)
@@ -54,7 +54,8 @@ klitmus7 Compatibility Table
             -- 4.14  7.48 --
        4.15 -- 4.19  7.49 --
        4.20 -- 5.5   7.54 --
-       5.6  --       7.56 --
+       5.6  -- 5.16  7.56 --
+       5.17 --       7.56.1 --
        ============  ==========
 
 
index b7222d5cc7bc9019867a2982e855d9315cfaa1c8..33f2ee5a46d3bc981be557feb053cae2fcae954a 100644 (file)
@@ -2,17 +2,15 @@ objtool-y += arch/$(SRCARCH)/
 
 objtool-y += weak.o
 
-objtool-$(SUBCMD_CHECK) += check.o
-objtool-$(SUBCMD_CHECK) += special.o
-objtool-$(SUBCMD_ORC) += check.o
-objtool-$(SUBCMD_ORC) += orc_gen.o
-objtool-$(SUBCMD_ORC) += orc_dump.o
-
+objtool-y += check.o
+objtool-y += special.o
 objtool-y += builtin-check.o
-objtool-y += builtin-orc.o
 objtool-y += elf.o
 objtool-y += objtool.o
 
+objtool-$(BUILD_ORC) += orc_gen.o
+objtool-$(BUILD_ORC) += orc_dump.o
+
 objtool-y += libstring.o
 objtool-y += libctype.o
 objtool-y += str_error_r.o
similarity index 80%
rename from tools/objtool/Documentation/stack-validation.txt
rename to tools/objtool/Documentation/objtool.txt
index 30f38fdc0d56cb769b589f4bd4de452758344964..8a671902a18757b63f493b5611f5ef49addffb3a 100644 (file)
-Compile-time stack metadata validation
-======================================
+Objtool
+=======
 
+The kernel CONFIG_OBJTOOL option enables a host tool named 'objtool'
+which runs at compile time.  It can do various validations and
+transformations on .o files.
 
-Overview
+Objtool has become an integral part of the x86-64 kernel toolchain.  The
+kernel depends on it for a variety of security and performance features
+(and other types of features as well).
+
+
+Features
 --------
 
-The kernel CONFIG_STACK_VALIDATION option enables a host tool named
-objtool which runs at compile time.  It has a "check" subcommand which
-analyzes every .o file and ensures the validity of its stack metadata.
-It enforces a set of rules on asm code and C inline assembly code so
-that stack traces can be reliable.
+Objtool has the following features:
+
+- Stack unwinding metadata validation -- useful for helping to ensure
+  stack traces are reliable for live patching
+
+- ORC unwinder metadata generation -- a faster and more precise
+  alternative to frame pointer based unwinding
+
+- Retpoline validation -- ensures that all indirect calls go through
+  retpoline thunks, for Spectre v2 mitigations
+
+- Retpoline call site annotation -- annotates all retpoline thunk call
+  sites, enabling the kernel to patch them inline, to prevent "thunk
+  funneling" for both security and performance reasons
+
+- Non-instrumentation validation -- validates non-instrumentable
+  ("noinstr") code rules, preventing instrumentation in low-level C
+  entry code
+
+- Static call annotation -- annotates static call sites, enabling the
+  kernel to implement inline static calls, a faster alternative to some
+  indirect branches
+
+- Uaccess validation -- validates uaccess rules for a proper
+  implementation of Supervisor Mode Access Protection (SMAP)
+
+- Straight Line Speculation validation -- validates certain SLS
+  mitigations
+
+- Indirect Branch Tracking validation -- validates Intel CET IBT rules
+  to ensure that all functions referenced by function pointers have
+  corresponding ENDBR instructions
+
+- Indirect Branch Tracking annotation -- annotates unused ENDBR
+  instruction sites, enabling the kernel to "seal" them (replace them
+  with NOPs) to further harden IBT
+
+- Function entry annotation -- annotates function entries, enabling
+  kernel function tracing
+
+- Other toolchain hacks which will go unmentioned at this time...
+
+Each feature can be enabled individually or in combination using the
+objtool cmdline.
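+
+For example (illustrative; the kernel build system chooses the actual
+option set based on the configuration):
+
+  $ objtool --orc --retpoline --stackval --static-call --uaccess foo.o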
+
+
+Objects
+-------
+
+Typically, objtool runs on every translation unit (TU, aka ".o file") in
+the kernel.  If a TU is part of a kernel module, the '--module' option
+is added.
+
+However:
+
+- If noinstr validation is enabled, it also runs on vmlinux.o, with all
+  options removed and '--noinstr' added.
+
+- If IBT or LTO is enabled, it doesn't run on TUs at all.  Instead it
+  runs on vmlinux.o and linked modules, with all options.
+
+In summary:
+
+  A) Legacy mode:
+             TU: objtool [--module] <options>
+        vmlinux: N/A
+         module: N/A
+
+  B) CONFIG_NOINSTR_VALIDATION=y && !(CONFIG_X86_KERNEL_IBT=y || CONFIG_LTO=y):
+             TU: objtool [--module] <options>  // no --noinstr
+        vmlinux: objtool --noinstr             // other options removed
+         module: N/A
+
+  C) CONFIG_X86_KERNEL_IBT=y || CONFIG_LTO=y:
+             TU: N/A
+        vmlinux: objtool --noinstr <options>
+         module: objtool --module --noinstr <options>
+
+
+Stack validation
+----------------
+
+Objtool's stack validation feature analyzes every .o file and ensures
+the validity of its stack metadata.  It enforces a set of rules on asm
+code and C inline assembly code so that stack traces can be reliable.
 
 For each function, it recursively follows all possible code paths and
 validates the correct frame pointer state at each instruction.
@@ -20,14 +108,6 @@ alternative execution paths to a given instruction (or set of
 instructions).  Similarly, it knows how to follow switch statements, for
 which gcc sometimes uses jump tables.
 
-(Objtool also has an 'orc generate' subcommand which generates debuginfo
-for the ORC unwinder.  See Documentation/x86/orc-unwinder.rst in the
-kernel tree for more details.)
-
-
-Why do we need stack metadata validation?
------------------------------------------
-
 Here are some of the benefits of validating stack metadata:
 
 a) More reliable stack traces for frame pointer enabled kernels
@@ -113,9 +193,6 @@ c) Higher live patching compatibility rate
    For more details, see the livepatch documentation in the Linux kernel
    source tree at Documentation/livepatch/livepatch.rst.
 
-Rules
------
-
 To achieve the validation, objtool enforces the following rules:
 
 1. Each callable function must be annotated as such with the ELF
@@ -177,7 +254,8 @@ Another possible cause for errors in C code is if the Makefile removes
 -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
 
 Here are some examples of common warnings reported by objtool, what
-they mean, and suggestions for how to fix them.
+they mean, and suggestions for how to fix them.  When in doubt, ping
+the objtool maintainers.
 
 
 1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
@@ -358,3 +436,7 @@ ignore it:
     OBJECT_FILES_NON_STANDARD := y
 
   to the Makefile.
+
+NOTE: OBJECT_FILES_NON_STANDARD doesn't work for link time validation of
+vmlinux.o or a linked module.  So it should only be used for files which
+aren't linked into vmlinux or a module.
index 0dbd397f319d1a0793f08dfa3bd6e29b44fdd682..e66d717c245d0611f3387f161fd5892c1226f3b2 100644 (file)
@@ -39,15 +39,13 @@ CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
 
 AWK = awk
 
-SUBCMD_CHECK := n
-SUBCMD_ORC := n
+BUILD_ORC := n
 
 ifeq ($(SRCARCH),x86)
-       SUBCMD_CHECK := y
-       SUBCMD_ORC := y
+       BUILD_ORC := y
 endif
 
-export SUBCMD_CHECK SUBCMD_ORC
+export BUILD_ORC
 export srctree OUTPUT CFLAGS SRCARCH AWK
 include $(srctree)/tools/build/Makefile.include
 
@@ -65,7 +63,7 @@ $(LIBSUBCMD): fixdep FORCE
 clean:
        $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
        $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
-       $(Q)$(RM) $(OUTPUT)arch/x86/inat-tables.c $(OUTPUT)fixdep
+       $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep $(LIBSUBCMD)
 
 FORCE:
 
index 943cb41cddf7cd8e75f199c5b58335b06d05afc8..8b990a52aadac1e26ecb33ff97e114ddd789a94f 100644 (file)
@@ -581,7 +581,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                break;
 
        case 0xc7: /* mov imm, r/m */
-               if (!noinstr)
+               if (!opts.noinstr)
                        break;
 
                if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
index e707d9bcd1616480be236c6ba2e3ab8e70f8d13b..7c97b739127994febfded845e40d8b5dd1180b23 100644 (file)
@@ -20,7 +20,7 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
                 * find paths that see the STAC but take the NOP instead of
                 * CLAC and the other way around.
                 */
-               if (uaccess)
+               if (opts.uaccess)
                        alt->skip_orig = true;
                else
                        alt->skip_alt = true;
index fc6975ab8b06e8b15c649bf4bcdeccff60b76cf2..f4c3a50917379abaa6ccb17d3f578a5cbe1d83f9 100644 (file)
@@ -3,28 +3,21 @@
  * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
  */
 
-/*
- * objtool check:
- *
- * This command analyzes every .o file and ensures the validity of its stack
- * trace metadata.  It enforces a set of rules on asm code and C inline
- * assembly code so that stack traces can be reliable.
- *
- * For more information, see tools/objtool/Documentation/stack-validation.txt.
- */
-
 #include <subcmd/parse-options.h>
 #include <string.h>
 #include <stdlib.h>
 #include <objtool/builtin.h>
 #include <objtool/objtool.h>
 
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
-     lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
-     ibt;
+#define ERROR(format, ...)                             \
+       fprintf(stderr,                                 \
+               "error: objtool: " format "\n",         \
+               ##__VA_ARGS__)
+
+struct opts opts;
 
 static const char * const check_usage[] = {
-       "objtool check [<options>] file.o",
+       "objtool <actions> [<options>] file.o",
        NULL,
 };
 
@@ -33,22 +26,64 @@ static const char * const env_usage[] = {
        NULL,
 };
 
+static int parse_dump(const struct option *opt, const char *str, int unset)
+{
+       if (!str || !strcmp(str, "orc")) {
+               opts.dump_orc = true;
+               return 0;
+       }
+
+       return -1;
+}
+
+static int parse_hacks(const struct option *opt, const char *str, int unset)
+{
+       bool found = false;
+
+       /*
+        * Use strstr() as a lazy method of checking for comma-separated
+        * options.
+        *
+        * No string provided == enable all options.
+        */
+
+       if (!str || strstr(str, "jump_label")) {
+               opts.hack_jump_label = true;
+               found = true;
+       }
+
+       if (!str || strstr(str, "noinstr")) {
+               opts.hack_noinstr = true;
+               found = true;
+       }
+
+       return found ? 0 : -1;
+}
+
 const struct option check_options[] = {
-       OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
-       OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
-       OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
-       OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
-       OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
-       OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
-       OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
-       OPT_BOOLEAN(0, "lto", &lto, "whole-archive like runs"),
-       OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
-       OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
-       OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
-       OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
-       OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
-       OPT_BOOLEAN(0, "dry-run", &dryrun, "don't write the modifications"),
-       OPT_BOOLEAN(0, "ibt", &ibt, "validate ENDBR placement"),
+       OPT_GROUP("Actions:"),
+       OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr", "patch toolchain bugs/limitations", parse_hacks),
+       OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
+       OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
+       OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
+       OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
+       OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
+       OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
+       OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
+       OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
+       OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
+       OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
+
+       OPT_GROUP("Options:"),
+       OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
+       OPT_BOOLEAN(0, "backup", &opts.backup, "create .orig files before modification"),
+       OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
+       OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
+       OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
+       OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
+       OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
+       OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
+
        OPT_END(),
 };
 
@@ -79,7 +114,59 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[])
        return argc;
 }
 
-int cmd_check(int argc, const char **argv)
+static bool opts_valid(void)
+{
+       if (opts.hack_jump_label        ||
+           opts.hack_noinstr           ||
+           opts.ibt                    ||
+           opts.mcount                 ||
+           opts.noinstr                ||
+           opts.orc                    ||
+           opts.retpoline              ||
+           opts.sls                    ||
+           opts.stackval               ||
+           opts.static_call            ||
+           opts.uaccess) {
+               if (opts.dump_orc) {
+                       ERROR("--dump can't be combined with other options");
+                       return false;
+               }
+
+               return true;
+       }
+
+       if (opts.dump_orc)
+               return true;
+
+       ERROR("At least one command required");
+       return false;
+}
+
+static bool link_opts_valid(struct objtool_file *file)
+{
+       if (opts.link)
+               return true;
+
+       if (has_multiple_files(file->elf)) {
+               ERROR("Linked object detected, forcing --link");
+               opts.link = true;
+               return true;
+       }
+
+       if (opts.noinstr) {
+               ERROR("--noinstr requires --link");
+               return false;
+       }
+
+       if (opts.ibt) {
+               ERROR("--ibt requires --link");
+               return false;
+       }
+
+       return true;
+}
+
+int objtool_run(int argc, const char **argv)
 {
        const char *objname;
        struct objtool_file *file;
@@ -88,10 +175,19 @@ int cmd_check(int argc, const char **argv)
        argc = cmd_parse_options(argc, argv, check_usage);
        objname = argv[0];
 
+       if (!opts_valid())
+               return 1;
+
+       if (opts.dump_orc)
+               return orc_dump(objname);
+
        file = objtool_open_read(objname);
        if (!file)
                return 1;
 
+       if (!link_opts_valid(file))
+               return 1;
+
        ret = check(file);
        if (ret)
                return ret;
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
deleted file mode 100644 (file)
index 17f8b93..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
- */
-
-/*
- * objtool orc:
- *
- * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip
- * sections to it, which is used by the in-kernel ORC unwinder.
- *
- * This command is a superset of "objtool check".
- */
-
-#include <string.h>
-#include <objtool/builtin.h>
-#include <objtool/objtool.h>
-
-static const char *orc_usage[] = {
-       "objtool orc generate [<options>] file.o",
-       "objtool orc dump file.o",
-       NULL,
-};
-
-int cmd_orc(int argc, const char **argv)
-{
-       const char *objname;
-
-       argc--; argv++;
-       if (argc <= 0)
-               usage_with_options(orc_usage, check_options);
-
-       if (!strncmp(argv[0], "gen", 3)) {
-               struct objtool_file *file;
-               int ret;
-
-               argc = cmd_parse_options(argc, argv, orc_usage);
-               objname = argv[0];
-
-               file = objtool_open_read(objname);
-               if (!file)
-                       return 1;
-
-               ret = check(file);
-               if (ret)
-                       return ret;
-
-               if (list_empty(&file->insn_list))
-                       return 0;
-
-               ret = orc_create(file);
-               if (ret)
-                       return ret;
-
-               if (!file->elf->changed)
-                       return 0;
-
-               return elf_write(file->elf);
-       }
-
-       if (!strcmp(argv[0], "dump")) {
-               if (argc != 2)
-                       usage_with_options(orc_usage, check_options);
-
-               objname = argv[1];
-
-               return orc_dump(objname);
-       }
-
-       usage_with_options(orc_usage, check_options);
-
-       return 0;
-}
index ca5b746030089c35c9c55788c75315de71aefb08..190b2f6e360a30cebe20f4d6235437703c3d7a13 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <string.h>
 #include <stdlib.h>
+#include <inttypes.h>
 #include <sys/mman.h>
 
 #include <arch/elf.h>
@@ -263,7 +264,8 @@ static void init_cfi_state(struct cfi_state *cfi)
        cfi->drap_offset = -1;
 }
 
-static void init_insn_state(struct insn_state *state, struct section *sec)
+static void init_insn_state(struct objtool_file *file, struct insn_state *state,
+                           struct section *sec)
 {
        memset(state, 0, sizeof(*state));
        init_cfi_state(&state->cfi);
@@ -273,7 +275,7 @@ static void init_insn_state(struct insn_state *state, struct section *sec)
         * not correctly determine insn->call_dest->sec (external symbols do
         * not have a section).
         */
-       if (vmlinux && noinstr && sec)
+       if (opts.link && opts.noinstr && sec)
                state->noinstr = sec->noinstr;
 }
 
@@ -339,7 +341,7 @@ static void *cfi_hash_alloc(unsigned long size)
        if (cfi_hash == (void *)-1L) {
                WARN("mmap fail cfi_hash");
                cfi_hash = NULL;
-       }  else if (stats) {
+       }  else if (opts.stats) {
                printf("cfi_bits: %d\n", cfi_bits);
        }
 
@@ -434,7 +436,7 @@ static int decode_instructions(struct objtool_file *file)
                }
        }
 
-       if (stats)
+       if (opts.stats)
                printf("nr_insns: %lu\n", nr_insns);
 
        return 0;
@@ -497,7 +499,7 @@ static int init_pv_ops(struct objtool_file *file)
        struct symbol *sym;
        int idx, nr;
 
-       if (!noinstr)
+       if (!opts.noinstr)
                return 0;
 
        file->pv_ops = NULL;
@@ -560,12 +562,12 @@ static int add_dead_ends(struct objtool_file *file)
                else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
                        insn = find_last_insn(file, reloc->sym->sec);
                        if (!insn) {
-                               WARN("can't find unreachable insn at %s+0x%lx",
+                               WARN("can't find unreachable insn at %s+0x%" PRIx64,
                                     reloc->sym->sec->name, reloc->addend);
                                return -1;
                        }
                } else {
-                       WARN("can't find unreachable insn at %s+0x%lx",
+                       WARN("can't find unreachable insn at %s+0x%" PRIx64,
                             reloc->sym->sec->name, reloc->addend);
                        return -1;
                }
@@ -595,12 +597,12 @@ reachable:
                else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
                        insn = find_last_insn(file, reloc->sym->sec);
                        if (!insn) {
-                               WARN("can't find reachable insn at %s+0x%lx",
+                               WARN("can't find reachable insn at %s+0x%" PRIx64,
                                     reloc->sym->sec->name, reloc->addend);
                                return -1;
                        }
                } else {
-                       WARN("can't find reachable insn at %s+0x%lx",
+                       WARN("can't find reachable insn at %s+0x%" PRIx64,
                             reloc->sym->sec->name, reloc->addend);
                        return -1;
                }
@@ -668,7 +670,7 @@ static int create_static_call_sections(struct objtool_file *file)
 
                key_sym = find_symbol_by_name(file->elf, tmp);
                if (!key_sym) {
-                       if (!module) {
+                       if (!opts.module) {
                                WARN("static_call: can't find static_call_key symbol: %s", tmp);
                                return -1;
                        }
@@ -761,7 +763,7 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
        list_for_each_entry(insn, &file->endbr_list, call_node)
                idx++;
 
-       if (stats) {
+       if (opts.stats) {
                printf("ibt: ENDBR at function start: %d\n", file->nr_endbr);
                printf("ibt: ENDBR inside functions:  %d\n", file->nr_endbr_int);
                printf("ibt: superfluous ENDBR:       %d\n", idx);
@@ -1028,7 +1030,7 @@ static void add_uaccess_safe(struct objtool_file *file)
        struct symbol *func;
        const char **name;
 
-       if (!uaccess)
+       if (!opts.uaccess)
                return;
 
        for (name = uaccess_safe_builtin; *name; name++) {
@@ -1144,7 +1146,7 @@ static void annotate_call_site(struct objtool_file *file,
         * attribute so they need a little help, NOP out any such calls from
         * noinstr text.
         */
-       if (insn->sec->noinstr && sym->profiling_func) {
+       if (opts.hack_noinstr && insn->sec->noinstr && sym->profiling_func) {
                if (reloc) {
                        reloc->type = R_NONE;
                        elf_write_reloc(file->elf, reloc);
@@ -1170,7 +1172,7 @@ static void annotate_call_site(struct objtool_file *file,
                return;
        }
 
-       if (mcount && sym->fentry) {
+       if (opts.mcount && sym->fentry) {
                if (sibling)
                        WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset);
 
@@ -1256,7 +1258,7 @@ static bool is_first_func_insn(struct objtool_file *file, struct instruction *in
        if (insn->offset == insn->func->offset)
                return true;
 
-       if (ibt) {
+       if (opts.ibt) {
                struct instruction *prev = prev_insn_same_sym(file, insn);
 
                if (prev && prev->type == INSN_ENDBR &&
@@ -1592,7 +1594,7 @@ static int handle_jump_alt(struct objtool_file *file,
                return -1;
        }
 
-       if (special_alt->key_addend & 2) {
+       if (opts.hack_jump_label && special_alt->key_addend & 2) {
                struct reloc *reloc = insn_reloc(file, orig_insn);
 
                if (reloc) {
@@ -1699,7 +1701,7 @@ static int add_special_section_alts(struct objtool_file *file)
                free(special_alt);
        }
 
-       if (stats) {
+       if (opts.stats) {
                printf("jl\\\tNOP\tJMP\n");
                printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short);
                printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
@@ -1945,7 +1947,7 @@ static int read_unwind_hints(struct objtool_file *file)
 
                insn->hint = true;
 
-               if (ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+               if (opts.ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
                        struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
 
                        if (sym && sym->bind == STB_GLOBAL &&
@@ -2806,7 +2808,7 @@ static int update_cfi_state(struct instruction *insn,
                }
 
                /* detect when asm code uses rbp as a scratch register */
-               if (!no_fp && insn->func && op->src.reg == CFI_BP &&
+               if (opts.stackval && insn->func && op->src.reg == CFI_BP &&
                    cfa->base != CFI_BP)
                        cfi->bp_scratch = true;
                break;
@@ -3182,114 +3184,6 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file,
        return next_insn_same_sec(file, insn);
 }
 
-static struct instruction *
-validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc)
-{
-       struct instruction *dest;
-       struct section *sec;
-       unsigned long off;
-
-       sec = reloc->sym->sec;
-       off = reloc->sym->offset;
-
-       if ((reloc->sec->base->sh.sh_flags & SHF_EXECINSTR) &&
-           (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32))
-               off += arch_dest_reloc_offset(reloc->addend);
-       else
-               off += reloc->addend;
-
-       dest = find_insn(file, sec, off);
-       if (!dest)
-               return NULL;
-
-       if (dest->type == INSN_ENDBR) {
-               if (!list_empty(&dest->call_node))
-                       list_del_init(&dest->call_node);
-
-               return NULL;
-       }
-
-       if (reloc->sym->static_call_tramp)
-               return NULL;
-
-       return dest;
-}
-
-static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset,
-                        struct instruction *dest)
-{
-       WARN_FUNC("%srelocation to !ENDBR: %s", sec, offset, msg,
-                 offstr(dest->sec, dest->offset));
-}
-
-static void validate_ibt_dest(struct objtool_file *file, struct instruction *insn,
-                             struct instruction *dest)
-{
-       if (dest->func && dest->func == insn->func) {
-               /*
-                * Anything from->to self is either _THIS_IP_ or IRET-to-self.
-                *
-                * There is no sane way to annotate _THIS_IP_ since the compiler treats the
-                * relocation as a constant and is happy to fold in offsets, skewing any
-                * annotation we do, leading to vast amounts of false-positives.
-                *
-                * There's also compiler generated _THIS_IP_ through KCOV and
-                * such which we have no hope of annotating.
-                *
-                * As such, blanket accept self-references without issue.
-                */
-               return;
-       }
-
-       if (dest->noendbr)
-               return;
-
-       warn_noendbr("", insn->sec, insn->offset, dest);
-}
-
-static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
-{
-       struct instruction *dest;
-       struct reloc *reloc;
-
-       switch (insn->type) {
-       case INSN_CALL:
-       case INSN_CALL_DYNAMIC:
-       case INSN_JUMP_CONDITIONAL:
-       case INSN_JUMP_UNCONDITIONAL:
-       case INSN_JUMP_DYNAMIC:
-       case INSN_JUMP_DYNAMIC_CONDITIONAL:
-       case INSN_RETURN:
-               /*
-                * We're looking for code references setting up indirect code
-                * flow. As such, ignore direct code flow and the actual
-                * dynamic branches.
-                */
-               return;
-
-       case INSN_NOP:
-               /*
-                * handle_group_alt() will create INSN_NOP instruction that
-                * don't belong to any section, ignore all NOP since they won't
-                * carry a (useful) relocation anyway.
-                */
-               return;
-
-       default:
-               break;
-       }
-
-       for (reloc = insn_reloc(file, insn);
-            reloc;
-            reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-                                             reloc->offset + 1,
-                                             (insn->offset + insn->len) - (reloc->offset + 1))) {
-               dest = validate_ibt_reloc(file, reloc);
-               if (dest)
-                       validate_ibt_dest(file, insn, dest);
-       }
-}
-
 /*
  * Follow the branch starting at the given instruction, and recursively follow
  * any other branches (jumps).  Meanwhile, track the frame pointer state at
@@ -3363,7 +3257,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 
                                ret = validate_branch(file, func, alt->insn, state);
                                if (ret) {
-                                       if (backtrace)
+                                       if (opts.backtrace)
                                                BT_FUNC("(alt)", insn);
                                        return ret;
                                }
@@ -3379,11 +3273,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                switch (insn->type) {
 
                case INSN_RETURN:
-                       if (sls && !insn->retpoline_safe &&
-                           next_insn && next_insn->type != INSN_TRAP) {
-                               WARN_FUNC("missing int3 after ret",
-                                         insn->sec, insn->offset);
-                       }
                        return validate_return(func, insn, &state);
 
                case INSN_CALL:
@@ -3392,7 +3281,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                        if (ret)
                                return ret;
 
-                       if (!no_fp && func && !is_fentry_call(insn) &&
+                       if (opts.stackval && func && !is_fentry_call(insn) &&
                            !has_valid_stack_frame(&state)) {
                                WARN_FUNC("call without frame pointer save/setup",
                                          sec, insn->offset);
@@ -3415,7 +3304,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                                ret = validate_branch(file, func,
                                                      insn->jump_dest, state);
                                if (ret) {
-                                       if (backtrace)
+                                       if (opts.backtrace)
                                                BT_FUNC("(branch)", insn);
                                        return ret;
                                }
@@ -3427,13 +3316,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                        break;
 
                case INSN_JUMP_DYNAMIC:
-                       if (sls && !insn->retpoline_safe &&
-                           next_insn && next_insn->type != INSN_TRAP) {
-                               WARN_FUNC("missing int3 after indirect jump",
-                                         insn->sec, insn->offset);
-                       }
-
-                       /* fallthrough */
                case INSN_JUMP_DYNAMIC_CONDITIONAL:
                        if (is_sibling_call(insn)) {
                                ret = validate_sibling_call(file, insn, &state);
@@ -3499,9 +3381,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                        break;
                }
 
-               if (ibt)
-                       validate_ibt_insn(file, insn);
-
                if (insn->dead_end)
                        return 0;
 
@@ -3528,7 +3407,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
        if (!file->hints)
                return 0;
 
-       init_insn_state(&state, sec);
+       init_insn_state(file, &state, sec);
 
        if (sec) {
                insn = find_insn(file, sec, 0);
@@ -3541,7 +3420,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
        while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
                if (insn->hint && !insn->visited && !insn->ignore) {
                        ret = validate_branch(file, insn->func, insn, state);
-                       if (ret && backtrace)
+                       if (ret && opts.backtrace)
                                BT_FUNC("<=== (hint)", insn);
                        warnings += ret;
                }
@@ -3571,7 +3450,7 @@ static int validate_retpoline(struct objtool_file *file)
                 * loaded late, they very much do need retpoline in their
                 * .init.text
                 */
-               if (!strcmp(insn->sec->name, ".init.text") && !module)
+               if (!strcmp(insn->sec->name, ".init.text") && !opts.module)
                        continue;
 
                WARN_FUNC("indirect %s found in RETPOLINE build",
@@ -3614,14 +3493,14 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
                return true;
 
        /*
-        * Whole archive runs might encounder dead code from weak symbols.
+        * Whole archive runs might encounter dead code from weak symbols.
         * This is where the linker will have dropped the weak symbol in
         * favour of a regular symbol, but leaves the code in place.
         *
         * In this case we'll find a piece of code (whole function) that is not
         * covered by a !section symbol. Ignore them.
         */
-       if (!insn->func && lto) {
+       if (opts.link && !insn->func) {
                int size = find_symbol_hole_containing(insn->sec, insn->offset);
                unsigned long end = insn->offset + size;
 
@@ -3728,7 +3607,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
        state->uaccess = sym->uaccess_safe;
 
        ret = validate_branch(file, insn->func, insn, *state);
-       if (ret && backtrace)
+       if (ret && opts.backtrace)
                BT_FUNC("<=== (sym)", insn);
        return ret;
 }
@@ -3743,7 +3622,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
                if (func->type != STT_FUNC)
                        continue;
 
-               init_insn_state(&state, sec);
+               init_insn_state(file, &state, sec);
                set_func_state(&state.cfi);
 
                warnings += validate_symbol(file, sec, func, &state);
@@ -3752,7 +3631,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
        return warnings;
 }
 
-static int validate_vmlinux_functions(struct objtool_file *file)
+static int validate_noinstr_sections(struct objtool_file *file)
 {
        struct section *sec;
        int warnings = 0;
@@ -3787,48 +3666,208 @@ static int validate_functions(struct objtool_file *file)
        return warnings;
 }
 
+static void mark_endbr_used(struct instruction *insn)
+{
+       if (!list_empty(&insn->call_node))
+               list_del_init(&insn->call_node);
+}
+
+static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
+{
+       struct instruction *dest;
+       struct reloc *reloc;
+       unsigned long off;
+       int warnings = 0;
+
+       /*
+        * Looking for function pointer load relocations.  Ignore
+        * direct/indirect branches:
+        */
+       switch (insn->type) {
+       case INSN_CALL:
+       case INSN_CALL_DYNAMIC:
+       case INSN_JUMP_CONDITIONAL:
+       case INSN_JUMP_UNCONDITIONAL:
+       case INSN_JUMP_DYNAMIC:
+       case INSN_JUMP_DYNAMIC_CONDITIONAL:
+       case INSN_RETURN:
+       case INSN_NOP:
+               return 0;
+       default:
+               break;
+       }
+
+       for (reloc = insn_reloc(file, insn);
+            reloc;
+            reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+                                             reloc->offset + 1,
+                                             (insn->offset + insn->len) - (reloc->offset + 1))) {
+
+               /*
+                * static_call_update() references the trampoline, which
+                * doesn't have (or need) ENDBR.  Skip warning in that case.
+                */
+               if (reloc->sym->static_call_tramp)
+                       continue;
+
+               off = reloc->sym->offset;
+               if (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32)
+                       off += arch_dest_reloc_offset(reloc->addend);
+               else
+                       off += reloc->addend;
+
+               dest = find_insn(file, reloc->sym->sec, off);
+               if (!dest)
+                       continue;
+
+               if (dest->type == INSN_ENDBR) {
+                       mark_endbr_used(dest);
+                       continue;
+               }
+
+               if (dest->func && dest->func == insn->func) {
+                       /*
+                        * Anything from->to self is either _THIS_IP_ or
+                        * IRET-to-self.
+                        *
+                        * There is no sane way to annotate _THIS_IP_ since the
+                        * compiler treats the relocation as a constant and is
+                        * happy to fold in offsets, skewing any annotation we
+                        * do, leading to vast amounts of false-positives.
+                        *
+                        * There's also compiler-generated _THIS_IP_ through
+                        * KCOV and such, which we have no hope of annotating.
+                        *
+                        * As such, blanket accept self-references without
+                        * issue.
+                        */
+                       continue;
+               }
+
+               if (dest->noendbr)
+                       continue;
+
+               WARN_FUNC("relocation to !ENDBR: %s",
+                         insn->sec, insn->offset,
+                         offstr(dest->sec, dest->offset));
+
+               warnings++;
+       }
+
+       return warnings;
+}
+
+static int validate_ibt_data_reloc(struct objtool_file *file,
+                                  struct reloc *reloc)
+{
+       struct instruction *dest;
+
+       dest = find_insn(file, reloc->sym->sec,
+                        reloc->sym->offset + reloc->addend);
+       if (!dest)
+               return 0;
+
+       if (dest->type == INSN_ENDBR) {
+               mark_endbr_used(dest);
+               return 0;
+       }
+
+       if (dest->noendbr)
+               return 0;
+
+       WARN_FUNC("data relocation to !ENDBR: %s",
+                 reloc->sec->base, reloc->offset,
+                 offstr(dest->sec, dest->offset));
+
+       return 1;
+}
+
+/*
+ * Validate IBT rules and remove used ENDBR instructions from the seal list.
+ * Unused ENDBR instructions will be annotated for sealing (i.e., replaced with
+ * NOPs) later, in create_ibt_endbr_seal_sections().
+ */
 static int validate_ibt(struct objtool_file *file)
 {
        struct section *sec;
        struct reloc *reloc;
+       struct instruction *insn;
+       int warnings = 0;
+
+       for_each_insn(file, insn)
+               warnings += validate_ibt_insn(file, insn);
 
        for_each_sec(file, sec) {
-               bool is_data;
 
-               /* already done in validate_branch() */
+               /* Already done by validate_ibt_insn() */
                if (sec->sh.sh_flags & SHF_EXECINSTR)
                        continue;
 
                if (!sec->reloc)
                        continue;
 
-               if (!strncmp(sec->name, ".orc", 4))
+               /*
+                * These sections can reference text addresses, but never
+                * as indirect-branch targets.
+                */
+               if (!strncmp(sec->name, ".discard", 8)                  ||
+                   !strncmp(sec->name, ".debug", 6)                    ||
+                   !strcmp(sec->name, ".altinstructions")              ||
+                   !strcmp(sec->name, ".ibt_endbr_seal")               ||
+                   !strcmp(sec->name, ".orc_unwind_ip")                ||
+                   !strcmp(sec->name, ".parainstructions")             ||
+                   !strcmp(sec->name, ".retpoline_sites")              ||
+                   !strcmp(sec->name, ".smp_locks")                    ||
+                   !strcmp(sec->name, ".static_call_sites")            ||
+                   !strcmp(sec->name, "_error_injection_whitelist")    ||
+                   !strcmp(sec->name, "_kprobe_blacklist")             ||
+                   !strcmp(sec->name, "__bug_table")                   ||
+                   !strcmp(sec->name, "__ex_table")                    ||
+                   !strcmp(sec->name, "__jump_table")                  ||
+                   !strcmp(sec->name, "__mcount_loc")                  ||
+                   !strcmp(sec->name, "__tracepoints"))
                        continue;
 
-               if (!strncmp(sec->name, ".discard", 8))
-                       continue;
+               list_for_each_entry(reloc, &sec->reloc->reloc_list, list)
+                       warnings += validate_ibt_data_reloc(file, reloc);
+       }
 
-               if (!strncmp(sec->name, ".debug", 6))
-                       continue;
+       return warnings;
+}
 
-               if (!strcmp(sec->name, "_error_injection_whitelist"))
-                       continue;
+static int validate_sls(struct objtool_file *file)
+{
+       struct instruction *insn, *next_insn;
+       int warnings = 0;
 
-               if (!strcmp(sec->name, "_kprobe_blacklist"))
-                       continue;
+       for_each_insn(file, insn) {
+               next_insn = next_insn_same_sec(file, insn);
 
-               is_data = strstr(sec->name, ".data") || strstr(sec->name, ".rodata");
+               if (insn->retpoline_safe)
+                       continue;
 
-               list_for_each_entry(reloc, &sec->reloc->reloc_list, list) {
-                       struct instruction *dest;
+               switch (insn->type) {
+               case INSN_RETURN:
+                       if (!next_insn || next_insn->type != INSN_TRAP) {
+                               WARN_FUNC("missing int3 after ret",
+                                         insn->sec, insn->offset);
+                               warnings++;
+                       }
 
-                       dest = validate_ibt_reloc(file, reloc);
-                       if (is_data && dest && !dest->noendbr)
-                               warn_noendbr("data ", sec, reloc->offset, dest);
+                       break;
+               case INSN_JUMP_DYNAMIC:
+                       if (!next_insn || next_insn->type != INSN_TRAP) {
+                               WARN_FUNC("missing int3 after indirect jump",
+                                         insn->sec, insn->offset);
+                               warnings++;
+                       }
+                       break;
+               default:
+                       break;
                }
        }
 
-       return 0;
+       return warnings;
 }
 
 static int validate_reachable_instructions(struct objtool_file *file)
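The reworked validate_ibt_insn() resolves each relocation found inside an instruction to a destination offset before checking for ENDBR: PC-relative relocations (R_X86_64_PC32/PLT32) run the addend through arch_dest_reloc_offset(), which on x86-64 adds 4 because the relocation sits 4 bytes before the end of the instruction, while absolute relocations add the addend directly. A minimal standalone sketch of that computation, with an invented struct in place of objtool's reloc:

    #include <stdio.h>

    /* Invented, simplified stand-in for the reloc fields used by
     * validate_ibt_insn(). */
    struct fake_reloc {
            int pc_relative;        /* R_X86_64_PC32 or R_X86_64_PLT32 */
            long sym_offset;        /* reloc->sym->offset */
            long addend;            /* reloc->addend */
    };

    static long dest_offset(const struct fake_reloc *r)
    {
            /* arch_dest_reloc_offset() on x86-64 returns addend + 4 */
            if (r->pc_relative)
                    return r->sym_offset + r->addend + 4;
            return r->sym_offset + r->addend;
    }

    int main(void)
    {
            struct fake_reloc r = { 1, 0x100, -4 };

            printf("dest = 0x%lx\n", (unsigned long)dest_offset(&r)); /* 0x100 */
            return 0;
    }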
@@ -3853,16 +3892,6 @@ int check(struct objtool_file *file)
 {
        int ret, warnings = 0;
 
-       if (lto && !(vmlinux || module)) {
-               fprintf(stderr, "--lto requires: --vmlinux or --module\n");
-               return 1;
-       }
-
-       if (ibt && !lto) {
-               fprintf(stderr, "--ibt requires: --lto\n");
-               return 1;
-       }
-
        arch_initial_func_cfi_state(&initial_func_cfi);
        init_cfi_state(&init_cfi);
        init_cfi_state(&func_cfi);
@@ -3883,73 +3912,89 @@ int check(struct objtool_file *file)
        if (list_empty(&file->insn_list))
                goto out;
 
-       if (vmlinux && !lto) {
-               ret = validate_vmlinux_functions(file);
+       if (opts.retpoline) {
+               ret = validate_retpoline(file);
                if (ret < 0)
-                       goto out;
-
+                       return ret;
                warnings += ret;
-               goto out;
        }
 
-       if (retpoline) {
-               ret = validate_retpoline(file);
+       if (opts.stackval || opts.orc || opts.uaccess) {
+               ret = validate_functions(file);
                if (ret < 0)
-                       return ret;
+                       goto out;
                warnings += ret;
-       }
 
-       ret = validate_functions(file);
-       if (ret < 0)
-               goto out;
-       warnings += ret;
+               ret = validate_unwind_hints(file, NULL);
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
 
-       ret = validate_unwind_hints(file, NULL);
-       if (ret < 0)
-               goto out;
-       warnings += ret;
+               if (!warnings) {
+                       ret = validate_reachable_instructions(file);
+                       if (ret < 0)
+                               goto out;
+                       warnings += ret;
+               }
 
-       if (ibt) {
+       } else if (opts.noinstr) {
+               ret = validate_noinstr_sections(file);
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
+       }
+
+       if (opts.ibt) {
                ret = validate_ibt(file);
                if (ret < 0)
                        goto out;
                warnings += ret;
        }
 
-       if (!warnings) {
-               ret = validate_reachable_instructions(file);
+       if (opts.sls) {
+               ret = validate_sls(file);
                if (ret < 0)
                        goto out;
                warnings += ret;
        }
 
-       ret = create_static_call_sections(file);
-       if (ret < 0)
-               goto out;
-       warnings += ret;
+       if (opts.static_call) {
+               ret = create_static_call_sections(file);
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
+       }
 
-       if (retpoline) {
+       if (opts.retpoline) {
                ret = create_retpoline_sites_sections(file);
                if (ret < 0)
                        goto out;
                warnings += ret;
        }
 
-       if (mcount) {
+       if (opts.mcount) {
                ret = create_mcount_loc_sections(file);
                if (ret < 0)
                        goto out;
                warnings += ret;
        }
 
-       if (ibt) {
+       if (opts.ibt) {
                ret = create_ibt_endbr_seal_sections(file);
                if (ret < 0)
                        goto out;
                warnings += ret;
        }
 
-       if (stats) {
+       if (opts.orc && !list_empty(&file->insn_list)) {
+               ret = orc_create(file);
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
+       }
+
+       if (opts.stats) {
                printf("nr_insns_visited: %ld\n", nr_insns_visited);
                printf("nr_cfi: %ld\n", nr_cfi);
                printf("nr_cfi_reused: %ld\n", nr_cfi_reused);
index ebf2ba5755c1e1a115dbf06f09139a73ae4e8f8f..c25e957c1e520b8105516c6deb96578870ddb495 100644 (file)
@@ -355,7 +355,7 @@ static int read_sections(struct elf *elf)
                elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
        }
 
-       if (stats) {
+       if (opts.stats) {
                printf("nr_sections: %lu\n", (unsigned long)sections_nr);
                printf("section_bits: %d\n", elf->section_bits);
        }
@@ -374,9 +374,15 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
        struct list_head *entry;
        struct rb_node *pnode;
 
+       INIT_LIST_HEAD(&sym->pv_target);
+       sym->alias = sym;
+
        sym->type = GELF_ST_TYPE(sym->sym.st_info);
        sym->bind = GELF_ST_BIND(sym->sym.st_info);
 
+       if (sym->type == STT_FILE)
+               elf->num_files++;
+
        sym->offset = sym->sym.st_value;
        sym->len = sym->sym.st_size;
 
@@ -435,8 +441,6 @@ static int read_symbols(struct elf *elf)
                        return -1;
                }
                memset(sym, 0, sizeof(*sym));
-               INIT_LIST_HEAD(&sym->pv_target);
-               sym->alias = sym;
 
                sym->idx = i;
 
@@ -475,7 +479,7 @@ static int read_symbols(struct elf *elf)
                elf_add_symbol(elf, sym);
        }
 
-       if (stats) {
+       if (opts.stats) {
                printf("nr_symbols: %lu\n", (unsigned long)symbols_nr);
                printf("symbol_bits: %d\n", elf->symbol_bits);
        }
@@ -546,7 +550,7 @@ static struct section *elf_create_reloc_section(struct elf *elf,
                                                int reltype);
 
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
-                 unsigned int type, struct symbol *sym, long addend)
+                 unsigned int type, struct symbol *sym, s64 addend)
 {
        struct reloc *reloc;
 
@@ -600,24 +604,21 @@ static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
 }
 
 /*
- * Move the first global symbol, as per sh_info, into a new, higher symbol
- * index. This fees up the shndx for a new local symbol.
+ * The libelf API is terrible; gelf_update_sym*() takes a data-block-relative
+ * index value, *NOT* the symbol index. As such, iterate the data blocks and
+ * adjust the index until it fits.
+ *
+ * If no data block is found, allow adding a new data block provided the index
+ * is only one past the end.
  */
-static int elf_move_global_symbol(struct elf *elf, struct section *symtab,
-                                 struct section *symtab_shndx)
+static int elf_update_symbol(struct elf *elf, struct section *symtab,
+                            struct section *symtab_shndx, struct symbol *sym)
 {
-       Elf_Data *data, *shndx_data = NULL;
-       Elf32_Word first_non_local;
-       struct symbol *sym;
-       Elf_Scn *s;
-
-       first_non_local = symtab->sh.sh_info;
-
-       sym = find_symbol_by_index(elf, first_non_local);
-       if (!sym) {
-               WARN("no non-local symbols !?");
-               return first_non_local;
-       }
+       Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF;
+       Elf_Data *symtab_data = NULL, *shndx_data = NULL;
+       Elf64_Xword entsize = symtab->sh.sh_entsize;
+       int max_idx, idx = sym->idx;
+       Elf_Scn *s, *t = NULL;
 
        s = elf_getscn(elf->elf, symtab->idx);
        if (!s) {
@@ -625,79 +626,124 @@ static int elf_move_global_symbol(struct elf *elf, struct section *symtab,
                return -1;
        }
 
-       data = elf_newdata(s);
-       if (!data) {
-               WARN_ELF("elf_newdata");
-               return -1;
+       if (symtab_shndx) {
+               t = elf_getscn(elf->elf, symtab_shndx->idx);
+               if (!t) {
+                       WARN_ELF("elf_getscn");
+                       return -1;
+               }
        }
 
-       data->d_buf = &sym->sym;
-       data->d_size = sizeof(sym->sym);
-       data->d_align = 1;
-       data->d_type = ELF_T_SYM;
+       for (;;) {
+               /* get next data descriptor for the relevant sections */
+               symtab_data = elf_getdata(s, symtab_data);
+               if (t)
+                       shndx_data = elf_getdata(t, shndx_data);
 
-       sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
-       elf_dirty_reloc_sym(elf, sym);
+               /* end-of-list */
+               if (!symtab_data) {
+                       void *buf;
 
-       symtab->sh.sh_info += 1;
-       symtab->sh.sh_size += data->d_size;
-       symtab->changed = true;
+                       if (idx) {
+                               /* we don't do holes in symbol tables */
+                               WARN("index out of range");
+                               return -1;
+                       }
 
-       if (symtab_shndx) {
-               s = elf_getscn(elf->elf, symtab_shndx->idx);
-               if (!s) {
-                       WARN_ELF("elf_getscn");
+                       /* if @idx == 0, it's the next contiguous entry, create it */
+                       symtab_data = elf_newdata(s);
+                       if (t)
+                               shndx_data = elf_newdata(t);
+
+                       buf = calloc(1, entsize);
+                       if (!buf) {
+                               WARN("malloc");
+                               return -1;
+                       }
+
+                       symtab_data->d_buf = buf;
+                       symtab_data->d_size = entsize;
+                       symtab_data->d_align = 1;
+                       symtab_data->d_type = ELF_T_SYM;
+
+                       symtab->sh.sh_size += entsize;
+                       symtab->changed = true;
+
+                       if (t) {
+                               shndx_data->d_buf = &sym->sec->idx;
+                               shndx_data->d_size = sizeof(Elf32_Word);
+                               shndx_data->d_align = sizeof(Elf32_Word);
+                               shndx_data->d_type = ELF_T_WORD;
+
+                               symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
+                               symtab_shndx->changed = true;
+                       }
+
+                       break;
+               }
+
+               /* empty blocks should not happen */
+               if (!symtab_data->d_size) {
+                       WARN("zero size data");
                        return -1;
                }
 
-               shndx_data = elf_newdata(s);
+               /* is this the right block? */
+               max_idx = symtab_data->d_size / entsize;
+               if (idx < max_idx)
+                       break;
+
+               /* adjust index and try again */
+               idx -= max_idx;
+       }
+
+       /* something went sideways */
+       if (idx < 0) {
+               WARN("negative index");
+               return -1;
+       }
+
+       /* setup extended section index magic and write the symbol */
+       if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
+               sym->sym.st_shndx = shndx;
+               if (!shndx_data)
+                       shndx = 0;
+       } else {
+               sym->sym.st_shndx = SHN_XINDEX;
                if (!shndx_data) {
-                       WARN_ELF("elf_newshndx_data");
+                       WARN("no .symtab_shndx");
                        return -1;
                }
+       }
 
-               shndx_data->d_buf = &sym->sec->idx;
-               shndx_data->d_size = sizeof(Elf32_Word);
-               shndx_data->d_align = 4;
-               shndx_data->d_type = ELF_T_WORD;
-
-               symtab_shndx->sh.sh_size += 4;
-               symtab_shndx->changed = true;
+       if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) {
+               WARN_ELF("gelf_update_symshndx");
+               return -1;
        }
 
-       return first_non_local;
+       return 0;
 }
 
 static struct symbol *
 elf_create_section_symbol(struct elf *elf, struct section *sec)
 {
        struct section *symtab, *symtab_shndx;
-       Elf_Data *shndx_data = NULL;
-       struct symbol *sym;
-       Elf32_Word shndx;
+       Elf32_Word first_non_local, new_idx;
+       struct symbol *sym, *old;
 
        symtab = find_section_by_name(elf, ".symtab");
        if (symtab) {
                symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
-               if (symtab_shndx)
-                       shndx_data = symtab_shndx->data;
        } else {
                WARN("no .symtab");
                return NULL;
        }
 
-       sym = malloc(sizeof(*sym));
+       sym = calloc(1, sizeof(*sym));
        if (!sym) {
                perror("malloc");
                return NULL;
        }
-       memset(sym, 0, sizeof(*sym));
-
-       sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx);
-       if (sym->idx < 0) {
-               WARN("elf_move_global_symbol");
-               return NULL;
-       }
 
        sym->name = sec->name;
        sym->sec = sec;
@@ -707,24 +753,41 @@ elf_create_section_symbol(struct elf *elf, struct section *sec)
        // st_other 0
        // st_value 0
        // st_size 0
-       shndx = sec->idx;
-       if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
-               sym->sym.st_shndx = shndx;
-               if (!shndx_data)
-                       shndx = 0;
-       } else {
-               sym->sym.st_shndx = SHN_XINDEX;
-               if (!shndx_data) {
-                       WARN("no .symtab_shndx");
+
+       /*
+        * Move the first global symbol, as per sh_info, into a new, higher
+        * symbol index. This frees up a spot for a new local symbol.
+        */
+       first_non_local = symtab->sh.sh_info;
+       new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
+       old = find_symbol_by_index(elf, first_non_local);
+       if (old) {
+               old->idx = new_idx;
+
+               hlist_del(&old->hash);
+               elf_hash_add(symbol, &old->hash, old->idx);
+
+               elf_dirty_reloc_sym(elf, old);
+
+               if (elf_update_symbol(elf, symtab, symtab_shndx, old)) {
+                       WARN("elf_update_symbol move");
                        return NULL;
                }
+
+               new_idx = first_non_local;
        }
 
-       if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) {
-               WARN_ELF("gelf_update_symshndx");
+       sym->idx = new_idx;
+       if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
+               WARN("elf_update_symbol");
                return NULL;
        }
 
+       /*
+        * Either way, we added a LOCAL symbol.
+        */
+       symtab->sh.sh_info += 1;
+
        elf_add_symbol(elf, sym);
 
        return sym;
@@ -843,7 +906,7 @@ static int read_relocs(struct elf *elf)
                tot_reloc += nr_reloc;
        }
 
-       if (stats) {
+       if (opts.stats) {
                printf("max_reloc: %lu\n", max_reloc);
                printf("tot_reloc: %lu\n", tot_reloc);
                printf("reloc_bits: %d\n", elf->reloc_bits);
@@ -1222,7 +1285,7 @@ int elf_write(struct elf *elf)
        struct section *sec;
        Elf_Scn *s;
 
-       if (dryrun)
+       if (opts.dryrun)
                return 0;
 
        /* Update changed relocation sections and section headers: */
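elf_update_symbol() exists because gelf_update_symshndx() takes an index relative to one Elf_Data block, not the global symtab index; the loop above subtracts each block's entry count from the index until it fits, and creates a fresh block only when the leftover index is exactly one past the end. A standalone sketch of just that index translation, with invented block sizes:

    #include <stdio.h>

    int main(void)
    {
            /* invented: entries per Elf_Data block (d_size / sh_entsize) */
            long block_entries[] = { 100, 50, 25 };
            long idx = 130;         /* global symbol index to update */
            int b;

            for (b = 0; b < 3; b++) {
                    if (idx < block_entries[b]) {
                            /* prints: block 1, relative index 30 */
                            printf("block %d, relative index %ld\n", b, idx);
                            return 0;
                    }
                    idx -= block_entries[b];
            }
            printf("index %ld past the end: append a new block\n", idx);
            return 1;
    }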
index c39dbfaef6dcb94c35228b12dd1b003eb7cf07f5..280ea18b7f2b9efde38cf0c070130dfc4a1f45bb 100644 (file)
@@ -8,13 +8,37 @@
 #include <subcmd/parse-options.h>
 
 extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
-           lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
-           ibt;
+
+struct opts {
+       /* actions: */
+       bool dump_orc;
+       bool hack_jump_label;
+       bool hack_noinstr;
+       bool ibt;
+       bool mcount;
+       bool noinstr;
+       bool orc;
+       bool retpoline;
+       bool sls;
+       bool stackval;
+       bool static_call;
+       bool uaccess;
+
+       /* options: */
+       bool backtrace;
+       bool backup;
+       bool dryrun;
+       bool link;
+       bool module;
+       bool no_unreachable;
+       bool sec_address;
+       bool stats;
+};
+
+extern struct opts opts;
 
 extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
 
-extern int cmd_check(int argc, const char **argv);
-extern int cmd_orc(int argc, const char **argv);
+extern int objtool_run(int argc, const char **argv);
 
 #endif /* _BUILTIN_H */
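Collapsing the old global bools into a single struct opts means every consumer now spells opts.<flag>, and adding a flag takes one declaration here; the real option table that fills these fields lives in builtin-check.c (outside this hunk), built from the subcmd parse-options helpers. A self-contained toy illustration of the same consolidation pattern, with invented flag names:

    #include <stdio.h>
    #include <stdbool.h>
    #include <string.h>

    /* Invented miniature of the consolidation: one struct instead of a
     * pile of scattered global bools. */
    struct toy_opts {
            bool retpoline;
            bool stats;
    };

    static struct toy_opts topts;

    int main(int argc, char **argv)
    {
            int i;

            for (i = 1; i < argc; i++) {
                    if (!strcmp(argv[i], "--retpoline"))
                            topts.retpoline = true;
                    else if (!strcmp(argv[i], "--stats"))
                            topts.stats = true;
            }
            printf("retpoline=%d stats=%d\n", topts.retpoline, topts.stats);
            return 0;
    }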
index 9b36802ed86f605606aaa674f19cb79121425a2e..adebfbc2b51834b80daf01fac7b447e846f16e1e 100644 (file)
@@ -73,7 +73,7 @@ struct reloc {
        struct symbol *sym;
        unsigned long offset;
        unsigned int type;
-       long addend;
+       s64 addend;
        int idx;
        bool jump_table_start;
 };
@@ -86,7 +86,7 @@ struct elf {
        int fd;
        bool changed;
        char *name;
-       unsigned int text_size;
+       unsigned int text_size, num_files;
        struct list_head sections;
 
        int symbol_bits;
@@ -131,11 +131,21 @@ static inline u32 reloc_hash(struct reloc *reloc)
        return sec_offset_hash(reloc->sec, reloc->offset);
 }
 
+/*
+ * Try to see if it's a whole archive (vmlinux.o or module).
+ *
+ * Note this will miss the case where a module only has one source file.
+ */
+static inline bool has_multiple_files(struct elf *elf)
+{
+       return elf->num_files > 1;
+}
+
 struct elf *elf_open_read(const char *name, int flags);
 struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
 
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
-                 unsigned int type, struct symbol *sym, long addend);
+                 unsigned int type, struct symbol *sym, s64 addend);
 int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
                          unsigned long offset, unsigned int type,
                          struct section *insn_sec, unsigned long insn_off);
index 802cfda0a6f638a37749201424b465077f38af23..a3e79ae75f2e8f4f55c84898e1ff81a4a10f93a9 100644 (file)
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <objtool/builtin.h>
 #include <objtool/elf.h>
 
 extern const char *objname;
 
 static inline char *offstr(struct section *sec, unsigned long offset)
 {
-       struct symbol *func;
-       char *name, *str;
-       unsigned long name_off;
+       bool is_text = (sec->sh.sh_flags & SHF_EXECINSTR);
+       struct symbol *sym = NULL;
+       char *str;
+       int len;
 
-       func = find_func_containing(sec, offset);
-       if (!func)
-               func = find_symbol_containing(sec, offset);
-       if (func) {
-               name = func->name;
-               name_off = offset - func->offset;
+       if (is_text)
+               sym = find_func_containing(sec, offset);
+       if (!sym)
+               sym = find_symbol_containing(sec, offset);
+
+       if (sym) {
+               str = malloc(strlen(sym->name) + strlen(sec->name) + 40);
+               len = sprintf(str, "%s+0x%lx", sym->name, offset - sym->offset);
+               if (opts.sec_address)
+                       sprintf(str+len, " (%s+0x%lx)", sec->name, offset);
        } else {
-               name = sec->name;
-               name_off = offset;
+               str = malloc(strlen(sec->name) + 20);
+               sprintf(str, "%s+0x%lx", sec->name, offset);
        }
 
-       str = malloc(strlen(name) + 20);
-
-       if (func)
-               sprintf(str, "%s()+0x%lx", name, name_off);
-       else
-               sprintf(str, "%s+0x%lx", name, name_off);
-
        return str;
 }
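offstr() now consults function symbols only for executable sections, prints sym+0x<off> when a symbol is found (falling back to sec+0x<off>), and appends the raw section+offset when opts.sec_address is set. A standalone illustration of the two output forms, with invented names:

    #include <stdio.h>

    int main(void)
    {
            char buf[64];
            int len;

            /* default form */
            len = sprintf(buf, "%s+0x%lx", "my_func", 0x10UL);
            /* with opts.sec_address: append the raw section offset */
            sprintf(buf + len, " (%s+0x%lx)", ".text", 0x1234UL);

            printf("%s\n", buf);    /* my_func+0x10 (.text+0x1234) */
            return 0;
    }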
 
index 843ff3c2f28e4aa6e08c09a8a527aa2ab5aafed7..512669ce064c807b7bede63498f0871664434de0 100644 (file)
@@ -3,16 +3,6 @@
  * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
  */
 
-/*
- * objtool:
- *
- * The 'check' subcmd analyzes every .o file and ensures the validity of its
- * stack trace metadata.  It enforces a set of rules on asm code and C inline
- * assembly code so that stack traces can be reliable.
- *
- * For more information, see tools/objtool/Documentation/stack-validation.txt.
- */
-
 #include <stdio.h>
 #include <stdbool.h>
 #include <string.h>
 #include <objtool/objtool.h>
 #include <objtool/warn.h>
 
-struct cmd_struct {
-       const char *name;
-       int (*fn)(int, const char **);
-       const char *help;
-};
-
-static const char objtool_usage_string[] =
-       "objtool COMMAND [ARGS]";
-
-static struct cmd_struct objtool_cmds[] = {
-       {"check",       cmd_check,      "Perform stack metadata validation on an object file" },
-       {"orc",         cmd_orc,        "Generate in-place ORC unwind tables for an object file" },
-};
-
 bool help;
 
 const char *objname;
@@ -118,7 +94,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
        if (!file.elf)
                return NULL;
 
-       if (backup && !objtool_create_backup(objname)) {
+       if (opts.backup && !objtool_create_backup(objname)) {
                WARN("can't create backup file");
                return NULL;
        }
@@ -129,7 +105,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
        INIT_LIST_HEAD(&file.static_call_list);
        INIT_LIST_HEAD(&file.mcount_loc_list);
        INIT_LIST_HEAD(&file.endbr_list);
-       file.ignore_unreachables = no_unreachable;
+       file.ignore_unreachables = opts.no_unreachable;
        file.hints = false;
 
        return &file;
@@ -137,7 +113,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
 
 void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
 {
-       if (!noinstr)
+       if (!opts.noinstr)
                return;
 
        if (!f->pv_ops) {
@@ -161,70 +137,6 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
        f->pv_ops[idx].clean = false;
 }
 
-static void cmd_usage(void)
-{
-       unsigned int i, longest = 0;
-
-       printf("\n usage: %s\n\n", objtool_usage_string);
-
-       for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
-               if (longest < strlen(objtool_cmds[i].name))
-                       longest = strlen(objtool_cmds[i].name);
-       }
-
-       puts(" Commands:");
-       for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
-               printf("   %-*s   ", longest, objtool_cmds[i].name);
-               puts(objtool_cmds[i].help);
-       }
-
-       printf("\n");
-
-       if (!help)
-               exit(129);
-       exit(0);
-}
-
-static void handle_options(int *argc, const char ***argv)
-{
-       while (*argc > 0) {
-               const char *cmd = (*argv)[0];
-
-               if (cmd[0] != '-')
-                       break;
-
-               if (!strcmp(cmd, "--help") || !strcmp(cmd, "-h")) {
-                       help = true;
-                       break;
-               } else {
-                       fprintf(stderr, "Unknown option: %s\n", cmd);
-                       cmd_usage();
-               }
-
-               (*argv)++;
-               (*argc)--;
-       }
-}
-
-static void handle_internal_command(int argc, const char **argv)
-{
-       const char *cmd = argv[0];
-       unsigned int i, ret;
-
-       for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
-               struct cmd_struct *p = objtool_cmds+i;
-
-               if (strcmp(p->name, cmd))
-                       continue;
-
-               ret = p->fn(argc, argv);
-
-               exit(ret);
-       }
-
-       cmd_usage();
-}
-
 int main(int argc, const char **argv)
 {
        static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED";
@@ -233,14 +145,7 @@ int main(int argc, const char **argv)
        exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED);
        pager_init(UNUSED);
 
-       argv++;
-       argc--;
-       handle_options(&argc, &argv);
-
-       if (!argc || help)
-               cmd_usage();
-
-       handle_internal_command(argc, argv);
+       objtool_run(argc, argv);
 
        return 0;
 }
index 8314e824db4ae2023695413b1ef5c254f6aa2bb7..d83f607733b047dfb8287a786e21b3766f8de4d1 100644 (file)
        return ENOSYS;                                                  \
 })
 
-int __weak check(struct objtool_file *file)
-{
-       UNSUPPORTED("check subcommand");
-}
-
 int __weak orc_dump(const char *_objname)
 {
-       UNSUPPORTED("orc");
+       UNSUPPORTED("ORC");
 }
 
 int __weak orc_create(struct objtool_file *file)
 {
-       UNSUPPORTED("orc");
+       UNSUPPORTED("ORC");
 }
index f3bf9297bcc03c5e5075a0233d8ca49285101c82..1bd64e7404b9fbd473c45fafbf15236a3b229246 100644 (file)
@@ -553,9 +553,16 @@ ifndef NO_LIBELF
         ifeq ($(feature-libbpf), 1)
           EXTLIBS += -lbpf
           $(call detected,CONFIG_LIBBPF_DYNAMIC)
+
+          $(call feature_check,libbpf-btf__load_from_kernel_by_id)
+          ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1)
+            CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+          endif
         else
           dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
         endif
+      else
+       CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
       endif
     endif
 
index 207c56805c551dec7ed651bab32d39cef39f86ec..0ed177991ad05e42f07efbe25c285d56806a6d67 100644 (file)
@@ -9,6 +9,8 @@
 #include "../../../util/perf_regs.h"
 #include "../../../util/debug.h"
 #include "../../../util/event.h"
+#include "../../../util/pmu.h"
+#include "../../../util/pmu-hybrid.h"
 
 const struct sample_reg sample_reg_masks[] = {
        SMPL_REG(AX, PERF_REG_X86_AX),
@@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void)
                .disabled               = 1,
                .exclude_kernel         = 1,
        };
+       struct perf_pmu *pmu;
        int fd;
        /*
         * In an unnamed union, init it here to build on older gcc versions
         */
        attr.sample_period = 1;
 
+       if (perf_pmu__has_hybrid()) {
+               /*
+                * The same register set is supported among different hybrid PMUs.
+                * Only check the first available one.
+                */
+               pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list);
+               attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+       }
+
        event_attr_init(&attr);
 
        fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
index 44e1f8a44087e35d60c732bea6aef7146fee3111..20eed1e53f8092836b35051d392d7bd98b737539 100644 (file)
@@ -311,6 +311,7 @@ err_out:
 
        /* BUG_ON due to failure in allocation of orig_mask/mask */
        BUG_ON(-1);
+       return NULL;
 }
 
 static cpu_set_t *bind_to_node(int target_node)
@@ -364,6 +365,7 @@ err_out:
 
        /* BUG_ON due to failure in allocation of orig_mask/mask */
        BUG_ON(-1);
+       return NULL;
 }
 
 static void bind_to_cpumask(cpu_set_t *mask)
@@ -1738,7 +1740,7 @@ static int __bench_numa(const char *name)
                "GB/sec,", "total-speed",       "GB/sec total speed");
 
        if (g->p.show_details >= 2) {
-               char tname[14 + 2 * 10 + 1];
+               char tname[14 + 2 * 11 + 1];
                struct thread_data *td;
                for (p = 0; p < g->p.nr_proc; p++) {
                        for (t = 0; t < g->p.nr_threads; t++) {
index 57b9591f7cbb422d3afc32e4336931e4c202b10a..17c023823713d4b96ef8404083ce02b794dffe9c 100644 (file)
@@ -222,11 +222,11 @@ static int __test__bpf(int idx)
 
        ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
                                       bpf_testcase_table[idx].prog_id,
-                                      true, NULL);
+                                      false, NULL);
        if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
                pr_debug("Unable to get BPF object, %s\n",
                         bpf_testcase_table[idx].msg_compile_fail);
-               if (idx == 0)
+               if ((idx == 0) || (ret == TEST_SKIP))
                        return TEST_SKIP;
                else
                        return TEST_FAIL;
@@ -364,9 +364,11 @@ static int test__bpf_prologue_test(struct test_suite *test __maybe_unused,
 static struct test_case bpf_tests[] = {
 #ifdef HAVE_LIBBPF_SUPPORT
        TEST_CASE("Basic BPF filtering", basic_bpf_test),
-       TEST_CASE("BPF pinning", bpf_pinning),
+       TEST_CASE_REASON("BPF pinning", bpf_pinning,
+                       "clang isn't installed or environment missing BPF support"),
 #ifdef HAVE_BPF_PROLOGUE
-       TEST_CASE("BPF prologue generation", bpf_prologue_test),
+       TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test,
+                       "clang isn't installed or environment missing BPF support"),
 #else
        TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
 #endif
index fac3717d9ba1bb9902fd2372477b175aa4646bea..d336cda94a115fdbdeb15fd71437755dcb75a59b 100644 (file)
@@ -279,6 +279,7 @@ static const char *shell_test__description(char *description, size_t size,
 {
        FILE *fp;
        char filename[PATH_MAX];
+       int ch;
 
        path__join(filename, sizeof(filename), path, name);
        fp = fopen(filename, "r");
@@ -286,7 +287,9 @@ static const char *shell_test__description(char *description, size_t size,
                return NULL;
 
        /* Skip shebang */
-       while (fgetc(fp) != '\n');
+       do {
+               ch = fgetc(fp);
+       } while (ch != EOF && ch != '\n');
 
        description = fgets(description, size, fp);
        fclose(fp);
@@ -417,7 +420,8 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
                        .priv = &st,
                };
 
-               if (!perf_test__matches(test_suite.desc, curr, argc, argv))
+               if (test_suite.desc == NULL ||
+                   !perf_test__matches(test_suite.desc, curr, argc, argv))
                        continue;
 
                st.file = ent->d_name;
index b30dba455f36c665227b2d90c26112f48fdb5974..9c9ef33e0b3c609e39be0449ba823f6096105e8a 100755 (executable)
@@ -5,6 +5,16 @@
 set -e
 
 for p in $(perf list --raw-dump pmu); do
+  # On powerpc, skip the hv_24x7 and hv_gpci events.
+  # These events need input values to be filled in for
+  # core, chip, or partition id, depending on the system.
+  # Example: hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/
+  # hv_gpci/event,partition_id=?/
+  # Hence skip these events for ppc.
+  if echo "$p" |grep -Eq 'hv_24x7|hv_gpci' ; then
+    echo "Skipping: Event '$p' in powerpc"
+    continue
+  fi
   echo "Testing $p"
   result=$(perf stat -e "$p" true 2>&1)
   if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then
index 6de53b7ef5ffd9a4ba8cb555b9ee8976a95a6017..e4cb4f1806ffa6b4b99e336abe08441e78ef7797 100755 (executable)
@@ -29,7 +29,6 @@ cleanup_files()
        rm -f ${file}
        rm -f "${perfdata}.old"
        trap - exit term int
-       kill -2 $$
        exit $glb_err
 }
 
index ee1e3dcbc0bdb185f0ce98bf20210eddee58f339..d23a9e322ff52868f3a5dbe70b04770c9a4b6c47 100644 (file)
@@ -109,6 +109,17 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
                        && strncmp(session->header.env.arch, "aarch64", 7))
                return TEST_SKIP;
 
+       /*
+        * On the powerpc pSeries platform, not all topology information
+        * is exposed via sysfs. Due to this restriction, details like
+        * physical_package_id will be set to -1. Hence skip this
+        * test if physical_package_id returns -1 for a cpu from perf_cpu_map.
+        */
+       if (strncmp(session->header.env.arch, "powerpc", 7)) {
+               if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1)
+                       return TEST_SKIP;
+       }
+
        TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
 
        for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
index 94624733af7e225a91456d52420f51baf600ab94..8271ab764eb56c7bf5df40324c72d00a0b85430d 100644 (file)
@@ -22,7 +22,8 @@
 #include "record.h"
 #include "util/synthetic-events.h"
 
-struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
+#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+struct btf *btf__load_from_kernel_by_id(__u32 id)
 {
        struct btf *btf;
 #pragma GCC diagnostic push
@@ -32,6 +33,7 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
 
        return err ? ERR_PTR(err) : btf;
 }
+#endif
 
 int __weak bpf_prog_load(enum bpf_prog_type prog_type,
                         const char *prog_name __maybe_unused,
index f9a320694b8555eee9d32abd8e891a81c71671cf..a7f93f5a1ac81968fd4d2cc4eef37c0806b9493b 100644 (file)
@@ -1151,9 +1151,20 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
        struct branch_entry *entries = perf_sample__branch_entries(sample);
        uint64_t i;
 
-       printf("%s: nr:%" PRIu64 "\n",
-               !callstack ? "... branch stack" : "... branch callstack",
-               sample->branch_stack->nr);
+       if (!callstack) {
+               printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
+       } else {
+               /* The reason for adding 1 to nr is that expanding the
+                * branch stack generates nr + 1 callstack records, e.g.,
+                *         B()->C()
+                *         A()->B()
+                * the final callstack should be:
+                *         C()
+                *         B()
+                *         A()
+                */
+               printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
+       }
 
        for (i = 0; i < sample->branch_stack->nr; i++) {
                struct branch_entry *e = &entries[i];
@@ -1169,8 +1180,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
                                (unsigned)e->flags.reserved,
                                e->flags.type ? branch_type_name(e->flags.type) : "");
                } else {
-                       printf("..... %2"PRIu64": %016" PRIx64 "\n",
-                               i, i > 0 ? e->from : e->to);
+                       if (i == 0) {
+                               printf("..... %2"PRIu64": %016" PRIx64 "\n"
+                                      "..... %2"PRIu64": %016" PRIx64 "\n",
+                                               i, e->to, i+1, e->from);
+                       } else {
+                               printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from);
+                       }
                }
        }
 }
index 817a2de264b46956ff9073d6179be2d48c6eeffe..c1af37e11f9898522d48ca5a79ce1faf19f7f7c6 100644 (file)
@@ -472,9 +472,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 int perf_event__process_stat_event(struct perf_session *session,
                                   union perf_event *event)
 {
-       struct perf_counts_values count;
+       struct perf_counts_values count, *ptr;
        struct perf_record_stat *st = &event->stat;
        struct evsel *counter;
+       int cpu_map_idx;
 
        count.val = st->val;
        count.ena = st->ena;
@@ -485,8 +486,18 @@ int perf_event__process_stat_event(struct perf_session *session,
                pr_err("Failed to resolve counter for stat event.\n");
                return -EINVAL;
        }
-
-       *perf_counts(counter->counts, st->cpu, st->thread) = count;
+       cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu});
+       if (cpu_map_idx == -1) {
+               pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter));
+               return -EINVAL;
+       }
+       ptr = perf_counts(counter->counts, cpu_map_idx, st->thread);
+       if (ptr == NULL) {
+               pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n",
+                       st->cpu, st->thread, evsel__name(counter));
+               return -EINVAL;
+       }
+       *ptr = count;
        counter->supported = true;
        return 0;
 }
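The counts array is positional: it is indexed by a CPU's position in the evsel's cpu map, not by the raw CPU number, which is why the handler above translates st->cpu through perf_cpu_map__idx() and bails out for CPUs outside the map. A toy standalone version of that translation (not the libperf API):

    #include <stdio.h>

    /* invented translation helper; the real one is perf_cpu_map__idx() */
    static int cpu_map_idx(const int *map, int nr, int cpu)
    {
            int i;

            for (i = 0; i < nr; i++)
                    if (map[i] == cpu)
                            return i;
            return -1;
    }

    int main(void)
    {
            int map[] = { 0, 2, 4, 6 };     /* evsel bound to even CPUs only */

            printf("cpu 4 -> idx %d\n", cpu_map_idx(map, 4, 4));    /* 2 */
            printf("cpu 3 -> idx %d\n", cpu_map_idx(map, 4, 3));    /* -1 */
            return 0;
    }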
index 185b8c588e1d7623d5488b8da4d66b57a6710b91..38f9b9da817021a0c9c3616862b17d0d54faa430 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: cmfsize - Common get file size function
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 3c265bc917a10299b81175e3bfe4c7794dd7b5ef..96fd6cec78e21e0543e2b62e7eb66ee5c3c8a4e5 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: getopt
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index ccabdbaae6a48d5af4a5f6add321faf760fbdeb3..bd08f36df4a7b01c8382ae48c20bc44820733546 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index edd99274cd1221eb7daaf93c655501c2dbf2641e..5107892d054bddacfa8eb2cef929eafa39321e9a 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: osunixdir - Unix directory access interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index fee0022560d5f583c338539b0ef19002fc3db09b..6ff4edd8dc3bb6a465f78cf3b0dfcd065cf67df1 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: osunixmap - Unix OSL for file mappings
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 0861728da562196dead5e704bb7ae40dc5f7da35..b3651a04d68cf50cefe147b0b9390c6b82dbc160 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: osunixxf - UNIX OSL interfaces
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e0ebc1dab1cc77804e2b076aae4a9a720be3263b..153249c87fd7f6fe92eaa497482549704e0b3930 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: acpidump.h - Include file for acpi_dump utility
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index 444e3d78bd89c02be230052dc51a638e2c6546d4..d54dde02b87d750a76c8b52c8102d323b5d6b235 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: apdump - Dump routines for ACPI tables (acpidump)
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index da0c6e13042b19b3d455a918fa5c144f65e92375..2d9b45a9b526c371d53f4dbc38414f111f0f3463 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: apfiles - File-related functions for acpidump utility
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index a4cf6042fcfde4dba382ceb9354d5163e41bf37d..44b23fc53dd9fbd6067ebd756e28a44c9bd089c0 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Module Name: apmain - Main module for the acpidump utility
  *
- * Copyright (C) 2000 - 2021, Intel Corp.
+ * Copyright (C) 2000 - 2022, Intel Corp.
  *
  *****************************************************************************/
 
index e8567671137257b098c1612e2ef1e203097431d1..761375062505590c2685843f61bb4c4af31d21cb 100644 (file)
@@ -190,7 +190,7 @@ static int handle_event(struct nl_msg *n, void *arg)
        struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
        struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
        int ret;
-       struct perf_cap perf_cap;
+       struct perf_cap perf_cap = {0};
 
        ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
 
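
The "= {0}" initializer above guarantees every member of perf_cap starts at zero, so
attributes the netlink parse path never fills read back as zero rather than stack
garbage. A small sketch of why that matters; the struct layout and parse() below are
hypothetical, not the real perf_cap:

        #include <stdio.h>

        struct perf_cap {
                int cpu;
                int perf;
                int efficiency;
        };

        /* Like the netlink handler, may fill only some attributes. */
        static void parse(struct perf_cap *cap, int have_perf)
        {
                if (have_perf)
                        cap->perf = 42;
        }

        int main(void)
        {
                struct perf_cap cap = {0};      /* all members start at 0 */

                parse(&cap, 0);
                printf("cpu=%d perf=%d eff=%d\n",
                       cap.cpu, cap.perf, cap.efficiency);      /* 0 0 0 */
                return 0;
        }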
index 060390e88e37430f515f3972a5fed1fbdceecf47..9d35614995ee163501a56729c5479a02263ff11d 100644 (file)
@@ -1892,6 +1892,12 @@ static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
        int ret;
        int status = *(int *)arg4;
 
+       if (status && no_turbo()) {
+               isst_display_error_info_message(1, "Turbo mode is disabled", 0, 0);
+               ret = -1;
+               goto disp_results;
+       }
+
        ret = isst_get_ctdp_levels(cpu, &pkg_dev);
        if (ret) {
                isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
index f3e3c94ab9bd56f88a6ed03ad888d48ba4b3eab5..92e139b9c792d7207246e6a1227c52112bcab090 100644 (file)
@@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line")
 endif
 
 turbostat : turbostat.c
-override CFLAGS +=     -O2 -Wall -I../../../include
+override CFLAGS +=     -O2 -Wall -Wextra -I../../../include
 override CFLAGS +=     -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
 override CFLAGS +=     -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
 override CFLAGS +=     -D_FILE_OFFSET_BITS=64
index 9b17097bc3d7b3282eec8f70c2c9e27c9f5dde67..1e7d3de55a948be5f03afb048f922841e6bfdc08 100644 (file)
@@ -292,7 +292,7 @@ starts a new interval.
 must be run as root.
 Alternatively, non-root users can be enabled to run turbostat this way:
 
-# setcap cap_sys_rawio=ep ./turbostat
+# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep ./turbostat
 
 # chmod +r /dev/cpu/*/msr
 
index bc5ae0872fed97b6dfa00a608eb669c4bdcf0b8c..ede31a4287a077ef08204f1e5d0b74d07a1b6c15 100644 (file)
@@ -3,7 +3,7 @@
  * turbostat -- show CPU frequency and C-state residency
  * on modern Intel and AMD processors.
  *
- * Copyright (c) 2021 Intel Corporation.
+ * Copyright (c) 2022 Intel Corporation.
  * Len Brown <len.brown@intel.com>
  */
 
 #include <asm/unistd.h>
 #include <stdbool.h>
 
+#define UNUSED(x) (void)(x)
+
+/*
+ * This list matches the column headers, except
+ * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
+ * 2. Core and CPU are moved to the end: other names contain them as
+ *    substrings, and must not match on them for --show and --hide.
+ */
+
+/*
+ * buffer size used by sscanf() for added column names
+ * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
+ */
+#define        NAME_BYTES 20
+#define PATH_BYTES 128
+
+enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
+enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
+enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+
+struct msr_counter {
+       unsigned int msr_num;
+       char name[NAME_BYTES];
+       char path[PATH_BYTES];
+       unsigned int width;
+       enum counter_type type;
+       enum counter_format format;
+       struct msr_counter *next;
+       unsigned int flags;
+#define        FLAGS_HIDE      (1 << 0)
+#define        FLAGS_SHOW      (1 << 1)
+#define        SYSFS_PERCPU    (1 << 1)
+};
+
+struct msr_counter bic[] = {
+       { 0x0, "usec", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Package", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Node", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Busy%", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "IRQ", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 },
+       { 0x0, "sysfs", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Core", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CPU", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "APIC", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "Die", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
+};
+
+#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
+#define        BIC_USEC        (1ULL << 0)
+#define        BIC_TOD         (1ULL << 1)
+#define        BIC_Package     (1ULL << 2)
+#define        BIC_Node        (1ULL << 3)
+#define        BIC_Avg_MHz     (1ULL << 4)
+#define        BIC_Busy        (1ULL << 5)
+#define        BIC_Bzy_MHz     (1ULL << 6)
+#define        BIC_TSC_MHz     (1ULL << 7)
+#define        BIC_IRQ         (1ULL << 8)
+#define        BIC_SMI         (1ULL << 9)
+#define        BIC_sysfs       (1ULL << 10)
+#define        BIC_CPU_c1      (1ULL << 11)
+#define        BIC_CPU_c3      (1ULL << 12)
+#define        BIC_CPU_c6      (1ULL << 13)
+#define        BIC_CPU_c7      (1ULL << 14)
+#define        BIC_ThreadC     (1ULL << 15)
+#define        BIC_CoreTmp     (1ULL << 16)
+#define        BIC_CoreCnt     (1ULL << 17)
+#define        BIC_PkgTmp      (1ULL << 18)
+#define        BIC_GFX_rc6     (1ULL << 19)
+#define        BIC_GFXMHz      (1ULL << 20)
+#define        BIC_Pkgpc2      (1ULL << 21)
+#define        BIC_Pkgpc3      (1ULL << 22)
+#define        BIC_Pkgpc6      (1ULL << 23)
+#define        BIC_Pkgpc7      (1ULL << 24)
+#define        BIC_Pkgpc8      (1ULL << 25)
+#define        BIC_Pkgpc9      (1ULL << 26)
+#define        BIC_Pkgpc10     (1ULL << 27)
+#define BIC_CPU_LPI    (1ULL << 28)
+#define BIC_SYS_LPI    (1ULL << 29)
+#define        BIC_PkgWatt     (1ULL << 30)
+#define        BIC_CorWatt     (1ULL << 31)
+#define        BIC_GFXWatt     (1ULL << 32)
+#define        BIC_PkgCnt      (1ULL << 33)
+#define        BIC_RAMWatt     (1ULL << 34)
+#define        BIC_PKG__       (1ULL << 35)
+#define        BIC_RAM__       (1ULL << 36)
+#define        BIC_Pkg_J       (1ULL << 37)
+#define        BIC_Cor_J       (1ULL << 38)
+#define        BIC_GFX_J       (1ULL << 39)
+#define        BIC_RAM_J       (1ULL << 40)
+#define        BIC_Mod_c6      (1ULL << 41)
+#define        BIC_Totl_c0     (1ULL << 42)
+#define        BIC_Any_c0      (1ULL << 43)
+#define        BIC_GFX_c0      (1ULL << 44)
+#define        BIC_CPUGFX      (1ULL << 45)
+#define        BIC_Core        (1ULL << 46)
+#define        BIC_CPU         (1ULL << 47)
+#define        BIC_APIC        (1ULL << 48)
+#define        BIC_X2APIC      (1ULL << 49)
+#define        BIC_Die         (1ULL << 50)
+#define        BIC_GFXACTMHz   (1ULL << 51)
+#define        BIC_IPC         (1ULL << 52)
+#define        BIC_CORE_THROT_CNT      (1ULL << 53)
+
+#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
+#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
+#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz)
+#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_OTHER (BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
+
+#define BIC_DISABLED_BY_DEFAULT        (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
+
+unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
+
+#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
+#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
+#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
+#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
+#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
+#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
+
 char *proc_stat = "/proc/stat";
 FILE *outf;
 int *fd_percpu;
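
The moved block above is turbostat's column gating machinery: a built-in column is
printed only when its bit is set in both bic_enabled (what the user asked for) and
bic_present (what the runtime probe found). A minimal sketch of that two-mask AND,
with hypothetical column bits:

        #include <stdio.h>

        #define COL_IRQ         (1ULL << 0)
        #define COL_SMI         (1ULL << 1)

        static unsigned long long enabled = ~0ULL;      /* user wants all */
        static unsigned long long present;              /* filled by probe */

        #define DO_COL(bit)     (enabled & present & (bit))

        int main(void)
        {
                present |= COL_IRQ;             /* probe found IRQ only */

                if (DO_COL(COL_IRQ))
                        printf("IRQ column shown\n");
                if (!DO_COL(COL_SMI))
                        printf("SMI column hidden (not present)\n");
                return 0;
        }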
@@ -48,6 +213,7 @@ struct timespec interval_ts = { 5, 0 };
 unsigned int model_orig;
 
 unsigned int num_iterations;
+unsigned int header_iterations;
 unsigned int debug;
 unsigned int quiet;
 unsigned int shown;
@@ -159,13 +325,6 @@ int ignore_stdin;
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
-/*
- * buffer size used by sscanf() for added column names
- * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
- */
-#define        NAME_BYTES 20
-#define PATH_BYTES 128
-
 int backwards_count;
 char *progname;
 
@@ -205,6 +364,7 @@ struct core_data {
        unsigned int core_temp_c;
        unsigned int core_energy;       /* MSR_CORE_ENERGY_STAT */
        unsigned int core_id;
+       unsigned long long core_throt_cnt;
        unsigned long long counter[MAX_ADDED_COUNTERS];
 } *core_even, *core_odd;
 
@@ -255,24 +415,6 @@ struct pkg_data {
 
 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
 
-enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
-enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
-enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
-
-struct msr_counter {
-       unsigned int msr_num;
-       char name[NAME_BYTES];
-       char path[PATH_BYTES];
-       unsigned int width;
-       enum counter_type type;
-       enum counter_format format;
-       struct msr_counter *next;
-       unsigned int flags;
-#define        FLAGS_HIDE      (1 << 0)
-#define        FLAGS_SHOW      (1 << 1)
-#define        SYSFS_PERCPU    (1 << 1)
-};
-
 /*
  * The accumulated sum of MSR is defined as a monotonic
  * increasing MSR, it will be accumulated periodically,
@@ -522,8 +664,10 @@ static int perf_instr_count_open(int cpu_num)
 
        /* counter for cpu_num, including user + kernel and all processes */
        fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
-       if (fd == -1)
-               err(-1, "cpu%d: perf instruction counter\n", cpu_num);
+       if (fd == -1) {
+               warn("cpu%d: perf instruction counter", cpu_num);
+               BIC_NOT_PRESENT(BIC_IPC);
+       }
 
        return fd;
 }
@@ -550,143 +694,10 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
        return 0;
 }
 
-/*
- * This list matches the column headers, except
- * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
- * 2. Core and CPU are moved to the end, we can't have strings that contain them
- *    matching on them for --show and --hide.
- */
-struct msr_counter bic[] = {
-       { 0x0, "usec" },
-       { 0x0, "Time_Of_Day_Seconds" },
-       { 0x0, "Package" },
-       { 0x0, "Node" },
-       { 0x0, "Avg_MHz" },
-       { 0x0, "Busy%" },
-       { 0x0, "Bzy_MHz" },
-       { 0x0, "TSC_MHz" },
-       { 0x0, "IRQ" },
-       { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL },
-       { 0x0, "sysfs" },
-       { 0x0, "CPU%c1" },
-       { 0x0, "CPU%c3" },
-       { 0x0, "CPU%c6" },
-       { 0x0, "CPU%c7" },
-       { 0x0, "ThreadC" },
-       { 0x0, "CoreTmp" },
-       { 0x0, "CoreCnt" },
-       { 0x0, "PkgTmp" },
-       { 0x0, "GFX%rc6" },
-       { 0x0, "GFXMHz" },
-       { 0x0, "Pkg%pc2" },
-       { 0x0, "Pkg%pc3" },
-       { 0x0, "Pkg%pc6" },
-       { 0x0, "Pkg%pc7" },
-       { 0x0, "Pkg%pc8" },
-       { 0x0, "Pkg%pc9" },
-       { 0x0, "Pk%pc10" },
-       { 0x0, "CPU%LPI" },
-       { 0x0, "SYS%LPI" },
-       { 0x0, "PkgWatt" },
-       { 0x0, "CorWatt" },
-       { 0x0, "GFXWatt" },
-       { 0x0, "PkgCnt" },
-       { 0x0, "RAMWatt" },
-       { 0x0, "PKG_%" },
-       { 0x0, "RAM_%" },
-       { 0x0, "Pkg_J" },
-       { 0x0, "Cor_J" },
-       { 0x0, "GFX_J" },
-       { 0x0, "RAM_J" },
-       { 0x0, "Mod%c6" },
-       { 0x0, "Totl%C0" },
-       { 0x0, "Any%C0" },
-       { 0x0, "GFX%C0" },
-       { 0x0, "CPUGFX%" },
-       { 0x0, "Core" },
-       { 0x0, "CPU" },
-       { 0x0, "APIC" },
-       { 0x0, "X2APIC" },
-       { 0x0, "Die" },
-       { 0x0, "GFXAMHz" },
-       { 0x0, "IPC" },
-};
-
-#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
-#define        BIC_USEC        (1ULL << 0)
-#define        BIC_TOD         (1ULL << 1)
-#define        BIC_Package     (1ULL << 2)
-#define        BIC_Node        (1ULL << 3)
-#define        BIC_Avg_MHz     (1ULL << 4)
-#define        BIC_Busy        (1ULL << 5)
-#define        BIC_Bzy_MHz     (1ULL << 6)
-#define        BIC_TSC_MHz     (1ULL << 7)
-#define        BIC_IRQ         (1ULL << 8)
-#define        BIC_SMI         (1ULL << 9)
-#define        BIC_sysfs       (1ULL << 10)
-#define        BIC_CPU_c1      (1ULL << 11)
-#define        BIC_CPU_c3      (1ULL << 12)
-#define        BIC_CPU_c6      (1ULL << 13)
-#define        BIC_CPU_c7      (1ULL << 14)
-#define        BIC_ThreadC     (1ULL << 15)
-#define        BIC_CoreTmp     (1ULL << 16)
-#define        BIC_CoreCnt     (1ULL << 17)
-#define        BIC_PkgTmp      (1ULL << 18)
-#define        BIC_GFX_rc6     (1ULL << 19)
-#define        BIC_GFXMHz      (1ULL << 20)
-#define        BIC_Pkgpc2      (1ULL << 21)
-#define        BIC_Pkgpc3      (1ULL << 22)
-#define        BIC_Pkgpc6      (1ULL << 23)
-#define        BIC_Pkgpc7      (1ULL << 24)
-#define        BIC_Pkgpc8      (1ULL << 25)
-#define        BIC_Pkgpc9      (1ULL << 26)
-#define        BIC_Pkgpc10     (1ULL << 27)
-#define BIC_CPU_LPI    (1ULL << 28)
-#define BIC_SYS_LPI    (1ULL << 29)
-#define        BIC_PkgWatt     (1ULL << 30)
-#define        BIC_CorWatt     (1ULL << 31)
-#define        BIC_GFXWatt     (1ULL << 32)
-#define        BIC_PkgCnt      (1ULL << 33)
-#define        BIC_RAMWatt     (1ULL << 34)
-#define        BIC_PKG__       (1ULL << 35)
-#define        BIC_RAM__       (1ULL << 36)
-#define        BIC_Pkg_J       (1ULL << 37)
-#define        BIC_Cor_J       (1ULL << 38)
-#define        BIC_GFX_J       (1ULL << 39)
-#define        BIC_RAM_J       (1ULL << 40)
-#define        BIC_Mod_c6      (1ULL << 41)
-#define        BIC_Totl_c0     (1ULL << 42)
-#define        BIC_Any_c0      (1ULL << 43)
-#define        BIC_GFX_c0      (1ULL << 44)
-#define        BIC_CPUGFX      (1ULL << 45)
-#define        BIC_Core        (1ULL << 46)
-#define        BIC_CPU         (1ULL << 47)
-#define        BIC_APIC        (1ULL << 48)
-#define        BIC_X2APIC      (1ULL << 49)
-#define        BIC_Die         (1ULL << 50)
-#define        BIC_GFXACTMHz   (1ULL << 51)
-#define        BIC_IPC         (1ULL << 52)
-
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
-#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
-#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
-
-#define BIC_DISABLED_BY_DEFAULT        (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
-
-unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
-
-#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
-#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
-#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
-#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
-#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
-#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
-
 #define MAX_DEFERRED 16
+char *deferred_add_names[MAX_DEFERRED];
 char *deferred_skip_names[MAX_DEFERRED];
+int deferred_add_index;
 int deferred_skip_index;
 
 /*
@@ -720,6 +731,8 @@ void help(void)
                "  -l, --list   list column headers only\n"
                "  -n, --num_iterations num\n"
                "               number of the measurement iterations\n"
+               "  -N, --header_iterations num\n"
+               "               print header every num iterations\n"
                "  -o, --out file\n"
                "               create or truncate \"file\" for all output\n"
                "  -q, --quiet  skip decoding system configuration header\n"
@@ -741,7 +754,7 @@ void help(void)
  */
 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
 {
-       int i;
+       unsigned int i;
        unsigned long long retval = 0;
 
        while (name_list) {
@@ -752,40 +765,51 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
                if (comma)
                        *comma = '\0';
 
-               if (!strcmp(name_list, "all"))
-                       return ~0;
-               if (!strcmp(name_list, "topology"))
-                       return BIC_TOPOLOGY;
-               if (!strcmp(name_list, "power"))
-                       return BIC_THERMAL_PWR;
-               if (!strcmp(name_list, "idle"))
-                       return BIC_IDLE;
-               if (!strcmp(name_list, "frequency"))
-                       return BIC_FREQUENCY;
-               if (!strcmp(name_list, "other"))
-                       return BIC_OTHER;
-               if (!strcmp(name_list, "all"))
-                       return 0;
-
                for (i = 0; i < MAX_BIC; ++i) {
                        if (!strcmp(name_list, bic[i].name)) {
                                retval |= (1ULL << i);
                                break;
                        }
+                       if (!strcmp(name_list, "all")) {
+                               retval |= ~0;
+                               break;
+                       } else if (!strcmp(name_list, "topology")) {
+                               retval |= BIC_TOPOLOGY;
+                               break;
+                       } else if (!strcmp(name_list, "power")) {
+                               retval |= BIC_THERMAL_PWR;
+                               break;
+                       } else if (!strcmp(name_list, "idle")) {
+                               retval |= BIC_IDLE;
+                               break;
+                       } else if (!strcmp(name_list, "frequency")) {
+                               retval |= BIC_FREQUENCY;
+                               break;
+                       } else if (!strcmp(name_list, "other")) {
+                               retval |= BIC_OTHER;
+                               break;
+                       }
+
                }
                if (i == MAX_BIC) {
                        if (mode == SHOW_LIST) {
-                               fprintf(stderr, "Invalid counter name: %s\n", name_list);
-                               exit(-1);
-                       }
-                       deferred_skip_names[deferred_skip_index++] = name_list;
-                       if (debug)
-                               fprintf(stderr, "deferred \"%s\"\n", name_list);
-                       if (deferred_skip_index >= MAX_DEFERRED) {
-                               fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
-                                       MAX_DEFERRED, name_list);
-                               help();
-                               exit(1);
+                               deferred_add_names[deferred_add_index++] = name_list;
+                               if (deferred_add_index >= MAX_DEFERRED) {
+                                       fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
+                                               MAX_DEFERRED, name_list);
+                                       help();
+                                       exit(1);
+                               }
+                       } else {
+                               deferred_skip_names[deferred_skip_index++] = name_list;
+                               if (debug)
+                                       fprintf(stderr, "deferred \"%s\"\n", name_list);
+                               if (deferred_skip_index >= MAX_DEFERRED) {
+                                       fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
+                                               MAX_DEFERRED, name_list);
+                                       help();
+                                       exit(1);
+                               }
                        }
                }
 
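
bic_lookup() tokenizes the --show/--hide name list in place, and with this hunk an
unrecognized name in SHOW_LIST mode is deferred as an --add candidate instead of
being fatal. A sketch of just the in-place comma tokenization the loop relies on:

        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                char list[] = "topology,power,bogus";
                char *name = list;

                while (name) {
                        char *comma = strchr(name, ',');

                        if (comma)
                                *comma = '\0';  /* terminate this token */
                        printf("token: %s\n", name);
                        name = comma ? comma + 1 : NULL;
                }
                return 0;
        }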
@@ -872,6 +896,9 @@ void print_header(char *delim)
        if (DO_BIC(BIC_CoreTmp))
                outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
 
+       if (DO_BIC(BIC_CORE_THROT_CNT))
+               outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
+
        if (do_rapl && !rapl_joules) {
                if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
                        outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
@@ -1011,6 +1038,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
                outp += sprintf(outp, "c6: %016llX\n", c->c6);
                outp += sprintf(outp, "c7: %016llX\n", c->c7);
                outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
+               outp += sprintf(outp, "core_throt_cnt: %016llX\n", c->core_throt_cnt);
                outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
 
                for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
@@ -1225,6 +1253,10 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        if (DO_BIC(BIC_CoreTmp))
                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
 
+       /* Core throttle count */
+       if (DO_BIC(BIC_CORE_THROT_CNT))
+               outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
+
        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW) {
                        if (mp->width == 32)
@@ -1311,6 +1343,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        if (DO_BIC(BIC_PkgWatt))
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
+
        if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
@@ -1386,14 +1419,14 @@ void flush_output_stderr(void)
 
 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
-       static int printed;
+       static int count;
 
-       if (!printed || !summary_only)
+       if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
                print_header("\t");
 
        format_counters(&average.threads, &average.cores, &average.packages);
 
-       printed = 1;
+       count++;
 
        if (summary_only)
                return;
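
The counter/modulo form above reprints the header on the first row and then every
header_iterations rows; the header_iterations term also guards the modulo against a
zero divisor when -N is not given. A sketch of the cadence:

        #include <stdio.h>

        int main(void)
        {
                unsigned int header_iterations = 3;     /* as if -N 3 */
                unsigned int count;

                for (count = 0; count < 7; count++) {
                        if (!count || (header_iterations &&
                                       !(count % header_iterations)))
                                printf("HEADER\n");
                        printf("row %u\n", count);
                }
                return 0;
        }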
@@ -1467,6 +1500,7 @@ void delta_core(struct core_data *new, struct core_data *old)
        old->c6 = new->c6 - old->c6;
        old->c7 = new->c7 - old->c7;
        old->core_temp_c = new->core_temp_c;
+       old->core_throt_cnt = new->core_throt_cnt;
        old->mc6_us = new->mc6_us - old->mc6_us;
 
        DELTA_WRAP32(new->core_energy, old->core_energy);
@@ -1626,6 +1660,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        c->mc6_us = 0;
        c->core_temp_c = 0;
        c->core_energy = 0;
+       c->core_throt_cnt = 0;
 
        p->pkg_wtd_core_c0 = 0;
        p->pkg_any_core_c0 = 0;
@@ -1710,6 +1745,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        average.cores.mc6_us += c->mc6_us;
 
        average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
+       average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
 
        average.cores.core_energy += c->core_energy;
 
@@ -1987,6 +2023,26 @@ void get_apic_id(struct thread_data *t)
                fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
 }
 
+int get_core_throt_cnt(int cpu, unsigned long long *cnt)
+{
+       char path[128 + PATH_BYTES];
+       unsigned long long tmp;
+       FILE *fp;
+       int ret;
+
+       sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
+       fp = fopen(path, "r");
+       if (!fp)
+               return -1;
+       ret = fscanf(fp, "%llu", &tmp);
+       if (ret != 1) {
+               fclose(fp);
+               return -1;
+       }
+       fclose(fp);
+       *cnt = tmp;
+
+       return 0;
+}
+
 /*
  * get_counters(...)
  * migrate to cpu
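
get_core_throt_cnt() is an instance of the usual one-value sysfs read: open, scan a
single integer, close, and report failure distinctly from success. A generic sketch
of that pattern; the path in the example is just a convenient readable file, not the
throttle counter:

        #include <stdio.h>

        /* Read one unsigned counter from a sysfs-style file.
         * Returns 0 on success, -1 if missing or unparsable. */
        static int read_sysfs_ull(const char *path, unsigned long long *val)
        {
                FILE *fp = fopen(path, "r");
                int ret;

                if (!fp)
                        return -1;
                ret = fscanf(fp, "%llu", val);
                fclose(fp);                     /* close on every path */
                return ret == 1 ? 0 : -1;
        }

        int main(void)
        {
                unsigned long long v;

                if (!read_sysfs_ull("/proc/sys/kernel/pid_max", &v))
                        printf("value: %llu\n", v);
                return 0;
        }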
@@ -2129,6 +2185,9 @@ retry:
                c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
        }
 
+       if (DO_BIC(BIC_CORE_THROT_CNT))
+               get_core_throt_cnt(cpu, &c->core_throt_cnt);
+
        if (do_rapl & RAPL_AMD_F17H) {
                if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
                        return -14;
@@ -2428,6 +2487,9 @@ int has_turbo_ratio_group_limits(int family, int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_ATOM_GOLDMONT:
        case INTEL_FAM6_SKYLAKE_X:
@@ -2435,8 +2497,9 @@ int has_turbo_ratio_group_limits(int family, int model)
        case INTEL_FAM6_ATOM_GOLDMONT_D:
        case INTEL_FAM6_ATOM_TREMONT_D:
                return 1;
+       default:
+               return 0;
        }
-       return 0;
 }
 
 static void dump_turbo_ratio_limits(int family, int model)
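
The family check added here (and repeated across the is_*() helpers below) matters
because INTEL_FAM6_* model numbers are only unique within CPUID family 6; a future
family could reuse a model id and take the wrong branch. A sketch of the guard, with
a hypothetical model number:

        #include <stdio.h>

        static int is_some_model(unsigned int family, unsigned int model)
        {
                if (family != 6)        /* model ids unique per family */
                        return 0;

                switch (model) {
                case 0x55:              /* hypothetical family-6 model */
                        return 1;
                default:
                        return 0;
                }
        }

        int main(void)
        {
                printf("%d %d\n", is_some_model(6, 0x55),
                       is_some_model(15, 0x55));        /* prints: 1 0 */
                return 0;
        }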
@@ -3027,6 +3090,8 @@ void set_max_cpu_num(void)
  */
 int count_cpus(int cpu)
 {
+       UNUSED(cpu);
+
        topo.num_cpus++;
        return 0;
 }
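
The UNUSED() casts pair with the -Wextra added to the Makefile earlier: a void cast
is the idiomatic way to satisfy -Wunused-parameter when a callback signature fixes
arguments the body does not need. A sketch:

        #include <stdio.h>

        #define UNUSED(x) (void)(x)

        /* Callback signature is fixed by the iterator that calls it. */
        static int count_cb(int cpu, void *arg)
        {
                UNUSED(cpu);            /* silences -Wunused-parameter */

                (*(int *)arg)++;
                return 0;
        }

        int main(void)
        {
                int n = 0;

                count_cb(3, &n);
                printf("%d\n", n);      /* prints: 1 */
                return 0;
        }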
@@ -3361,6 +3426,9 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
        int i, ret;
        int cpu = t->cpu_id;
 
+       UNUSED(c);
+       UNUSED(p);
+
        for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
                unsigned long long msr_cur, msr_last;
                off_t offset;
@@ -3387,6 +3455,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
 
 static void msr_record_handler(union sigval v)
 {
+       UNUSED(v);
+
        for_all_cpus(update_msr_sum, EVEN_COUNTERS);
 }
 
@@ -3439,6 +3509,9 @@ release_msr:
 /*
  * set_my_sched_priority(pri)
  * return previous
+ *
+ * if non-root, do this:
+ * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat
  */
 int set_my_sched_priority(int priority)
 {
@@ -3457,7 +3530,7 @@ int set_my_sched_priority(int priority)
        errno = 0;
        retval = getpriority(PRIO_PROCESS, 0);
        if (retval != priority)
-               err(-1, "getpriority(%d) != setpriority(%d)", retval, priority);
+               err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
 
        return original_priority;
 }
@@ -3466,7 +3539,7 @@ void turbostat_loop()
 {
        int retval;
        int restarted = 0;
-       int done_iters = 0;
+       unsigned int done_iters = 0;
 
        setup_signal_handler();
 
@@ -3678,6 +3751,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
                break;
        case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
                no_MSR_MISC_PWR_MGMT = 1;
+               /* FALLTHRU */
        case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
                pkg_cstate_limits = slv_pkg_cstate_limits;
                break;
@@ -3721,6 +3795,9 @@ int has_slv_msrs(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_ATOM_SILVERMONT:
        case INTEL_FAM6_ATOM_SILVERMONT_MID:
@@ -3736,6 +3813,9 @@ int is_dnv(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_ATOM_GOLDMONT_D:
                return 1;
@@ -3749,6 +3829,9 @@ int is_bdx(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_BROADWELL_X:
                return 1;
@@ -3762,6 +3845,9 @@ int is_skx(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_SKYLAKE_X:
                return 1;
@@ -3775,6 +3861,9 @@ int is_icx(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_ICELAKE_X:
                return 1;
@@ -3787,6 +3876,9 @@ int is_ehl(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_ATOM_TREMONT:
                return 1;
@@ -3799,6 +3891,9 @@ int is_jvl(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_ATOM_TREMONT_D:
                return 1;
@@ -3811,6 +3906,9 @@ int has_turbo_ratio_limit(unsigned int family, unsigned int model)
        if (has_slv_msrs(family, model))
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
                /* Nehalem compatible, but do not include turbo-ratio limit support */
        case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
@@ -4125,6 +4223,9 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        char *epb_string;
        int cpu, epb;
 
+       UNUSED(c);
+       UNUSED(p);
+
        if (!has_epb)
                return 0;
 
@@ -4171,6 +4272,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        unsigned long long msr;
        int cpu;
 
+       UNUSED(c);
+       UNUSED(p);
+
        if (!has_hwp)
                return 0;
 
@@ -4254,6 +4358,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
        unsigned long long msr;
        int cpu;
 
+       UNUSED(c);
+       UNUSED(p);
+
        cpu = t->cpu_id;
 
        /* per-package */
@@ -4359,6 +4466,8 @@ double get_tdp_intel(unsigned int model)
 
 double get_tdp_amd(unsigned int family)
 {
+       UNUSED(family);
+
        /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
        return 280.0;
 }
@@ -4376,6 +4485,7 @@ static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
        case INTEL_FAM6_BROADWELL_X:    /* BDX */
        case INTEL_FAM6_SKYLAKE_X:      /* SKX */
        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
+       case INTEL_FAM6_ICELAKE_X:      /* ICX */
                return (rapl_dram_energy_units = 15.3 / 1000000);
        default:
                return (rapl_energy_units);
@@ -4559,6 +4669,8 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
        unsigned int has_rapl = 0;
        double tdp;
 
+       UNUSED(model);
+
        if (max_extended_level >= 0x80000007) {
                __cpuid(0x80000007, eax, ebx, ecx, edx);
                /* RAPL (Fam 17h+) */
@@ -4617,6 +4729,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
        case INTEL_FAM6_HASWELL_L:      /* HSW */
        case INTEL_FAM6_HASWELL_G:      /* HSW */
                do_gfx_perf_limit_reasons = 1;
+               /* FALLTHRU */
        case INTEL_FAM6_HASWELL_X:      /* HSX */
                do_core_perf_limit_reasons = 1;
                do_ring_perf_limit_reasons = 1;
@@ -4643,6 +4756,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
        unsigned int dts, dts2;
        int cpu;
 
+       UNUSED(c);
+       UNUSED(p);
+
        if (!(do_dts || do_ptm))
                return 0;
 
@@ -4698,7 +4814,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 
 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
 {
-       fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
+       fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
                cpu, label,
                ((msr >> 15) & 1) ? "EN" : "DIS",
                ((msr >> 0) & 0x7FFF) * rapl_power_units,
@@ -4714,6 +4830,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        const char *msr_name;
        int cpu;
 
+       UNUSED(c);
+       UNUSED(p);
+
        if (!do_rapl)
                return 0;
 
@@ -4762,12 +4881,19 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        cpu, msr, (msr >> 63) & 1 ? "" : "UN");
 
                print_power_limit_msr(cpu, msr, "PKG Limit #1");
-               fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
+               fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
                        cpu,
                        ((msr >> 47) & 1) ? "EN" : "DIS",
                        ((msr >> 32) & 0x7FFF) * rapl_power_units,
                        (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
                        ((msr >> 48) & 1) ? "EN" : "DIS");
+
+               if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
+                       return -9;
+
+               fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
+               fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
+                       cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
        }
 
        if (do_rapl & RAPL_DRAM_POWER_INFO) {
@@ -4830,6 +4956,9 @@ int has_snb_msrs(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_SANDYBRIDGE:
        case INTEL_FAM6_SANDYBRIDGE_X:
@@ -4873,6 +5002,9 @@ int has_c8910_msrs(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_HASWELL_L:      /* HSW */
        case INTEL_FAM6_BROADWELL:      /* BDW */
@@ -4899,6 +5031,9 @@ int has_skl_msrs(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_SKYLAKE_L:      /* SKL */
        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
@@ -4911,6 +5046,10 @@ int is_slm(unsigned int family, unsigned int model)
 {
        if (!genuine_intel)
                return 0;
+
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
        case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
@@ -4923,6 +5062,10 @@ int is_knl(unsigned int family, unsigned int model)
 {
        if (!genuine_intel)
                return 0;
+
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
                return 1;
@@ -4935,6 +5078,9 @@ int is_cnl(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
 
+       if (family != 6)
+               return 0;
+
        switch (model) {
        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
                return 1;
@@ -4989,6 +5135,9 @@ int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
        unsigned int eax, ebx, ecx, edx;
 
+       UNUSED(c);
+       UNUSED(p);
+
        if (!genuine_intel)
                return 0;
 
@@ -5025,6 +5174,9 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        unsigned int tcc_default, tcc_offset;
        int cpu;
 
+       UNUSED(c);
+       UNUSED(p);
+
        /* tj_max is used only for dts or ptm */
        if (!(do_dts || do_ptm))
                return 0;
@@ -5572,6 +5724,11 @@ void process_cpuid()
        else
                BIC_NOT_PRESENT(BIC_CPU_LPI);
 
+       if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
+               BIC_PRESENT(BIC_CORE_THROT_CNT);
+       else
+               BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+
        if (!access(sys_lpi_file_sysfs, R_OK)) {
                sys_lpi_file = sys_lpi_file_sysfs;
                BIC_PRESENT(BIC_SYS_LPI);
@@ -5601,11 +5758,6 @@ int dir_filter(const struct dirent *dirp)
                return 0;
 }
 
-int open_dev_cpu_msr(int dummy1)
-{
-       return 0;
-}
-
 void topology_probe()
 {
        int i;
@@ -5896,6 +6048,9 @@ void turbostat_init()
 
        if (!quiet && do_irtl_snb)
                print_irtl();
+
+       if (DO_BIC(BIC_IPC))
+               (void)get_instr_count_fd(base_cpu);
 }
 
 int fork_it(char **argv)
@@ -5973,7 +6128,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-       fprintf(outf, "turbostat version 21.05.04" " - Len Brown <lenb@kernel.org>\n");
+       fprintf(outf, "turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>\n");
 }
 
 int add_counter(unsigned int msr_num, char *path, char *name,
@@ -6138,6 +6293,16 @@ next:
        }
 }
 
+int is_deferred_add(char *name)
+{
+       int i;
+
+       for (i = 0; i < deferred_add_index; ++i)
+               if (!strcmp(name, deferred_add_names[i]))
+                       return 1;
+       return 0;
+}
+
 int is_deferred_skip(char *name)
 {
        int i;
@@ -6156,9 +6321,6 @@ void probe_sysfs(void)
        int state;
        char *sp;
 
-       if (!DO_BIC(BIC_sysfs))
-               return;
-
        for (state = 10; state >= 0; --state) {
 
                sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
@@ -6181,6 +6343,9 @@ void probe_sysfs(void)
 
                sprintf(path, "cpuidle/state%d/time", state);
 
+               if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+                       continue;
+
                if (is_deferred_skip(name_buf))
                        continue;
 
@@ -6206,6 +6371,9 @@ void probe_sysfs(void)
 
                sprintf(path, "cpuidle/state%d/usage", state);
 
+               if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+                       continue;
+
                if (is_deferred_skip(name_buf))
                        continue;
 
@@ -6313,6 +6481,7 @@ void cmdline(int argc, char **argv)
                { "interval", required_argument, 0, 'i' },
                { "IPC", no_argument, 0, 'I' },
                { "num_iterations", required_argument, 0, 'n' },
+               { "header_iterations", required_argument, 0, 'N' },
                { "help", no_argument, 0, 'h' },
                { "hide", required_argument, 0, 'H' },  // meh, -h taken by --help
                { "Joules", no_argument, 0, 'J' },
@@ -6394,6 +6563,14 @@ void cmdline(int argc, char **argv)
                                exit(2);
                        }
                        break;
+               case 'N':
+                       header_iterations = strtoul(optarg, NULL, 10);
+
+                       if (header_iterations == 0) {
+                               fprintf(outf, "iterations %u should be a positive number\n", header_iterations);
+                               exit(2);
+                       }
+                       break;
                case 's':
                        /*
                         * --show: show only those specified
@@ -6432,6 +6609,8 @@ int main(int argc, char **argv)
 
        turbostat_init();
 
+       msr_sum_record();
+
        /* dump counters and exit */
        if (dump_only)
                return get_and_dump_counters();
@@ -6443,7 +6622,6 @@ int main(int argc, char **argv)
                return 0;
        }
 
-       msr_sum_record();
        /*
         * if any params left, it must be a command to fork
         */
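
Moving msr_sum_record() ahead of the dump-only exit arms the periodic MSR
accumulation for --dump as well. The accumulation exists because some counters are
32 bits wide and wrap (see DELTA_WRAP32 in this file); sampling often enough lets a
64-bit running sum be kept. An illustrative wrap-safe delta, assuming at most one
wrap between samples:

        #include <stdio.h>

        static unsigned long long delta32(unsigned int new, unsigned int old)
        {
                if (new >= old)
                        return new - old;
                return 0x100000000ULL + new - old;      /* wrapped once */
        }

        int main(void)
        {
                printf("%llu\n", delta32(5, 0xFFFFFFF0));       /* 21 */
                return 0;
        }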
index 1e8d9a8f59df859f566d9dcfc1e5e3b4153b8326..9460cbe81bcc9f26c7e55d675ac3018934adb3be 100644 (file)
@@ -17,16 +17,7 @@ top_srcdir = $(realpath ../../../../)
 # Additional include paths needed by kselftest.h and local headers
 CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
 
-# Guessing where the Kernel headers could have been installed
-# depending on ENV config
-ifeq ($(KBUILD_OUTPUT),)
-khdr_dir = $(top_srcdir)/usr/include
-else
-# the KSFT preferred location when KBUILD_OUTPUT is set
-khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include
-endif
-
-CFLAGS += -I$(khdr_dir)
+CFLAGS += $(KHDR_INCLUDES)
 
 export CFLAGS
 export top_srcdir
index b79cf5814c2307e086f13879cb83d8e6e4e4be1b..b9e54417250d811cba6b0183b4ab683856f5290b 100644 (file)
@@ -1 +1,2 @@
 syscall-abi
+tpidr2
index 96eba974ac8d521097a8727698c09dd75908e78f..c8d7f2495eb21c3fbcc348701a3e2ec4f14b19f7 100644 (file)
@@ -1,8 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2021 ARM Limited
 
-TEST_GEN_PROGS := syscall-abi
+TEST_GEN_PROGS := syscall-abi tpidr2
 
 include ../../lib.mk
 
 $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S
+
+# Build with nolibc since TPIDR2 is intended to be actively managed by
+# libc and we're trying to test the functionality that it depends on here.
+$(OUTPUT)/tpidr2: tpidr2.c
+       $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+               -static -include ../../../../include/nolibc/nolibc.h \
+               -ffreestanding -Wall $^ -o $@ -lgcc
index 983467cfcee0eac52d148cee4b7c0219a2d2de36..b523c21c2278164025cd1f4237c874ec5e69cba6 100644 (file)
@@ -9,15 +9,42 @@
 // invoked is configured in x8 of the input GPR data.
 //
 // x0: SVE VL, 0 for FP only
+// x1: SME VL
 //
 //     GPRs:   gpr_in, gpr_out
 //     FPRs:   fpr_in, fpr_out
 //     Zn:     z_in, z_out
 //     Pn:     p_in, p_out
 //     FFR:    ffr_in, ffr_out
+//     ZA:     za_in, za_out
+//     SVCR:   svcr_in, svcr_out
+
+#include "syscall-abi.h"
 
 .arch_extension sve
 
+/*
+ * LDR (vector to ZA array):
+ *     LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+       .inst   0xe1000000                      \
+               | (((\nw) & 3) << 13)           \
+               | ((\nxbase) << 5)              \
+               | ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ *     STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+       .inst   0xe1200000                      \
+               | (((\nw) & 3) << 13)           \
+               | ((\nxbase) << 5)              \
+               | ((\offset) & 7)
+.endm
+
 .globl do_syscall
 do_syscall:
        // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1
@@ -30,6 +57,24 @@ do_syscall:
        stp     x25, x26, [sp, #80]
        stp     x27, x28, [sp, #96]
 
+       // Set SVCR if we're doing SME
+       cbz     x1, 1f
+       adrp    x2, svcr_in
+       ldr     x2, [x2, :lo12:svcr_in]
+       msr     S3_3_C4_C2_2, x2
+1:
+
+       // Load ZA if it's enabled - uses x12 as scratch due to SME LDR
+       tbz     x2, #SVCR_ZA_SHIFT, 1f
+       mov     w12, #0
+       ldr     x2, =za_in
+2:     _ldr_za 12, 2
+       add     x2, x2, x1
+       add     x12, x12, #1
+       cmp     x1, x12
+       bne     2b
+1:
+
        // Load GPRs x8-x28, and save our SP/FP for later comparison
        ldr     x2, =gpr_in
        add     x2, x2, #64
@@ -68,7 +113,7 @@ do_syscall:
        ldp     q30, q31, [x2, #16 * 30]
 1:
 
-       // Load the SVE registers if we're doing SVE
+       // Load the SVE registers if we're doing SVE/SME
        cbz     x0, 1f
 
        ldr     x2, =z_in
@@ -105,9 +150,14 @@ do_syscall:
        ldr     z30, [x2, #30, MUL VL]
        ldr     z31, [x2, #31, MUL VL]
 
+       // Only write FFR when the test pattern is non-zero: setup_ffr()
+       // zeroes the pattern when FFR is unavailable, which lets us
+       // handle FA64.
        ldr     x2, =ffr_in
        ldr     p0, [x2, #0]
+       ldr     x2, [x2, #0]
+       cbz     x2, 2f
        wrffr   p0.b
+2:
 
        ldr     x2, =p_in
        ldr     p0, [x2, #0, MUL VL]
@@ -169,6 +219,24 @@ do_syscall:
        stp     q28, q29, [x2, #16 * 28]
        stp     q30, q31, [x2, #16 * 30]
 
+       // Save SVCR if we're doing SME
+       cbz     x1, 1f
+       mrs     x2, S3_3_C4_C2_2
+       adrp    x3, svcr_out
+       str     x2, [x3, :lo12:svcr_out]
+1:
+
+       // Save ZA if it's enabled - uses x12 as scratch due to SME STR
+       tbz     x2, #SVCR_ZA_SHIFT, 1f
+       mov     w12, #0
+       ldr     x2, =za_out
+2:     _str_za 12, 2
+       add     x2, x2, x1
+       add     x12, x12, #1
+       cmp     x1, x12
+       bne     2b
+1:
+
        // Save the SVE state if we have some
        cbz     x0, 1f
 
@@ -224,6 +292,10 @@ do_syscall:
        str     p14, [x2, #14, MUL VL]
        str     p15, [x2, #15, MUL VL]
 
+       // Only save FFR if we actually wrote a value to it above
+       ldr     x2, =ffr_in
+       ldr     x2, [x2, #0]
+       cbz     x2, 1f
        ldr     x2, =ffr_out
        rdffr   p0.b
        str     p0, [x2, #0]
@@ -237,4 +309,9 @@ do_syscall:
        ldp     x27, x28, [sp, #96]
        ldp     x29, x30, [sp], #112
 
+       // Clear SVCR if we were doing SME so future tests don't have ZA
+       cbz     x1, 1f
+       msr     S3_3_C4_C2_2, xzr
+1:
+
        ret
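
The _ldr_za/_str_za macros above hand-encode the SME LDR/STR instructions with .inst
because toolchains without SME support cannot assemble them: bits 13-14 carry the Wn
slice selector, bits 5-9 the base register, bits 0-2 the vector offset. An
illustrative C reproduction of the _ldr_za packing, useful only as a cross-check of
the fields:

        #include <stdio.h>

        static unsigned int ldr_za(unsigned int nw, unsigned int nxbase,
                                   unsigned int offset)
        {
                return 0xe1000000 | ((nw & 3) << 13) |
                       (nxbase << 5) | (offset & 7);
        }

        int main(void)
        {
                /* w12 as slice selector, x2 as base, offset 0,
                 * as used by the ZA load loop above. */
                printf("0x%08x\n", ldr_za(12, 2, 0));   /* 0xe1000040 */
                return 0;
        }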
index 1e13b7523918efbb2da9eb509d7aa3cf14083a20..b632bfe9e0227c053bc929df79b9819071ac6454 100644 (file)
 
 #include "../../kselftest.h"
 
+#include "syscall-abi.h"
+
 #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1)
 
-extern void do_syscall(int sve_vl);
+static int default_sme_vl;
+
+extern void do_syscall(int sve_vl, int sme_vl);
 
 static void fill_random(void *buf, size_t size)
 {
@@ -48,14 +52,15 @@ static struct syscall_cfg {
 uint64_t gpr_in[NUM_GPR];
 uint64_t gpr_out[NUM_GPR];
 
-static void setup_gpr(struct syscall_cfg *cfg, int sve_vl)
+static void setup_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                     uint64_t svcr)
 {
        fill_random(gpr_in, sizeof(gpr_in));
        gpr_in[8] = cfg->syscall_nr;
        memset(gpr_out, 0, sizeof(gpr_out));
 }
 
-static int check_gpr(struct syscall_cfg *cfg, int sve_vl)
+static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr)
 {
        int errors = 0;
        int i;
@@ -79,13 +84,15 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl)
 uint64_t fpr_in[NUM_FPR * 2];
 uint64_t fpr_out[NUM_FPR * 2];
 
-static void setup_fpr(struct syscall_cfg *cfg, int sve_vl)
+static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                     uint64_t svcr)
 {
        fill_random(fpr_in, sizeof(fpr_in));
        memset(fpr_out, 0, sizeof(fpr_out));
 }
 
-static int check_fpr(struct syscall_cfg *cfg, int sve_vl)
+static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                    uint64_t svcr)
 {
        int errors = 0;
        int i;
@@ -109,13 +116,15 @@ static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)];
 uint8_t z_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
 uint8_t z_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
 
-static void setup_z(struct syscall_cfg *cfg, int sve_vl)
+static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                   uint64_t svcr)
 {
        fill_random(z_in, sizeof(z_in));
        fill_random(z_out, sizeof(z_out));
 }
 
-static int check_z(struct syscall_cfg *cfg, int sve_vl)
+static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                  uint64_t svcr)
 {
        size_t reg_size = sve_vl;
        int errors = 0;
@@ -126,13 +135,17 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl)
 
        /*
         * After a syscall the low 128 bits of the Z registers should
-        * be preserved and the rest be zeroed or preserved.
+        * be preserved and the rest be zeroed or preserved, except if
+        * we were in streaming mode in which case the low 128 bits may
+        * also be cleared by the transition out of streaming mode.
         */
        for (i = 0; i < SVE_NUM_ZREGS; i++) {
                void *in = &z_in[reg_size * i];
                void *out = &z_out[reg_size * i];
 
-               if (memcmp(in, out, SVE_VQ_BYTES) != 0) {
+               if ((memcmp(in, out, SVE_VQ_BYTES) != 0) &&
+                   !((svcr & SVCR_SM_MASK) &&
+                     memcmp(z_zero, out, SVE_VQ_BYTES) == 0)) {
                        ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n",
                                       cfg->name, sve_vl, i);
                        errors++;
@@ -145,13 +158,15 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl)
 uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
 uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
 
-static void setup_p(struct syscall_cfg *cfg, int sve_vl)
+static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                   uint64_t svcr)
 {
        fill_random(p_in, sizeof(p_in));
        fill_random(p_out, sizeof(p_out));
 }
 
-static int check_p(struct syscall_cfg *cfg, int sve_vl)
+static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                  uint64_t svcr)
 {
        size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
 
@@ -175,8 +190,19 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl)
 uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)];
 uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)];
 
-static void setup_ffr(struct syscall_cfg *cfg, int sve_vl)
+static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                     uint64_t svcr)
 {
+       /*
+        * If we are in streaming mode and do not have FA64 then FFR
+        * is unavailable.
+        */
+       if ((svcr & SVCR_SM_MASK) &&
+           !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) {
+               memset(&ffr_in, 0, sizeof(ffr_in));
+               return;
+       }
+
        /*
         * It is only valid to set a contiguous set of bits starting
         * at 0.  For now since we're expecting this to be cleared by
@@ -186,7 +212,8 @@ static void setup_ffr(struct syscall_cfg *cfg, int sve_vl)
        fill_random(ffr_out, sizeof(ffr_out));
 }
 
-static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
+static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                    uint64_t svcr)
 {
        size_t reg_size = sve_vq_from_vl(sve_vl) * 2;  /* 1 bit per VL byte */
        int errors = 0;
@@ -195,6 +222,10 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
        if (!sve_vl)
                return 0;
 
+       if ((svcr & SVCR_SM_MASK) &&
+           !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64))
+               return 0;
+
        /* After a syscall the P registers should be preserved or zeroed */
        for (i = 0; i < reg_size; i++)
                if (ffr_out[i] && (ffr_in[i] != ffr_out[i]))
@@ -206,8 +237,65 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
        return errors;
 }
 
-typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl);
-typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl);
+uint64_t svcr_in, svcr_out;
+
+static void setup_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                   uint64_t svcr)
+{
+       svcr_in = svcr;
+}
+
+static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                     uint64_t svcr)
+{
+       int errors = 0;
+
+       if (svcr_out & SVCR_SM_MASK) {
+               ksft_print_msg("%s Still in SM, SVCR %llx\n",
+                              cfg->name, svcr_out);
+               errors++;
+       }
+
+       if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) {
+               ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n",
+                              cfg->name, svcr_in, svcr_out);
+               errors++;
+       }
+
+       return errors;
+}
+
+uint8_t za_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+uint8_t za_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+
+static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                    uint64_t svcr)
+{
+       fill_random(za_in, sizeof(za_in));
+       memset(za_out, 0, sizeof(za_out));
+}
+
+static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                   uint64_t svcr)
+{
+       size_t reg_size = sme_vl * sme_vl;
+       int errors = 0;
+
+       if (!(svcr & SVCR_ZA_MASK))
+               return 0;
+
+       if (memcmp(za_in, za_out, reg_size) != 0) {
+               ksft_print_msg("SME VL %d ZA does not match\n", sme_vl);
+               errors++;
+       }
+
+       return errors;
+}
+
+typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                        uint64_t svcr);
+typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                       uint64_t svcr);
 
 /*
  * Each set of registers has a setup function which is called before
@@ -225,20 +313,23 @@ static struct {
        { setup_z, check_z },
        { setup_p, check_p },
        { setup_ffr, check_ffr },
+       { setup_svcr, check_svcr },
+       { setup_za, check_za },
 };
 
-static bool do_test(struct syscall_cfg *cfg, int sve_vl)
+static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+                   uint64_t svcr)
 {
        int errors = 0;
        int i;
 
        for (i = 0; i < ARRAY_SIZE(regset); i++)
-               regset[i].setup(cfg, sve_vl);
+               regset[i].setup(cfg, sve_vl, sme_vl, svcr);
 
-       do_syscall(sve_vl);
+       do_syscall(sve_vl, sme_vl);
 
        for (i = 0; i < ARRAY_SIZE(regset); i++)
-               errors += regset[i].check(cfg, sve_vl);
+               errors += regset[i].check(cfg, sve_vl, sme_vl, svcr);
 
        return errors == 0;
 }
@@ -246,9 +337,10 @@ static bool do_test(struct syscall_cfg *cfg, int sve_vl)
 static void test_one_syscall(struct syscall_cfg *cfg)
 {
        int sve_vq, sve_vl;
+       int sme_vq, sme_vl;
 
        /* FPSIMD only case */
-       ksft_test_result(do_test(cfg, 0),
+       ksft_test_result(do_test(cfg, 0, default_sme_vl, 0),
                         "%s FPSIMD\n", cfg->name);
 
        if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
@@ -265,8 +357,36 @@ static void test_one_syscall(struct syscall_cfg *cfg)
                if (sve_vq != sve_vq_from_vl(sve_vl))
                        sve_vq = sve_vq_from_vl(sve_vl);
 
-               ksft_test_result(do_test(cfg, sve_vl),
+               ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0),
                                 "%s SVE VL %d\n", cfg->name, sve_vl);
+
+               if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
+                       continue;
+
+               for (sme_vq = SVE_VQ_MAX; sme_vq > 0; --sme_vq) {
+                       sme_vl = prctl(PR_SME_SET_VL, sme_vq * 16);
+                       if (sme_vl == -1)
+                               ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+                                                  strerror(errno), errno);
+
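+                       /* The prctl() return carries flag bits as well as the VL */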
+                       sme_vl &= PR_SME_VL_LEN_MASK;
+
+                       if (sme_vq != sve_vq_from_vl(sme_vl))
+                               sme_vq = sve_vq_from_vl(sme_vl);
+
+                       ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+                                                SVCR_ZA_MASK | SVCR_SM_MASK),
+                                        "%s SVE VL %d/SME VL %d SM+ZA\n",
+                                        cfg->name, sve_vl, sme_vl);
+                       ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+                                                SVCR_SM_MASK),
+                                        "%s SVE VL %d/SME VL %d SM\n",
+                                        cfg->name, sve_vl, sme_vl);
+                       ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+                                                SVCR_ZA_MASK),
+                                        "%s SVE VL %d/SME VL %d ZA\n",
+                                        cfg->name, sve_vl, sme_vl);
+               }
        }
 }
 
@@ -299,14 +419,54 @@ int sve_count_vls(void)
        return vl_count;
 }
 
+int sme_count_vls(void)
+{
+       unsigned int vq;
+       int vl_count = 0;
+       int vl;
+
+       if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
+               return 0;
+
+       /* Ensure we configure a SME VL, used to flag if SVCR is set */
+       default_sme_vl = 16;
+
+       /*
+        * Enumerate up to SVE_VQ_MAX vector lengths
+        */
+       for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+               vl = prctl(PR_SME_SET_VL, vq * 16);
+               if (vl == -1)
+                       ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+                                          strerror(errno), errno);
+
+               vl &= PR_SME_VL_LEN_MASK;
+
+               if (vq != sve_vq_from_vl(vl))
+                       vq = sve_vq_from_vl(vl);
+
+               vl_count++;
+       }
+
+       return vl_count;
+}
+
 int main(void)
 {
        int i;
+       int tests = 1;  /* FPSIMD */
 
        srandom(getpid());
 
        ksft_print_header();
-       ksft_set_plan(ARRAY_SIZE(syscalls) * (sve_count_vls() + 1));
+       tests += sve_count_vls();
+       tests += (sve_count_vls() * sme_count_vls()) * 3;
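+       /* Each SVE/SME VL pair is exercised with SM+ZA, SM only and ZA only */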
+       ksft_set_plan(ARRAY_SIZE(syscalls) * tests);
+
+       if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+               ksft_print_msg("SME with FA64\n");
+       else if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+               ksft_print_msg("SME without FA64\n");
 
        for (i = 0; i < ARRAY_SIZE(syscalls); i++)
                test_one_syscall(&syscalls[i]);
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.h b/tools/testing/selftests/arm64/abi/syscall-abi.h
new file mode 100644 (file)
index 0000000..bda5a87
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+
+#ifndef SYSCALL_ABI_H
+#define SYSCALL_ABI_H
+
+#define SVCR_ZA_MASK           2
+#define SVCR_SM_MASK           1
+
+#define SVCR_ZA_SHIFT          1
+#define SVCR_SM_SHIFT          0
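+
+/*
+ * e.g. svcr == (SVCR_ZA_MASK | SVCR_SM_MASK) selects streaming mode with
+ * ZA storage enabled, while svcr == 0 is plain non-streaming state.
+ */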
+
+#endif
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
new file mode 100644 (file)
index 0000000..351a098
--- /dev/null
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
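+/* S3_3_C13_C0_5 is the generic sysreg encoding of TPIDR2_EL0 */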
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+#define EXPECTED_TESTS 5
+
+static void putstr(const char *str)
+{
+       write(1, str, strlen(str));
+}
+
+static void putnum(unsigned int num)
+{
+       char c;
+
+       if (num / 10)
+               putnum(num / 10);
+
+       c = '0' + (num % 10);
+       write(1, &c, 1);
+}
+
+static int tests_run;
+static int tests_passed;
+static int tests_failed;
+static int tests_skipped;
+
+static void set_tpidr2(uint64_t val)
+{
+       asm volatile (
+               "msr    " SYS_TPIDR2 ", %0\n"
+               :
+               : "r"(val)
+               : "cc");
+}
+
+static uint64_t get_tpidr2(void)
+{
+       uint64_t val;
+
+       asm volatile (
+               "mrs    %0, " SYS_TPIDR2 "\n"
+               : "=r"(val)
+               :
+               : "cc");
+
+       return val;
+}
+
+static void print_summary(void)
+{
+       if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
+               putstr("# UNEXPECTED TEST COUNT: ");
+
+       putstr("# Totals: pass:");
+       putnum(tests_passed);
+       putstr(" fail:");
+       putnum(tests_failed);
+       putstr(" xfail:0 xpass:0 skip:");
+       putnum(tests_skipped);
+       putstr(" error:0\n");
+}
+
+/* Processes should start with TPIDR2 == 0 */
+static int default_value(void)
+{
+       return get_tpidr2() == 0;
+}
+
+/* If we set TPIDR2 we should read that value */
+static int write_read(void)
+{
+       set_tpidr2(getpid());
+
+       return getpid() == get_tpidr2();
+}
+
+/* If we set a value we should read the same value after scheduling out */
+static int write_sleep_read(void)
+{
+       set_tpidr2(getpid());
+
+       msleep(100);
+
+       return getpid() == get_tpidr2();
+}
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value and be able to set its own
+ * value.
+ */
+static int write_fork_read(void)
+{
+       pid_t newpid, waiting, oldpid;
+       int status;
+
+       set_tpidr2(getpid());
+
+       oldpid = getpid();
+       newpid = fork();
+       if (newpid == 0) {
+               /* In child */
+               if (get_tpidr2() != oldpid) {
+                       putstr("# TPIDR2 changed in child: ");
+                       putnum(get_tpidr2());
+                       putstr("\n");
+                       exit(0);
+               }
+
+               set_tpidr2(getpid());
+               if (get_tpidr2() == getpid()) {
+                       exit(1);
+               } else {
+                       putstr("# Failed to set TPIDR2 in child\n");
+                       exit(0);
+               }
+       }
+       if (newpid < 0) {
+               putstr("# fork() failed: -");
+               putnum(-newpid);
+               putstr("\n");
+               return 0;
+       }
+
+       for (;;) {
+               waiting = waitpid(newpid, &status, 0);
+
+               if (waiting < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       putstr("# waitpid() failed: ");
+                       putnum(errno);
+                       putstr("\n");
+                       return 0;
+               }
+               if (waiting != newpid) {
+                       putstr("# waitpid() returned wrong PID\n");
+                       return 0;
+               }
+
+               if (!WIFEXITED(status)) {
+                       putstr("# child did not exit\n");
+                       return 0;
+               }
+
+               if (getpid() != get_tpidr2()) {
+                       putstr("# TPIDR2 corrupted in parent\n");
+                       return 0;
+               }
+
+               return WEXITSTATUS(status);
+       }
+}
+
+/*
+ * sys_clone() has a lot of per architecture variation so just define
+ * it here rather than adding it to nolibc, plus the raw API is a
+ * little more convenient for this test.
+ */
+static int sys_clone(unsigned long clone_flags, unsigned long newsp,
+                    int *parent_tidptr, unsigned long tls,
+                    int *child_tidptr)
+{
+       return my_syscall5(__NR_clone, clone_flags, newsp, parent_tidptr, tls,
+                          child_tidptr);
+}
+
+/*
+ * If we clone with CLONE_SETTLS then the value in the parent should
+ * be unchanged and the child should start with zero and be able to
+ * set its own value.
+ */
+static int write_clone_read(void)
+{
+       int parent_tid, child_tid;
+       pid_t parent, waiting;
+       int ret, status;
+
+       parent = getpid();
+       set_tpidr2(parent);
+
+       ret = sys_clone(CLONE_SETTLS, 0, &parent_tid, 0, &child_tid);
+       if (ret == -1) {
+               putstr("# clone() failed\n");
+               putnum(errno);
+               putstr("\n");
+               return 0;
+       }
+
+       if (ret == 0) {
+               /* In child */
+               if (get_tpidr2() != 0) {
+                       putstr("# TPIDR2 non-zero in child: ");
+                       putnum(get_tpidr2());
+                       putstr("\n");
+                       exit(0);
+               }
+
+               if (gettid() == 0)
+                       putstr("# Child TID==0\n");
+               set_tpidr2(gettid());
+               if (get_tpidr2() == gettid()) {
+                       exit(1);
+               } else {
+                       putstr("# Failed to set TPIDR2 in child\n");
+                       exit(0);
+               }
+       }
+
+       for (;;) {
+               waiting = wait4(ret, &status, __WCLONE, NULL);
+
+               if (waiting < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       putstr("# wait4() failed: ");
+                       putnum(errno);
+                       putstr("\n");
+                       return 0;
+               }
+               if (waiting != ret) {
+                       putstr("# wait4() returned wrong PID ");
+                       putnum(waiting);
+                       putstr("\n");
+                       return 0;
+               }
+
+               if (!WIFEXITED(status)) {
+                       putstr("# child did not exit\n");
+                       return 0;
+               }
+
+               if (parent != get_tpidr2()) {
+                       putstr("# TPIDR2 corrupted in parent\n");
+                       return 0;
+               }
+
+               return WEXITSTATUS(status);
+       }
+}
+
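+/* Run the named test function and print a TAP result line for it */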
+#define run_test(name)                      \
+       if (name()) {                        \
+               tests_passed++;              \
+       } else {                             \
+               tests_failed++;              \
+               putstr("not ");              \
+       }                                    \
+       putstr("ok ");                       \
+       putnum(++tests_run);                 \
+       putstr(" " #name "\n");
+
+int main(int argc, char **argv)
+{
+       int ret, i;
+
+       putstr("TAP version 13\n");
+       putstr("1..");
+       putnum(EXPECTED_TESTS);
+       putstr("\n");
+
+       putstr("# PID: ");
+       putnum(getpid());
+       putstr("\n");
+
+       /*
+        * This test is run with nolibc, which doesn't support hwcap, and
+        * implementing that would be disproportionate, so instead check
+        * for the default vector length configuration in /proc.
+        */
+       ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+       if (ret >= 0) {
+               run_test(default_value);
+               run_test(write_read);
+               run_test(write_sleep_read);
+               run_test(write_fork_read);
+               run_test(write_clone_read);
+
+       } else {
+               putstr("# SME support not present\n");
+
+               for (i = 0; i < EXPECTED_TESTS; i++) {
+                       putstr("ok ");
+                       putnum(i + 1);
+                       putstr(" skipped, TPIDR2 not supported\n");
+               }
+
+               tests_skipped += EXPECTED_TESTS;
+       }
+
+       print_summary();
+
+       return 0;
+}
index 73e013c082a6526d8572d29b05259352036caf52..ccdac414ad940097279071a57e475f3b651aba46 100644 (file)
@@ -10,7 +10,7 @@ PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS))
 # cases for statically linked and dynamically linked binaries are
 # slightly different.
 
-CFLAGS_NOBTI = -DBTI=0
+CFLAGS_NOBTI = -mbranch-protection=none -DBTI=0
 CFLAGS_BTI = -mbranch-protection=standard -DBTI=1
 
 CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
@@ -39,7 +39,7 @@ BTI_OBJS =                                      \
        teststubs-bti.o                         \
        trampoline-bti.o
 gen/btitest: $(BTI_OBJS)
-       $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+       $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
 
 NOBTI_OBJS =                                    \
        test-nobti.o                         \
@@ -50,7 +50,7 @@ NOBTI_OBJS =                                    \
        teststubs-nobti.o                       \
        trampoline-nobti.o
 gen/nobtitest: $(NOBTI_OBJS)
-       $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+       $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
 
 # Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
 # to account for any OUTPUT target-dirs optionally provided by
index c50d86331ed2fbec7c0e9259d32a938e7e01df7b..ea947af6388243c31504eb5f2468580fd1573bf8 100644 (file)
@@ -1,8 +1,13 @@
 fp-pidbench
 fpsimd-test
+rdvl-sme
 rdvl-sve
 sve-probe-vls
 sve-ptrace
 sve-test
+ssve-test
 vec-syscfg
 vlset
+za-fork
+za-ptrace
+za-test
index 95f0b877a060ba01db39165e659047a7429d8a0b..a7c2286bf65ba96ac96f43792dab3c44fbb2ca57 100644 (file)
@@ -1,24 +1,42 @@
 # SPDX-License-Identifier: GPL-2.0
 
-CFLAGS += -I../../../../../usr/include/
-TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
-TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
-       rdvl-sve \
-       sve-test sve-stress \
+# A proper top_srcdir is needed by KSFT (lib.mk)
+top_srcdir = $(realpath ../../../../../)
+
+CFLAGS += -I$(top_srcdir)/usr/include/
+
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-fork za-ptrace
+TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
+       rdvl-sme rdvl-sve \
+       sve-test \
+       ssve-test \
+       za-test \
        vlset
+TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress
 
-all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+EXTRA_CLEAN += $(OUTPUT)/asm-utils.o $(OUTPUT)/rdvl.o $(OUTPUT)/za-fork-asm.o
 
-fp-pidbench: fp-pidbench.S asm-utils.o
+# Build with nolibc to avoid effects due to libc's clone() support
+$(OUTPUT)/fp-pidbench: fp-pidbench.S $(OUTPUT)/asm-utils.o
+       $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/fpsimd-test: fpsimd-test.S $(OUTPUT)/asm-utils.o
        $(CC) -nostdlib $^ -o $@
-fpsimd-test: fpsimd-test.o asm-utils.o
+$(OUTPUT)/rdvl-sve: rdvl-sve.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/rdvl-sme: rdvl-sme.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-ptrace: sve-ptrace.c
+$(OUTPUT)/sve-probe-vls: sve-probe-vls.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-test: sve-test.S $(OUTPUT)/asm-utils.o
        $(CC) -nostdlib $^ -o $@
-rdvl-sve: rdvl-sve.o rdvl.o
-sve-ptrace: sve-ptrace.o
-sve-probe-vls: sve-probe-vls.o rdvl.o
-sve-test: sve-test.o asm-utils.o
+$(OUTPUT)/ssve-test: sve-test.S $(OUTPUT)/asm-utils.o
+       $(CC) -DSSVE -nostdlib $^ -o $@
+$(OUTPUT)/vec-syscfg: vec-syscfg.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/vlset: vlset.c
+$(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o
+       $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+               -include ../../../../include/nolibc/nolibc.h \
+               -static -ffreestanding -Wall $^ -o $@
+$(OUTPUT)/za-ptrace: za-ptrace.c
+$(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o
        $(CC) -nostdlib $^ -o $@
-vec-syscfg: vec-syscfg.o rdvl.o
-vlset: vlset.o
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sme.c b/tools/testing/selftests/arm64/fp/rdvl-sme.c
new file mode 100644 (file)
index 0000000..49b0b2e
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+       int vl = rdvl_sme();
+
+       printf("%d\n", vl);
+
+       return 0;
+}
index c916c1c9defdc877946f3ae405a0e19f5d821a81..20dc29996dc69e962e6f1fde7b4098eeb94ccce3 100644 (file)
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 // Copyright (C) 2021 ARM Limited.
 
+#include "sme-inst.h"
+
 .arch_extension sve
 
 .globl rdvl_sve
@@ -8,3 +10,11 @@ rdvl_sve:
        hint    34                      // BTI C
        rdvl    x0, #1
        ret
+
+.globl rdvl_sme
+rdvl_sme:
+       hint    34                      // BTI C
+
+       rdsvl   0, 1
+
+       ret
index 7c9d953fc9e7337e5794196af9574265797cd2fd..5d323679fbc940bf1325197cacf49e8e96f2d0c3 100644 (file)
@@ -3,6 +3,7 @@
 #ifndef RDVL_H
 #define RDVL_H
 
+int rdvl_sme(void);
 int rdvl_sve(void);
 
 #endif
diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h
new file mode 100644 (file)
index 0000000..7191e53
--- /dev/null
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-2 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+
+#ifndef SME_INST_H
+#define SME_INST_H
+
+/*
+ * RDSVL X\nx, #\imm
+ */
+.macro rdsvl nx, imm
+       .inst   0x4bf5800                       \
+               | (\imm << 5)                   \
+               | (\nx)
+.endm
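+// e.g. "rdsvl 0, 1" reads the streaming vector length in bytes into x0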
+
+.macro smstop
+       msr     S0_3_C4_C6_3, xzr
+.endm
+
+.macro smstart_za
+       msr     S0_3_C4_C5_3, xzr
+.endm
+
+.macro smstart_sm
+       msr     S0_3_C4_C3_3, xzr
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ *     LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+       .inst   0xe1000000                      \
+               | (((\nw) & 3) << 13)           \
+               | ((\nxbase) << 5)              \
+               | ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ *     STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+       .inst   0xe1200000                      \
+               | (((\nw) & 3) << 13)           \
+               | ((\nxbase) << 5)              \
+               | ((\offset) & 7)
+.endm
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/ssve-stress b/tools/testing/selftests/arm64/fp/ssve-stress
new file mode 100644 (file)
index 0000000..e2bd2cc
--- /dev/null
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+       trap - INT TERM CHLD
+       set +e
+
+       if [ -n "$pids" ]; then
+               kill $pids
+               wait $pids
+               pids=
+       fi
+
+       if [ -n "$logs" ]; then
+               cat $logs
+               rm $logs
+               logs=
+       fi
+}
+
+interrupt () {
+       cleanup
+       exit 0
+}
+
+child_died () {
+       cleanup
+       exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+       log=`mktemp`
+       logs=$logs\ $log
+       ./ssve-test >$log &
+       pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
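+# Keep kicking the children with SIGUSR1 so their irritator handlers run: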
+while :; do
+       kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
index 4c418b2021e02ebb8e1c5792c65d607d95f51d6f..8c48479775837c186872a64671a8e2be8ee0e9ee 100644 (file)
 #define NT_ARM_SVE 0x405
 #endif
 
+#ifndef NT_ARM_SSVE
+#define NT_ARM_SSVE 0x40b
+#endif
+
 struct vec_type {
        const char *name;
        unsigned long hwcap_type;
@@ -42,11 +46,18 @@ static const struct vec_type vec_types[] = {
                .regset = NT_ARM_SVE,
                .prctl_set = PR_SVE_SET_VL,
        },
+       {
+               .name = "Streaming SVE",
+               .hwcap_type = AT_HWCAP2,
+               .hwcap = HWCAP2_SME,
+               .regset = NT_ARM_SSVE,
+               .prctl_set = PR_SME_SET_VL,
+       },
 };
 
-#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 4)
 #define FLAG_TESTS 2
-#define FPSIMD_TESTS 3
+#define FPSIMD_TESTS 2
 
 #define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types))
 
@@ -78,6 +89,15 @@ static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
        return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
 }
 
+static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
+{
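+       /* Write the supplied FPSIMD state into the traced child via NT_PRFPREG */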
+       struct iovec iov;
+
+       iov.iov_base = fpsimd;
+       iov.iov_len = sizeof(*fpsimd);
+       return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+}
+
 static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
                                       void **buf, size_t *size)
 {
@@ -240,28 +260,24 @@ static void check_u32(unsigned int vl, const char *reg,
 /* Access the FPSIMD registers via the SVE regset */
 static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
 {
-       void *svebuf = NULL;
-       size_t svebufsz = 0;
+       void *svebuf;
        struct user_sve_header *sve;
        struct user_fpsimd_state *fpsimd, new_fpsimd;
        unsigned int i, j;
        unsigned char *p;
+       int ret;
 
-       /* New process should start with FPSIMD registers only */
-       sve = get_sve(child, type, &svebuf, &svebufsz);
-       if (!sve) {
-               ksft_test_result_fail("get_sve(%s): %s\n",
-                                     type->name, strerror(errno));
-
+       svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+       if (!svebuf) {
+               ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
                return;
-       } else {
-               ksft_test_result_pass("get_sve(%s FPSIMD)\n", type->name);
        }
 
-       ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
-                        "Got FPSIMD registers via %s\n", type->name);
-       if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
-               goto out;
+       memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+       sve = svebuf;
+       sve->flags = SVE_PT_REGS_FPSIMD;
+       sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+       sve->vl = 16;  /* We don't care what the VL is */
 
        /* Try to set a known FPSIMD state via PT_REGS_SVE */
        fpsimd = (struct user_fpsimd_state *)((char *)sve +
@@ -273,12 +289,11 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
                        p[j] = j;
        }
 
-       if (set_sve(child, type, sve)) {
-               ksft_test_result_fail("set_sve(%s FPSIMD): %s\n",
-                                     type->name, strerror(errno));
-
+       ret = set_sve(child, type, sve);
+       ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n",
+                        type->name, ret);
+       if (ret)
                goto out;
-       }
 
        /* Verify via the FPSIMD regset */
        if (get_fpsimd(child, &new_fpsimd)) {
@@ -395,7 +410,7 @@ out:
        free(write_buf);
 }
 
-/* Validate attempting to set SVE data and read SVE data */
+/* Validate attempting to set SVE data and read it via the FPSIMD regset */
 static void ptrace_set_sve_get_fpsimd_data(pid_t child,
                                           const struct vec_type *type,
                                           unsigned int vl)
@@ -478,6 +493,115 @@ out:
        free(write_buf);
 }
 
+/* Validate attempting to set FPSIMD data and read it via the SVE regset */
+static void ptrace_set_fpsimd_get_sve_data(pid_t child,
+                                          const struct vec_type *type,
+                                          unsigned int vl)
+{
+       void *read_buf = NULL;
+       unsigned char *p;
+       struct user_sve_header *read_sve;
+       unsigned int vq = sve_vq_from_vl(vl);
+       struct user_fpsimd_state write_fpsimd;
+       int ret, i, j;
+       size_t read_sve_size = 0;
+       size_t expected_size;
+       int errors = 0;
+
+       if (__BYTE_ORDER == __BIG_ENDIAN) {
+               ksft_test_result_skip("Big endian not supported\n");
+               return;
+       }
+
+       for (i = 0; i < 32; ++i) {
+               p = (unsigned char *)&write_fpsimd.vregs[i];
+
+               for (j = 0; j < sizeof(write_fpsimd.vregs[i]); ++j)
+                       p[j] = j;
+       }
+
+       ret = set_fpsimd(child, &write_fpsimd);
+       if (ret != 0) {
+               ksft_test_result_fail("Failed to set FPSIMD state: %d\n)",
+                                     ret);
+               return;
+       }
+
+       if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) {
+               ksft_test_result_fail("Failed to read %s VL %u data\n",
+                                     type->name, vl);
+               return;
+       }
+       read_sve = read_buf;
+
+       if (read_sve->vl != vl) {
+               ksft_test_result_fail("Child VL != expected VL %d\n",
+                                     read_sve->vl, vl);
+               goto out;
+       }
+
+       /* The kernel may return either SVE or FPSIMD format */
+       switch (read_sve->flags & SVE_PT_REGS_MASK) {
+       case SVE_PT_REGS_FPSIMD:
+               expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD);
+               if (read_sve_size < expected_size) {
+                       ksft_test_result_fail("Read %d bytes, expected %d\n",
+                                             read_sve_size, expected_size);
+                       goto out;
+               }
+
+               ret = memcmp(&write_fpsimd, read_buf + SVE_PT_FPSIMD_OFFSET,
+                            sizeof(write_fpsimd));
+               if (ret != 0) {
+                       ksft_print_msg("Read FPSIMD data mismatch\n");
+                       errors++;
+               }
+               break;
+
+       case SVE_PT_REGS_SVE:
+               expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+               if (read_sve_size < expected_size) {
+                       ksft_test_result_fail("Read %d bytes, expected %d\n",
+                                             read_sve_size, expected_size);
+                       goto out;
+               }
+
+               for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+                       __uint128_t tmp = 0;
+
+                       /*
+                        * Z regs are stored endianness invariant, this won't
+                        * work for big endian
+                        */
+                       memcpy(&tmp, read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+                              sizeof(tmp));
+
+                       if (tmp != write_fpsimd.vregs[i]) {
+                               ksft_print_msg("Mismatch in FPSIMD for %s VL %u Z%d/V%d\n",
+                                              type->name, vl, i, i);
+                               errors++;
+                       }
+               }
+
+               check_u32(vl, "FPSR", &write_fpsimd.fpsr,
+                         read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors);
+               check_u32(vl, "FPCR", &write_fpsimd.fpcr,
+                         read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
+               break;
+       default:
+               ksft_print_msg("Unexpected regs type %d\n",
+                              read_sve->flags & SVE_PT_REGS_MASK);
+               errors++;
+               break;
+       }
+
+       ksft_test_result(errors == 0, "Set FPSIMD, read via SVE for %s VL %u\n",
+                        type->name, vl);
+
+out:
+       free(read_buf);
+}
+
 static int do_parent(pid_t child)
 {
        int ret = EXIT_FAILURE;
@@ -548,11 +672,9 @@ static int do_parent(pid_t child)
                if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
                        ptrace_sve_fpsimd(child, &vec_types[i]);
                } else {
-                       ksft_test_result_skip("%s FPSIMD get via SVE\n",
-                                             vec_types[i].name);
                        ksft_test_result_skip("%s FPSIMD set via SVE\n",
                                              vec_types[i].name);
-                       ksft_test_result_skip("%s set read via FPSIMD\n",
+                       ksft_test_result_skip("%s FPSIMD read\n",
                                              vec_types[i].name);
                }
 
@@ -585,11 +707,14 @@ static int do_parent(pid_t child)
                        if (vl_supported) {
                                ptrace_set_sve_get_sve_data(child, &vec_types[i], vl);
                                ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl);
+                               ptrace_set_fpsimd_get_sve_data(child, &vec_types[i], vl);
                        } else {
                                ksft_test_result_skip("%s set SVE get SVE for VL %d\n",
                                                      vec_types[i].name, vl);
                                ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n",
                                                      vec_types[i].name, vl);
+                               ksft_test_result_skip("%s set FPSIMD get SVE for VL %d\n",
+                                                     vec_types[i].name, vl);
                        }
                }
        }
index f5b1b48ffff2b70ffcb1e3a8170c26597bd50f6c..589264231a2d0afd8b1184f63d6c629b882d900c 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/unistd.h>
 #include "assembler.h"
 #include "asm-offsets.h"
+#include "sme-inst.h"
 
 #define NZR    32
 #define NPR    16
@@ -156,6 +157,7 @@ endfunction
 // We fill the upper lanes of FFR with zeros.
 // Beware: corrupts P0.
 function setup_ffr
+#ifndef SSVE
        mov     x4, x30
 
        and     w0, w0, #0x3
@@ -178,6 +180,9 @@ function setup_ffr
        wrffr   p0.b
 
        ret     x4
+#else
+       ret
+#endif
 endfunction
 
 // Trivial memory compare: compare x2 bytes starting at address x0 with
@@ -260,6 +265,7 @@ endfunction
 // Beware -- corrupts P0.
 // Clobbers x0-x5.
 function check_ffr
+#ifndef SSVE
        mov     x3, x30
 
        ldr     x4, =scratch
@@ -280,6 +286,9 @@ function check_ffr
        mov     x2, x5
        mov     x30, x3
        b       memcmp
+#else
+       ret
+#endif
 endfunction
 
 // Any SVE register modified here can cause corruption in the main
@@ -295,10 +304,12 @@ function irritator_handler
        movi    v0.8b, #1
        movi    v9.16b, #2
        movi    v31.8b, #3
+#ifndef SSVE
        // And P0
        rdffr   p0.b
        // And FFR
        wrffr   p15.b
+#endif
 
        ret
 endfunction
@@ -359,6 +370,11 @@ endfunction
 .globl _start
 function _start
 _start:
+#ifdef SSVE
+       puts    "Streaming mode "
+       smstart_sm
+#endif
+
        // Sanity-check and report the vector length
 
        rdvl    x19, #8
@@ -407,6 +423,10 @@ _start:
        orr     w2, w2, #SA_NODEFER
        bl      setsignal
 
+#ifdef SSVE
+       smstart_sm              // syscalls will have exited streaming mode
+#endif
+
        mov     x22, #0         // generation number, increments per iteration
 .Ltest_loop:
        rdvl    x0, #8
index c90658811a83857ffcdd0c32c6a4062cff9b22bc..9bcfcdc34ee9475c1b9720dcb7faebe997cb57b7 100644 (file)
@@ -51,6 +51,16 @@ static struct vec_data vec_data[] = {
                .prctl_set = PR_SVE_SET_VL,
                .default_vl_file = "/proc/sys/abi/sve_default_vector_length",
        },
+       {
+               .name = "SME",
+               .hwcap_type = AT_HWCAP2,
+               .hwcap = HWCAP2_SME,
+               .rdvl = rdvl_sme,
+               .rdvl_binary = "./rdvl-sme",
+               .prctl_get = PR_SME_GET_VL,
+               .prctl_set = PR_SME_SET_VL,
+               .default_vl_file = "/proc/sys/abi/sme_default_vector_length",
+       },
 };
 
 static int stdio_read_integer(FILE *f, const char *what, int *val)
index 308d27a68226987cc7063b2479976bcb5d7a144d..76912a581a95fc30a01e041d7499380b0e2d90aa 100644 (file)
@@ -22,12 +22,15 @@ static int inherit = 0;
 static int no_inherit = 0;
 static int force = 0;
 static unsigned long vl;
+static int set_ctl = PR_SVE_SET_VL;
+static int get_ctl = PR_SVE_GET_VL;
 
 static const struct option options[] = {
        { "force",      no_argument, NULL, 'f' },
        { "inherit",    no_argument, NULL, 'i' },
        { "max",        no_argument, NULL, 'M' },
        { "no-inherit", no_argument, &no_inherit, 1 },
+       { "sme",        no_argument, NULL, 's' },
        { "help",       no_argument, NULL, '?' },
        {}
 };
@@ -50,6 +53,9 @@ static int parse_options(int argc, char **argv)
                case 'M':       vl = SVE_VL_MAX; break;
                case 'f':       force = 1; break;
                case 'i':       inherit = 1; break;
+               case 's':       set_ctl = PR_SME_SET_VL;
+                               get_ctl = PR_SME_GET_VL;
+                               break;
                case 0:         break;
                default:        goto error;
                }
@@ -125,14 +131,14 @@ int main(int argc, char **argv)
        if (inherit)
                flags |= PR_SVE_VL_INHERIT;
 
-       t = prctl(PR_SVE_SET_VL, vl | flags);
+       t = prctl(set_ctl, vl | flags);
        if (t < 0) {
                fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
                        program_name, strerror(errno));
                goto error;
        }
 
-       t = prctl(PR_SVE_GET_VL);
+       t = prctl(get_ctl);
        if (t == -1) {
                fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
                        program_name, strerror(errno));
diff --git a/tools/testing/selftests/arm64/fp/za-fork-asm.S b/tools/testing/selftests/arm64/fp/za-fork-asm.S
new file mode 100644 (file)
index 0000000..2fafadd
--- /dev/null
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAGIC     42
+
+#define MAXVL     2048
+#define MAXVL_B   (MAXVL / 8)
+
+.pushsection .text
+.data
+.align 4
+scratch:
+       .space  MAXVL_B
+.popsection
+
+.globl fork_test
+fork_test:
+       smstart_za
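+       // PSTATE.ZA is now set; ZA can be accessed without streaming mode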
+
+       // For simplicity just set one word in one vector, other tests
+       // cover general data corruption issues.
+       ldr     x0, =scratch
+       mov     x1, #MAGIC
+       str     x1, [x0]
+       mov     w12, wzr
+       _ldr_za 12, 0                   // ZA.H[W12] loaded from [X0]
+
+       // Tail call into the C portion that does the fork & verify
+       b       fork_test_c
+
+.globl verify_fork
+verify_fork:
+       // SVCR should have ZA=1, SM=0
+       mrs     x0, S3_3_C4_C2_2
+       and     x1, x0, #3
+       cmp     x1, #2
+       beq     1f
+       mov     x0, xzr
+       b       100f
+1:
+
+       // ZA should still have the value we loaded
+       ldr     x0, =scratch
+       mov     w12, wzr
+       _str_za 12, 0                   // ZA.H[W12] stored to [X0]
+       ldr     x1, [x0]
+       cmp     x1, #MAGIC
+       beq     2f
+       mov     x0, xzr
+       b       100f
+
+2:
+       // All tests passed
+       mov     x0, #1
+100:
+       ret
+
diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c
new file mode 100644 (file)
index 0000000..ff475c6
--- /dev/null
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#define EXPECTED_TESTS 1
+
+static void putstr(const char *str)
+{
+       write(1, str, strlen(str));
+}
+
+static void putnum(unsigned int num)
+{
+       char c;
+
+       if (num / 10)
+               putnum(num / 10);
+
+       c = '0' + (num % 10);
+       write(1, &c, 1);
+}
+
+static int tests_run;
+static int tests_passed;
+static int tests_failed;
+static int tests_skipped;
+
+static void print_summary(void)
+{
+       if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
+               putstr("# UNEXPECTED TEST COUNT: ");
+
+       putstr("# Totals: pass:");
+       putnum(tests_passed);
+       putstr(" fail:");
+       putnum(tests_failed);
+       putstr(" xfail:0 xpass:0 skip:");
+       putnum(tests_skipped);
+       putstr(" error:0\n");
+}
+
+int fork_test(void);
+int verify_fork(void);
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value.  This is called from the
+ * fork_test() asm function.
+ */
+int fork_test_c(void)
+{
+       pid_t newpid, waiting;
+       int child_status, parent_result;
+
+       newpid = fork();
+       if (newpid == 0) {
+               /* In child */
+               if (!verify_fork()) {
+                       putstr("# ZA state invalid in child\n");
+                       exit(0);
+               } else {
+                       exit(1);
+               }
+       }
+       if (newpid < 0) {
+               putstr("# fork() failed: -");
+               putnum(-newpid);
+               putstr("\n");
+               return 0;
+       }
+
+       parent_result = verify_fork();
+       if (!parent_result)
+               putstr("# ZA state invalid in parent\n");
+
+       for (;;) {
+               waiting = waitpid(newpid, &child_status, 0);
+
+               if (waiting < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       putstr("# waitpid() failed: ");
+                       putnum(errno);
+                       putstr("\n");
+                       return 0;
+               }
+               if (waiting != newpid) {
+                       putstr("# waitpid() returned wrong PID\n");
+                       return 0;
+               }
+
+               if (!WIFEXITED(child_status)) {
+                       putstr("# child did not exit\n");
+                       return 0;
+               }
+
+               return WEXITSTATUS(child_status) && parent_result;
+       }
+}
+
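+/* Run the named test function and print a TAP result line for it */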
+#define run_test(name)                      \
+       if (name()) {                        \
+               tests_passed++;              \
+       } else {                             \
+               tests_failed++;              \
+               putstr("not ");              \
+       }                                    \
+       putstr("ok ");                       \
+       putnum(++tests_run);                 \
+       putstr(" " #name "\n");
+
+int main(int argc, char **argv)
+{
+       int ret, i;
+
+       putstr("TAP version 13\n");
+       putstr("1..");
+       putnum(EXPECTED_TESTS);
+       putstr("\n");
+
+       putstr("# PID: ");
+       putnum(getpid());
+       putstr("\n");
+
+       /*
+        * This test is run with nolibc, which doesn't support hwcap, and
+        * implementing that would be disproportionate, so instead check
+        * for the default vector length configuration in /proc.
+        */
+       ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+       if (ret >= 0) {
+               run_test(fork_test);
+
+       } else {
+               putstr("# SME support not present\n");
+
+               for (i = 0; i < EXPECTED_TESTS; i++) {
+                       putstr("ok ");
+                       putnum(i + 1);
+                       putstr(" skipped\n");
+               }
+
+               tests_skipped += EXPECTED_TESTS;
+       }
+
+       print_summary();
+
+       return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c
new file mode 100644 (file)
index 0000000..bf61586
--- /dev/null
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+
+#define EXPECTED_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+
+static void fill_buf(char *buf, size_t size)
+{
+       int i;
+
+       for (i = 0; i < size; i++)
+               buf[i] = random();
+}
+
+static int do_child(void)
+{
+       if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+               ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+       if (raise(SIGSTOP))
+               ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+       return EXIT_SUCCESS;
+}
+
+static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size)
+{
+       struct user_za_header *za;
+       void *p;
+       size_t sz = sizeof(*za);
+       struct iovec iov;
+
+       while (1) {
+               if (*size < sz) {
+                       p = realloc(*buf, sz);
+                       if (!p) {
+                               errno = ENOMEM;
+                               goto error;
+                       }
+
+                       *buf = p;
+                       *size = sz;
+               }
+
+               iov.iov_base = *buf;
+               iov.iov_len = sz;
+               if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
+                       goto error;
+
+               za = *buf;
+               if (za->size <= sz)
+                       break;
+
+               sz = za->size;
+       }
+
+       return za;
+
+error:
+       return NULL;
+}
+
+static int set_za(pid_t pid, const struct user_za_header *za)
+{
+       struct iovec iov;
+
+       iov.iov_base = (void *)za;
+       iov.iov_len = za->size;
+       return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov);
+}
+
+/* Validate attempting to set the specified VL via ptrace */
+static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
+{
+       struct user_za_header za;
+       struct user_za_header *new_za = NULL;
+       size_t new_za_size = 0;
+       int ret, prctl_vl;
+
+       *supported = false;
+
+       /* Check if the VL is supported in this process */
+       prctl_vl = prctl(PR_SME_SET_VL, vl);
+       if (prctl_vl == -1)
+               ksft_exit_fail_msg("prctl(PR_SME_SET_VL) failed: %s (%d)\n",
+                                  strerror(errno), errno);
+
+       /* If the VL is not supported then a supported VL will be returned */
+       *supported = (prctl_vl == vl);
+
+       /* Set the VL by doing a set with no register payload */
+       memset(&za, 0, sizeof(za));
+       za.size = sizeof(za);
+       za.vl = vl;
+       ret = set_za(child, &za);
+       if (ret != 0) {
+               ksft_test_result_fail("Failed to set VL %u\n", vl);
+               return;
+       }
+
+       /*
+        * Read back the new register state and verify that we have the
+        * same VL that we got from prctl() on ourselves.
+        */
+       if (!get_za(child, (void **)&new_za, &new_za_size)) {
+               ksft_test_result_fail("Failed to read VL %u\n", vl);
+               return;
+       }
+
+       ksft_test_result(new_za->vl == prctl_vl, "Set VL %u\n", vl);
+
+       free(new_za);
+}
+
+/* Validate attempting to set no ZA data and read it back */
+static void ptrace_set_no_data(pid_t child, unsigned int vl)
+{
+       void *read_buf = NULL;
+       struct user_za_header write_za;
+       struct user_za_header *read_za;
+       size_t read_za_size = 0;
+       int ret;
+
+       /* Set up some data and write it out */
+       memset(&write_za, 0, sizeof(write_za));
+       write_za.size = ZA_PT_ZA_OFFSET;
+       write_za.vl = vl;
+
+       ret = set_za(child, &write_za);
+       if (ret != 0) {
+               ksft_test_result_fail("Failed to set VL %u no data\n", vl);
+               return;
+       }
+
+       /* Read the data back */
+       if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+               ksft_test_result_fail("Failed to read VL %u no data\n", vl);
+               return;
+       }
+       read_za = read_buf;
+
+       /* We might read more data if there's extensions we don't know */
+       if (read_za->size < write_za.size) {
+               ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+                                     vl, write_za.size, read_za->size);
+               goto out_read;
+       }
+
+       ksft_test_result(read_za->size == write_za.size,
+                        "Disabled ZA for VL %u\n", vl);
+
+out_read:
+       free(read_buf);
+}
+
+/* Validate attempting to set data and read it back */
+static void ptrace_set_get_data(pid_t child, unsigned int vl)
+{
+       void *write_buf;
+       void *read_buf = NULL;
+       struct user_za_header *write_za;
+       struct user_za_header *read_za;
+       size_t read_za_size = 0;
+       unsigned int vq = sve_vq_from_vl(vl);
+       int ret;
+       size_t data_size;
+
+       data_size = ZA_PT_SIZE(vq);
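+       /* ZA_PT_SIZE(vq) covers the header plus the full VL x VL ZA data */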
+       write_buf = malloc(data_size);
+       if (!write_buf) {
+               ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
+                                     data_size, vl);
+               return;
+       }
+       write_za = write_buf;
+
+       /* Set up some data and write it out */
+       memset(write_za, 0, data_size);
+       write_za->size = data_size;
+       write_za->vl = vl;
+
+       fill_buf(write_buf + ZA_PT_ZA_OFFSET, ZA_PT_ZA_SIZE(vq));
+
+       ret = set_za(child, write_za);
+       if (ret != 0) {
+               ksft_test_result_fail("Failed to set VL %u data\n", vl);
+               goto out;
+       }
+
+       /* Read the data back */
+       if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+               ksft_test_result_fail("Failed to read VL %u data\n", vl);
+               goto out;
+       }
+       read_za = read_buf;
+
+       /* We might read more data if there's extensions we don't know */
+       if (read_za->size < write_za->size) {
+               ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+                                     vl, write_za->size, read_za->size);
+               goto out_read;
+       }
+
+       ksft_test_result(memcmp(write_buf + ZA_PT_ZA_OFFSET,
+                               read_buf + ZA_PT_ZA_OFFSET,
+                               ZA_PT_ZA_SIZE(vq)) == 0,
+                        "Data match for VL %u\n", vl);
+
+out_read:
+       free(read_buf);
+out:
+       free(write_buf);
+}
+
+static int do_parent(pid_t child)
+{
+       int ret = EXIT_FAILURE;
+       pid_t pid;
+       int status;
+       siginfo_t si;
+       unsigned int vq, vl;
+       bool vl_supported;
+
+       /* Attach to the child */
+       while (1) {
+               int sig;
+
+               pid = wait(&status);
+               if (pid == -1) {
+                       perror("wait");
+                       goto error;
+               }
+
+               /*
+                * This should never happen but it's hard to flag in
+                * the framework.
+                */
+               if (pid != child)
+                       continue;
+
+               if (WIFEXITED(status) || WIFSIGNALED(status))
+                       ksft_exit_fail_msg("Child died unexpectedly\n");
+
+               if (!WIFSTOPPED(status))
+                       goto error;
+
+               sig = WSTOPSIG(status);
+
+               if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+                       if (errno == ESRCH)
+                               goto disappeared;
+
+                       if (errno == EINVAL) {
+                               sig = 0; /* bust group-stop */
+                               goto cont;
+                       }
+
+                       ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+                                             strerror(errno));
+                       goto error;
+               }
+
+               if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+                   si.si_pid == pid)
+                       break;
+
+       cont:
+               if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+                       if (errno == ESRCH)
+                               goto disappeared;
+
+                       ksft_test_result_fail("PTRACE_CONT: %s\n",
+                                             strerror(errno));
+                       goto error;
+               }
+       }
+
+       ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+       /* Step through every possible VQ */
+       for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
+               vl = sve_vl_from_vq(vq);
+
+               /* First, try to set this vector length */
+               ptrace_set_get_vl(child, vl, &vl_supported);
+
+               /* If the VL is supported validate data set/get */
+               if (vl_supported) {
+                       ptrace_set_no_data(child, vl);
+                       ptrace_set_get_data(child, vl);
+               } else {
+                       ksft_test_result_skip("Disabled ZA for VL %u\n", vl);
+                       ksft_test_result_skip("Get and set data for VL %u\n",
+                                             vl);
+               }
+       }
+
+       ret = EXIT_SUCCESS;
+
+error:
+       kill(child, SIGKILL);
+
+disappeared:
+       return ret;
+}
+
+int main(void)
+{
+       int ret = EXIT_SUCCESS;
+       pid_t child;
+
+       srandom(getpid());
+
+       ksft_print_header();
+
+       if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+               ksft_set_plan(1);
+               ksft_exit_skip("SME not available\n");
+       }
+
+       ksft_set_plan(EXPECTED_TESTS);
+
+       child = fork();
+       if (!child)
+               return do_child();
+
+       if (do_parent(child))
+               ret = EXIT_FAILURE;
+
+       ksft_print_cnts();
+
+       return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/za-stress b/tools/testing/selftests/arm64/fp/za-stress
new file mode 100644 (file)
index 0000000..5ac386b
--- /dev/null
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+       trap - INT TERM CHLD
+       set +e
+
+       if [ -n "$pids" ]; then
+               kill $pids
+               wait $pids
+               pids=
+       fi
+
+       if [ -n "$logs" ]; then
+               cat $logs
+               rm $logs
+               logs=
+       fi
+}
+
+interrupt () {
+       cleanup
+       exit 0
+}
+
+child_died () {
+       cleanup
+       exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+       log=`mktemp`
+       logs=$logs\ $log
+       ./za-test >$log &
+       pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
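+# Keep kicking the children with SIGUSR1 so their irritator handlers run: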
+while :; do
+       kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S
new file mode 100644 (file)
index 0000000..9ab6f9c
--- /dev/null
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Scalable Matrix Extension ZA context switch test
+// Repeatedly writes unique test patterns into each ZA tile
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAXVL     2048
+#define MAXVL_B   (MAXVL / 8)
+
+// Declare some storage space to shadow ZA register contents and a
+// scratch buffer for a vector.
+.pushsection .text
+.data
+.align 4
+zaref:
+       .space  MAXVL_B * MAXVL_B
+scratch:
+       .space  MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+       cmp     x2, #0
+       b.eq    1f
+0:     ldrb    w3, [x1], #1
+       strb    w3, [x0], #1
+       subs    x2, x2, #1
+       b.ne    0b
+1:     ret
+endfunction
+
+// Generate a test pattern for storage in ZA
+// x0: pid
+// x1: row in ZA
+// x2: generation
+
+// These values are used to construct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28  generation number (increments once per test_loop)
+// bits 27:16  pid
+// bits 15: 8  row number
+// bits  7: 0  32-bit lane index
+
+function pattern
+       mov     w3, wzr
+       bfi     w3, w0, #16, #12        // PID
+       bfi     w3, w1, #8, #8          // Row
+       bfi     w3, w2, #28, #4         // Generation
+
+       ldr     x0, =scratch
+       mov     w1, #MAXVL_B / 4
+
+0:     str     w3, [x0], #4
+       add     w3, w3, #1              // Lane
+       subs    w1, w1, #1
+       b.ne    0b
+
+       ret
+endfunction
+
+// Get the address of shadow data for ZA horizontal vector xn
+.macro _adrza xd, xn, nrtmp
+       ldr     \xd, =zaref
+       rdsvl   \nrtmp, 1
+       madd    \xd, x\nrtmp, \xn, \xd
+.endm
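+// e.g. "_adrza x0, x12, 2" leaves the row address in x0 and the VL in bytes in x2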
+
+// Set up test pattern in a ZA horizontal vector
+// x0: pid
+// x1: row number
+// x2: generation
+function setup_za
+       mov     x4, x30
+       mov     x12, x1                 // Use x12 for vector select
+
+       bl      pattern                 // Get pattern in scratch buffer
+       _adrza  x0, x12, 2              // Shadow buffer pointer to x0 and x5
+       mov     x5, x0
+       ldr     x1, =scratch
+       bl      memcpy                  // length set up in x2 by _adrza
+
+       _ldr_za 12, 5                   // load vector w12 from pointer x5
+
+       ret     x4
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+       cbz     x2, 2f
+
+       stp     x0, x1, [sp, #-0x20]!
+       str     x2, [sp, #0x10]
+
+       mov     x5, #0
+0:     ldrb    w3, [x0, x5]
+       ldrb    w4, [x1, x5]
+       add     x5, x5, #1
+       cmp     w3, w4
+       b.ne    1f
+       subs    x2, x2, #1
+       b.ne    0b
+
+1:     ldr     x2, [sp, #0x10]
+       ldp     x0, x1, [sp], #0x20
+       b.ne    barf
+
+2:     ret
+endfunction
+
+// Verify that a ZA vector matches its shadow in memory, else abort
+// x0: row number
+// Clobbers x0-x7 and x12.
+function check_za
+       mov     x3, x30
+
+       mov     x12, x0
+       _adrza  x5, x0, 6               // pointer to expected value in x5
+       mov     x4, x0
+       ldr     x7, =scratch            // x7 is scratch
+
+       mov     x0, x7                  // Poison scratch
+       mov     x1, x6
+       bl      memfill_ae
+
+       _str_za 12, 7                   // save vector w12 to pointer x7
+
+       mov     x0, x5
+       mov     x1, x7
+       mov     x2, x6
+       mov     x30, x3
+       b       memcmp
+endfunction
+
+// Any SME register modified here can cause corruption in the main
+// thread -- but *only* the locations modified here.
+function irritator_handler
+       // Increment the irritation signal count (x23):
+       ldr     x0, [x2, #ucontext_regs + 8 * 23]
+       add     x0, x0, #1
+       str     x0, [x2, #ucontext_regs + 8 * 23]
+
+       // Corrupt some random ZA data
+#if 0
+       adr     x0, .text + (irritator_handler - .text) / 16 * 16
+       movi    v0.8b, #1
+       movi    v9.16b, #2
+       movi    v31.8b, #3
+#endif
+
+       ret
+endfunction
+
+function terminate_handler
+       mov     w21, w0
+       mov     x20, x2
+
+       puts    "Terminated by signal "
+       mov     w0, w21
+       bl      putdec
+       puts    ", no error, iterations="
+       ldr     x0, [x20, #ucontext_regs + 8 * 22]
+       bl      putdec
+       puts    ", signals="
+       ldr     x0, [x20, #ucontext_regs + 8 * 23]
+       bl      putdecn
+
+       mov     x0, #0
+       mov     x8, #__NR_exit
+       svc     #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+       str     x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+       mov     w4, w0
+       mov     x5, x1
+       mov     w6, w2
+
+       add     x0, sp, #16
+       mov     x1, #sa_sz
+       bl      memclr
+
+       mov     w0, w4
+       add     x1, sp, #16
+       str     w6, [x1, #sa_flags]
+       str     x5, [x1, #sa_handler]
+       mov     x2, #0
+       mov     x3, #sa_mask_sz
+       mov     x8, #__NR_rt_sigaction
+       svc     #0
+
+       cbz     w0, 1f
+
+       puts    "sigaction failure\n"
+       b       .Labort
+
+1:     ldr     x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+       ret
+endfunction
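
For readers more at home in C, a rough libc equivalent of setsignal might
look as follows (setsignal_c is a hypothetical sketch; the assembly invokes
the raw rt_sigaction syscall on a zeroed struct, while libc's sigaction also
handles sa_restorer details):

    #include <signal.h>
    #include <string.h>

    static int setsignal_c(int sig,
                           void (*handler)(int, siginfo_t *, void *),
                           int flags)
    {
            struct sigaction sa;

            memset(&sa, 0, sizeof(sa));     /* mirrors the memclr above */
            sa.sa_sigaction = handler;
            sa.sa_flags = flags;            /* SA_SIGINFO, optionally SA_NODEFER */
            return sigaction(sig, &sa, NULL);
    }
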
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+       puts    "Streaming mode "
+       smstart_za
+
+       // Sanity-check and report the vector length
+
+       rdsvl   19, 8
+       cmp     x19, #128
+       b.lo    1f
+       cmp     x19, #2048
+       b.hi    1f
+       tst     x19, #(8 - 1)
+       b.eq    2f
+
+1:     puts    "bad vector length: "
+       mov     x0, x19
+       bl      putdecn
+       b       .Labort
+
+2:     puts    "vector length:\t"
+       mov     x0, x19
+       bl      putdec
+       puts    " bits\n"
+
+       // Obtain our PID, to ensure test pattern uniqueness between processes
+       mov     x8, #__NR_getpid
+       svc     #0
+       mov     x20, x0
+
+       puts    "PID:\t"
+       mov     x0, x20
+       bl      putdecn
+
+       mov     x23, #0         // Irritation signal count
+
+       mov     w0, #SIGINT
+       adr     x1, terminate_handler
+       mov     w2, #SA_SIGINFO
+       bl      setsignal
+
+       mov     w0, #SIGTERM
+       adr     x1, terminate_handler
+       mov     w2, #SA_SIGINFO
+       bl      setsignal
+
+       mov     w0, #SIGUSR1
+       adr     x1, irritator_handler
+       mov     w2, #SA_SIGINFO
+       orr     w2, w2, #SA_NODEFER
+       bl      setsignal
+
+       mov     x22, #0         // generation number, increments per iteration
+.Ltest_loop:
+       rdsvl   0, 8
+       cmp     x0, x19
+       b.ne    vl_barf
+
+       rdsvl   21, 1           // Set up ZA & shadow with test pattern
+0:     mov     x0, x20
+       sub     x1, x21, #1
+       mov     x2, x22
+       bl      setup_za
+       subs    x21, x21, #1
+       b.ne    0b
+
+       and     x8, x22, #127           // Every 128 iterations...
+       cbz     x8, 0f
+       mov     x8, #__NR_getpid        // (otherwise minimal syscall)
+       b       1f
+0:
+       mov     x8, #__NR_sched_yield   // ...encourage preemption
+1:
+       svc     #0
+
+       mrs     x0, S3_3_C4_C2_2        // SVCR should have ZA=1,SM=0
+       and     x1, x0, #3
+       cmp     x1, #2
+       b.ne    svcr_barf
+
+       rdsvl   21, 1                   // Verify that the data made it through
+       rdsvl   24, 1                   // (x24 keeps the total row count for indexing)
+0:     sub     x0, x24, x21
+       bl      check_za
+       subs    x21, x21, #1
+       bne     0b
+
+       add     x22, x22, #1    // Everything still working
+       b       .Ltest_loop
+
+.Labort:
+       mov     x0, #0
+       mov     x1, #SIGABRT
+       mov     x8, #__NR_kill
+       svc     #0
+endfunction
+
+function barf
+// fpsimd.c activity log dump hack
+//     ldr     w0, =0xdeadc0de
+//     mov     w8, #__NR_exit
+//     svc     #0
+// end hack
+       smstop
+       mov     x10, x0 // expected data
+       mov     x11, x1 // actual data
+       mov     x12, x2 // data size
+
+       puts    "Mismatch: PID="
+       mov     x0, x20
+       bl      putdec
+       puts    ", iteration="
+       mov     x0, x22
+       bl      putdec
+       puts    ", row="
+       mov     x0, x21
+       bl      putdecn
+       puts    "\tExpected ["
+       mov     x0, x10
+       mov     x1, x12
+       bl      dumphex
+       puts    "]\n\tGot      ["
+       mov     x0, x11
+       mov     x1, x12
+       bl      dumphex
+       puts    "]\n"
+
+       mov     x8, #__NR_getpid
+       svc     #0
+// fpsimd.c activity log dump hack
+//     ldr     w0, =0xdeadc0de
+//     mov     w8, #__NR_exit
+//     svc     #0
+// ^ end of hack
+       mov     x1, #SIGABRT
+       mov     x8, #__NR_kill
+       svc     #0
+//     mov     x8, #__NR_exit
+//     mov     x1, #1
+//     svc     #0
+endfunction
+
+function vl_barf
+       mov     x10, x0
+
+       puts    "Bad active VL: "
+       mov     x0, x10
+       bl      putdecn
+
+       mov     x8, #__NR_exit
+       mov     x1, #1
+       svc     #0
+endfunction
+
+function svcr_barf
+       mov     x10, x0
+
+       puts    "Bad SVCR: "
+       mov     x0, x10
+       bl      putdecn
+
+       mov     x8, #__NR_exit
+       mov     x1, #1
+       svc     #0
+endfunction
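
The SVCR check in the test loop above relies on SM being bit 0 and ZA being
bit 1 of SVCR; a one-line C restatement of the condition (illustrative only,
svcr_za_only is hypothetical):

    #include <stdbool.h>

    static bool svcr_za_only(unsigned long svcr)
    {
            /* ZA (bit 1) set, SM (bit 0) clear => low two bits == 0b10 */
            return (svcr & 3) == 2;
    }
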
index d1fe4ddf1669045adf561ebb39d87f8de725d164..052d0f9f92b307a0f46bbdb8108b33937e7490ae 100644 (file)
@@ -3,5 +3,6 @@ check_gcr_el1_cswitch
 check_tags_inclusion
 check_child_memory
 check_mmap_options
+check_prctl
 check_ksm_options
 check_user_mem
index 43bd94f853ba70a59e974ebcecbb0ab11d6b6ef8..7597fc632cadf287b72e5cbe744501a619ad61f4 100644 (file)
@@ -85,9 +85,9 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping)
 {
        char *ptr;
        int run, result;
-       int item = sizeof(sizes)/sizeof(int);
+       int item = ARRAY_SIZE(sizes);
 
-       item = sizeof(sizes)/sizeof(int);
+       item = ARRAY_SIZE(sizes);
        mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
        for (run = 0; run < item; run++) {
                ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
@@ -107,7 +107,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
 {
        char *ptr, *map_ptr;
        int run, fd, map_size, result = KSFT_PASS;
-       int total = sizeof(sizes)/sizeof(int);
+       int total = ARRAY_SIZE(sizes);
 
        mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
        for (run = 0; run < total; run++) {
@@ -144,7 +144,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
 int main(int argc, char *argv[])
 {
        int err;
-       int item = sizeof(sizes)/sizeof(int);
+       int item = ARRAY_SIZE(sizes);
 
        page_size = getpagesize();
        if (!page_size) {
diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c
new file mode 100644 (file)
index 0000000..f139a33
--- /dev/null
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2022 ARM Limited
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+
+static int set_tagged_addr_ctrl(int val)
+{
+       int ret;
+
+       ret = prctl(PR_SET_TAGGED_ADDR_CTRL, val, 0, 0, 0);
+       if (ret < 0)
+               ksft_print_msg("PR_SET_TAGGED_ADDR_CTRL: failed %d %d (%s)\n",
+                              ret, errno, strerror(errno));
+       return ret;
+}
+
+static int get_tagged_addr_ctrl(void)
+{
+       int ret;
+
+       ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+       if (ret < 0)
+               ksft_print_msg("PR_GET_TAGGED_ADDR_CTRL failed: %d %d (%s)\n",
+                              ret, errno, strerror(errno));
+       return ret;
+}
+
+/*
+ * Read the current mode without having done any configuration, should
+ * run first.
+ */
+void check_basic_read(void)
+{
+       int ret;
+
+       ret = get_tagged_addr_ctrl();
+       if (ret < 0) {
+               ksft_test_result_fail("check_basic_read\n");
+               return;
+       }
+
+       if (ret & PR_MTE_TCF_SYNC)
+               ksft_print_msg("SYNC enabled\n");
+       if (ret & PR_MTE_TCF_ASYNC)
+               ksft_print_msg("ASYNC enabled\n");
+
+       /* Any configuration is valid */
+       ksft_test_result_pass("check_basic_read\n");
+}
+
+/*
+ * Attempt to set a specified combination of modes.
+ */
+void set_mode_test(const char *name, int hwcap2, int mask)
+{
+       int ret;
+
+       if ((getauxval(AT_HWCAP2) & hwcap2) != hwcap2) {
+               ksft_test_result_skip("%s\n", name);
+               return;
+       }
+
+       ret = set_tagged_addr_ctrl(mask);
+       if (ret < 0) {
+               ksft_test_result_fail("%s\n", name);
+               return;
+       }
+
+       ret = get_tagged_addr_ctrl();
+       if (ret < 0) {
+               ksft_test_result_fail("%s\n", name);
+               return;
+       }
+
+       if ((ret & PR_MTE_TCF_MASK) == mask) {
+               ksft_test_result_pass("%s\n", name);
+       } else {
+               ksft_print_msg("Got %x, expected %x\n",
+                              (ret & PR_MTE_TCF_MASK), mask);
+               ksft_test_result_fail("%s\n", name);
+       }
+}
+
+struct mte_mode {
+       int mask;
+       int hwcap2;
+       const char *name;
+} mte_modes[] = {
+       { PR_MTE_TCF_NONE,  0,          "NONE"  },
+       { PR_MTE_TCF_SYNC,  HWCAP2_MTE, "SYNC"  },
+       { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" },
+       { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC,  HWCAP2_MTE, "SYNC+ASYNC"  },
+};
+
+int main(void)
+{
+       int i;
+
+       ksft_print_header();
+       ksft_set_plan(5);
+
+       check_basic_read();
+       for (i = 0; i < ARRAY_SIZE(mte_modes); i++)
+               set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2,
+                             mte_modes[i].mask);
+
+       ksft_print_cnts();
+
+       return 0;
+}
index deaef1f610768104abe2b076f0e5166cd5ac211f..2b1425b92b69918fbbc19b99b4897309dcbf189b 100644 (file)
@@ -23,10 +23,13 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
 {
        mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
        /* Check the validity of the tagged pointer */
-       memset((void *)ptr, '1', BUFFER_SIZE);
+       memset(ptr, '1', BUFFER_SIZE);
        mte_wait_after_trig();
-       if (cur_mte_cxt.fault_valid)
+       if (cur_mte_cxt.fault_valid) {
+               ksft_print_msg("Unexpected fault recorded for %p-%p in mode %x\n",
+                              ptr, ptr + BUFFER_SIZE, mode);
                return KSFT_FAIL;
+       }
        /* Proceed further for nonzero tags */
        if (!MT_FETCH_TAG((uintptr_t)ptr))
                return KSFT_PASS;
@@ -34,27 +37,32 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
        /* Check the validity outside the range */
        ptr[BUFFER_SIZE] = '2';
        mte_wait_after_trig();
-       if (!cur_mte_cxt.fault_valid)
+       if (!cur_mte_cxt.fault_valid) {
+               ksft_print_msg("No valid fault recorded for %p in mode %x\n",
+                              ptr, mode);
                return KSFT_FAIL;
-       else
+       } else {
                return KSFT_PASS;
+       }
 }
 
 static int check_single_included_tags(int mem_type, int mode)
 {
        char *ptr;
-       int tag, run, result = KSFT_PASS;
+       int tag, run, ret, result = KSFT_PASS;
 
-       ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+       ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
        if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
                                   mem_type, false) != KSFT_PASS)
                return KSFT_FAIL;
 
        for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
-               mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+               ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+               if (ret != 0)
+                       result = KSFT_FAIL;
                /* Try to catch a excluded tag by a number of tries. */
                for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
-                       ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+                       ptr = mte_insert_tags(ptr, BUFFER_SIZE);
                        /* Check tag value */
                        if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
                                ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
@@ -66,7 +74,7 @@ static int check_single_included_tags(int mem_type, int mode)
                        result = verify_mte_pointer_validity(ptr, mode);
                }
        }
-       mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+       mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
        return result;
 }
 
@@ -76,7 +84,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
        int tag, run, result = KSFT_PASS;
        unsigned long excl_mask = 0;
 
-       ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+       ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
        if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
                                   mem_type, false) != KSFT_PASS)
                return KSFT_FAIL;
@@ -86,7 +94,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
                mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
                /* Try to catch an excluded tag by a number of tries. */
                for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
-                       ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+                       ptr = mte_insert_tags(ptr, BUFFER_SIZE);
                        /* Check tag value */
                        if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
                                ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
@@ -98,21 +106,23 @@ static int check_multiple_included_tags(int mem_type, int mode)
                        result = verify_mte_pointer_validity(ptr, mode);
                }
        }
-       mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+       mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
        return result;
 }
 
 static int check_all_included_tags(int mem_type, int mode)
 {
        char *ptr;
-       int run, result = KSFT_PASS;
+       int run, ret, result = KSFT_PASS;
 
-       ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+       ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
        if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
                                   mem_type, false) != KSFT_PASS)
                return KSFT_FAIL;
 
-       mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+       ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+       if (ret != 0)
+               return KSFT_FAIL;
        /* Try to catch an excluded tag by a number of tries. */
        for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
                ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
@@ -122,20 +132,22 @@ static int check_all_included_tags(int mem_type, int mode)
                 */
                result = verify_mte_pointer_validity(ptr, mode);
        }
-       mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+       mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
        return result;
 }
 
 static int check_none_included_tags(int mem_type, int mode)
 {
        char *ptr;
-       int run;
+       int run, ret;
 
-       ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+       ptr = mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
        if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
                return KSFT_FAIL;
 
-       mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+       ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+       if (ret != 0)
+               return KSFT_FAIL;
        /* Try to catch an excluded tag by a number of tries. */
        for (run = 0; run < RUNS; run++) {
                ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
@@ -147,12 +159,12 @@ static int check_none_included_tags(int mem_type, int mode)
                }
                mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
                /* Check the write validity of the untagged pointer */
-               memset((void *)ptr, '1', BUFFER_SIZE);
+               memset(ptr, '1', BUFFER_SIZE);
                mte_wait_after_trig();
                if (cur_mte_cxt.fault_valid)
                        break;
        }
-       mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+       mte_free_memory(ptr, BUFFER_SIZE, mem_type, false);
        if (cur_mte_cxt.fault_valid)
                return KSFT_FAIL;
        else
index 0328a1e08f65925a580e9a35b02aba76500399db..00ffd34c66d301ee7d5c99e6b8d9d5d944520b7f 100644 (file)
@@ -37,6 +37,10 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
                if (si->si_code == SEGV_MTEAERR) {
                        if (cur_mte_cxt.trig_si_code == si->si_code)
                                cur_mte_cxt.fault_valid = true;
+                       else
+                               ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=%lx, fault addr=%lx\n",
+                                              ((ucontext_t *)uc)->uc_mcontext.pc,
+                                              addr);
                        return;
                }
                /* Compare the context for precise error */
@@ -124,13 +128,16 @@ static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
        int prot_flag, map_flag;
        size_t entire_size = size + range_before + range_after;
 
-       if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
-           mem_type != USE_MPROTECT) {
+       switch (mem_type) {
+       case USE_MALLOC:
+               return malloc(entire_size) + range_before;
+       case USE_MMAP:
+       case USE_MPROTECT:
+               break;
+       default:
                ksft_print_msg("FAIL: Invalid allocate request\n");
                return NULL;
        }
-       if (mem_type == USE_MALLOC)
-               return malloc(entire_size) + range_before;
 
        prot_flag = PROT_READ | PROT_WRITE;
        if (mem_type == USE_MMAP)
@@ -269,18 +276,33 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
 {
        unsigned long en = 0;
 
-       if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
-             mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
-               ksft_print_msg("FAIL: Invalid mte config option\n");
+       switch (mte_option) {
+       case MTE_NONE_ERR:
+       case MTE_SYNC_ERR:
+       case MTE_ASYNC_ERR:
+               break;
+       default:
+               ksft_print_msg("FAIL: Invalid MTE option %x\n", mte_option);
+               return -EINVAL;
+       }
+
+       if (incl_mask & ~MT_INCLUDE_TAG_MASK) {
+               ksft_print_msg("FAIL: Invalid incl_mask %lx\n", incl_mask);
                return -EINVAL;
        }
+
        en = PR_TAGGED_ADDR_ENABLE;
-       if (mte_option == MTE_SYNC_ERR)
+       switch (mte_option) {
+       case MTE_SYNC_ERR:
                en |= PR_MTE_TCF_SYNC;
-       else if (mte_option == MTE_ASYNC_ERR)
+               break;
+       case MTE_ASYNC_ERR:
                en |= PR_MTE_TCF_ASYNC;
-       else if (mte_option == MTE_NONE_ERR)
+               break;
+       case MTE_NONE_ERR:
                en |= PR_MTE_TCF_NONE;
+               break;
+       }
 
        en |= (incl_mask << PR_MTE_TAG_SHIFT);
        /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
index 195a7d1879e63e28bed370913fd6f37422f27282..2d3e71724e55c3a98680f42fa59194584bbbb42c 100644 (file)
@@ -75,10 +75,21 @@ unsigned int mte_get_pstate_tco(void);
 /* Test framework static inline functions/macros */
 static inline void evaluate_test(int err, const char *msg)
 {
-       if (err == KSFT_PASS)
+       switch (err) {
+       case KSFT_PASS:
                ksft_test_result_pass(msg);
-       else if (err == KSFT_FAIL)
+               break;
+       case KSFT_FAIL:
                ksft_test_result_fail(msg);
+               break;
+       case KSFT_SKIP:
+               ksft_test_result_skip(msg);
+               break;
+       default:
+               ksft_test_result_error("Unknown return code %d from %s",
+                                      err, msg);
+               break;
+       }
 }
 
 static inline int check_allocated_memory(void *ptr, size_t size,
index c1742755abb9ab583c9cc66be5069dee4aa10bfc..e8d2b57f73ec146e430ef3f1f480f7c24b11ad67 100644 (file)
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 mangle_*
 fake_sigreturn_*
+sme_*
+ssve_*
 sve_*
+za_*
 !*.[ch]
index f909b70d9e980be483ddb7a0a7bcc7d36d28c6ce..c70fdec7d7c4e827fb024d50201dc3bfc8fb6670 100644 (file)
 enum {
        FSSBS_BIT,
        FSVE_BIT,
+       FSME_BIT,
+       FSME_FA64_BIT,
        FMAX_END
 };
 
 #define FEAT_SSBS              (1UL << FSSBS_BIT)
 #define FEAT_SVE               (1UL << FSVE_BIT)
+#define FEAT_SME               (1UL << FSME_BIT)
+#define FEAT_SME_FA64          (1UL << FSME_FA64_BIT)
 
 /*
  * A descriptor used to describe and configure a test case.
index 5743897984b0e70333b994525f8e2e01903121ea..b588d10afd5b7da5db566c70f8dcb16e66485629 100644 (file)
@@ -27,6 +27,8 @@ static int sig_copyctx = SIGTRAP;
 static char const *const feats_names[FMAX_END] = {
        " SSBS ",
        " SVE ",
+       " SME ",
+       " FA64 ",
 };
 
 #define MAX_FEATS_SZ   128
@@ -268,6 +270,10 @@ int test_init(struct tdescr *td)
                        td->feats_supported |= FEAT_SSBS;
                if (getauxval(AT_HWCAP) & HWCAP_SVE)
                        td->feats_supported |= FEAT_SVE;
+               if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+                       td->feats_supported |= FEAT_SME;
+               if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+                       td->feats_supported |= FEAT_SME_FA64;
                if (feats_ok(td)) {
                        if (td->feats_required & td->feats_supported)
                                fprintf(stderr,
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
new file mode 100644 (file)
index 0000000..7ed762b
--- /dev/null
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the streaming SVE vector length in a signal
+ * handler; this is not supported and is expected to segfault.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+       int vq, vl;
+
+       /*
+        * Enumerate up to SVE_VQ_MAX vector lengths
+        */
+       for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+               vl = prctl(PR_SME_SET_VL, vq * 16);
+               if (vl == -1)
+                       return false;
+
+               vl &= PR_SME_VL_LEN_MASK;
+
+               /* Skip missing VLs */
+               vq = sve_vq_from_vl(vl);
+
+               vls[nvls++] = vl;
+       }
+
+       /* We need at least two VLs */
+       if (nvls < 2) {
+               fprintf(stderr, "Only %d VL supported\n", nvls);
+               return false;
+       }
+
+       return true;
+}
+
+static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
+                                        siginfo_t *si, ucontext_t *uc)
+{
+       size_t resv_sz, offset;
+       struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+       struct sve_context *sve;
+
+       /* Get a signal context with a streaming SVE frame in it */
+       if (!get_current_context(td, &sf.uc))
+               return 1;
+
+       resv_sz = GET_SF_RESV_SIZE(sf);
+       head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+       if (!head) {
+               fprintf(stderr, "No SVE context\n");
+               return 1;
+       }
+
+       if (head->size != sizeof(struct sve_context)) {
+               fprintf(stderr, "Register data present, aborting\n");
+               return 1;
+       }
+
+       sve = (struct sve_context *)head;
+
+       /* No changes are supported; init left us at minimum VL so go to max */
+       fprintf(stderr, "Attempting to change VL from %d to %d\n",
+               sve->vl, vls[0]);
+       sve->vl = vls[0];
+
+       fake_sigreturn(&sf, sizeof(sf), 0);
+
+       return 1;
+}
+
+struct tdescr tde = {
+       .name = "FAKE_SIGRETURN_SSVE_CHANGE",
+       .descr = "Attempt to change Streaming SVE VL",
+       .feats_required = FEAT_SME,
+       .sig_ok = SIGSEGV,
+       .timeout = 3,
+       .init = sme_get_vls,
+       .run = fake_sigreturn_ssve_change_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
new file mode 100644 (file)
index 0000000..f9d76ae
--- /dev/null
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using a streaming mode instruction without enabling it
+ * generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_no_sm_trigger(struct tdescr *td)
+{
+       /* SMSTART ZA ; ADDHA ZA0.S, P0/M, P0/M, Z0.S */
+       asm volatile(".inst 0xd503457f ; .inst 0xc0900000");
+
+       return 0;
+}
+
+int sme_trap_no_sm_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+       return 1;
+}
+
+struct tdescr tde = {
+       .name = "SME trap without SM",
+       .descr = "Check that we get a SIGILL if we use streaming mode without enabling it",
+       .timeout = 3,
+       .feats_required = FEAT_SME,   /* We need a SMSTART ZA */
+       .sanity_disabled = true,
+       .trigger = sme_trap_no_sm_trigger,
+       .run = sme_trap_no_sm_run,
+       .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
new file mode 100644 (file)
index 0000000..e469ae5
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using an instruction not supported in streaming mode
+ * traps when in streaming mode.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_non_streaming_trigger(struct tdescr *td)
+{
+       /*
+        * The framework will handle SIGILL so we need to exit SM to
+        * stop any other code triggering a further SIGILL down the
+        * line from using a streaming-illegal instruction.
+        */
+       asm volatile(".inst 0xd503437f; /* SMSTART SM */ \
+                     cnt v0.16b, v0.16b; \
+                      .inst 0xd503427f  /* SMSTOP SM */");
+
+       return 0;
+}
+
+int sme_trap_non_streaming_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+       return 1;
+}
+
+struct tdescr tde = {
+       .name = "SME SM trap unsupported instruction",
+       .descr = "Check that we get a SIGILL if we use an unsupported instruction in streaming mode",
+       .feats_required = FEAT_SME,
+       .feats_incompatible = FEAT_SME_FA64,
+       .timeout = 3,
+       .sanity_disabled = true,
+       .trigger = sme_trap_non_streaming_trigger,
+       .run = sme_trap_non_streaming_run,
+       .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
new file mode 100644 (file)
index 0000000..3a7747a
--- /dev/null
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that accessing ZA without enabling it generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_za_trigger(struct tdescr *td)
+{
+       /* ZERO ZA */
+       asm volatile(".inst 0xc00800ff");
+
+       return 0;
+}
+
+int sme_trap_za_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+       return 1;
+}
+
+struct tdescr tde = {
+       .name = "SME ZA trap",
+       .descr = "Check that we get a SIGILL if we access ZA without enabling it",
+       .timeout = 3,
+       .sanity_disabled = true,
+       .trigger = sme_trap_za_trigger,
+       .run = sme_trap_za_run,
+       .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
new file mode 100644 (file)
index 0000000..13ff3b3
--- /dev/null
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SME vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sme_vl(struct tdescr *td)
+{
+       int ret = prctl(PR_SME_GET_VL);
+       if (ret == -1)
+               return false;
+
+       vl = ret;
+
+       return true;
+}
+
+static int sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+       size_t resv_sz, offset;
+       struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+       struct za_context *za;
+
+       /* Get a signal context which should have a ZA frame in it */
+       if (!get_current_context(td, &sf.uc))
+               return 1;
+
+       resv_sz = GET_SF_RESV_SIZE(sf);
+       head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+       if (!head) {
+               fprintf(stderr, "No ZA context\n");
+               return 1;
+       }
+       za = (struct za_context *)head;
+
+       if (za->vl != vl) {
+               fprintf(stderr, "ZA sigframe VL %u, expected %u\n",
+                       za->vl, vl);
+               return 1;
+       } else {
+               fprintf(stderr, "got expected VL %u\n", vl);
+       }
+
+       td->pass = 1;
+
+       return 0;
+}
+
+struct tdescr tde = {
+       .name = "SME VL",
+       .descr = "Check that we get the right SME VL reported",
+       .feats_required = FEAT_SME,
+       .timeout = 3,
+       .init = get_sme_vl,
+       .run = sme_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
new file mode 100644 (file)
index 0000000..9022a6c
--- /dev/null
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the streaming SVE register context in signal frames is
+ * set up as expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+       int vq, vl;
+
+       /*
+        * Enumerate up to SVE_VQ_MAX vector lengths
+        */
+       for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+               vl = prctl(PR_SME_SET_VL, vq * 16);
+               if (vl == -1)
+                       return false;
+
+               vl &= PR_SME_VL_LEN_MASK;
+
+               /* Skip missing VLs */
+               vq = sve_vq_from_vl(vl);
+
+               vls[nvls++] = vl;
+       }
+
+       /* We need at least one VL */
+       if (nvls < 1) {
+               fprintf(stderr, "Only %d VL supported\n", nvls);
+               return false;
+       }
+
+       return true;
+}
+
+static void setup_ssve_regs(void)
+{
+       /* smstart sm; real data is TODO */
+       asm volatile(".inst 0xd503437f" : : : );
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+                        unsigned int vl)
+{
+       size_t resv_sz, offset;
+       struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+       struct sve_context *ssve;
+       int ret;
+
+       fprintf(stderr, "Testing VL %d\n", vl);
+
+       ret = prctl(PR_SME_SET_VL, vl);
+       if (ret != vl) {
+               fprintf(stderr, "Failed to set VL, got %d\n", ret);
+               return 1;
+       }
+
+       /*
+        * Get a signal context which should have an SVE frame and registers
+        * in it.
+        */
+       setup_ssve_regs();
+       if (!get_current_context(td, &sf.uc))
+               return 1;
+
+       resv_sz = GET_SF_RESV_SIZE(sf);
+       head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+       if (!head) {
+               fprintf(stderr, "No SVE context\n");
+               return 1;
+       }
+
+       ssve = (struct sve_context *)head;
+       if (ssve->vl != vl) {
+               fprintf(stderr, "Got VL %d, expected %d\n", ssve->vl, vl);
+               return 1;
+       }
+
+       /* The actual size validation is done in get_current_context() */
+       fprintf(stderr, "Got expected size %u and VL %d\n",
+               head->size, ssve->vl);
+
+       return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+       int i;
+
+       for (i = 0; i < nvls; i++) {
+               /*
+                * TODO: the signal test helpers can't currently cope
+                * with signal frames bigger than struct sigcontext,
+                * skip VLs that will trigger that.
+                */
+               if (vls[i] > 64) {
+                       printf("Skipping VL %u due to stack size\n", vls[i]);
+                       continue;
+               }
+
+               if (do_one_sme_vl(td, si, uc, vls[i]))
+                       return 1;
+       }
+
+       td->pass = 1;
+
+       return 0;
+}
+
+struct tdescr tde = {
+       .name = "Streaming SVE registers",
+       .descr = "Check that we get the right Streaming SVE registers reported",
+       /*
+        * We shouldn't require FA64 but things like memset() used in the
+        * helpers might use unsupported instructions so for now disable
+        * the test unless we've got the full instruction set.
+        */
+       .feats_required = FEAT_SME | FEAT_SME_FA64,
+       .timeout = 3,
+       .init = sme_get_vls,
+       .run = sme_regs,
+};
index 8c2a57fc2f9cccb3dfdb2047333a752c4e662bfb..84c36bee4d82a19ef336d731fdbb14ea960e2ba3 100644 (file)
@@ -75,6 +75,31 @@ bool validate_sve_context(struct sve_context *sve, char **err)
        return true;
 }
 
+bool validate_za_context(struct za_context *za, char **err)
+{
+       size_t regs_size;
+
+       if (!za || !err)
+               return false;
+
+       /* Size will be rounded up to a multiple of 16 bytes */
+       regs_size = ((ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za->vl)) + 15) / 16) * 16;
+
+       /* Either a bare za_context or a za_context followed by regs data */
+       if ((za->head.size != sizeof(struct za_context)) &&
+           (za->head.size != regs_size)) {
+               *err = "bad size for ZA context";
+               return false;
+       }
+
+       if (!sve_vl_valid(za->vl)) {
+               *err = "SME VL in ZA context invalid";
+
+               return false;
+       }
+
+       return true;
+}
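
As an illustration of the second size the check above accepts: ZA holds vl
rows of vl bytes, so the register payload grows quadratically with the
vector length. A sketch assuming the <asm/sigcontext.h> helpers used above
are in scope (za_expected_regs_size is hypothetical):

    static size_t za_expected_regs_size(unsigned int vl)
    {
            unsigned int vq = sve_vq_from_vl(vl);

            /* Round up to a multiple of 16 bytes, matching the kernel */
            return ((ZA_SIG_CONTEXT_SIZE(vq) + 15) / 16) * 16;
    }
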
+
 bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 {
        bool terminated = false;
@@ -82,6 +107,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
        int flags = 0;
        struct extra_context *extra = NULL;
        struct sve_context *sve = NULL;
+       struct za_context *za = NULL;
        struct _aarch64_ctx *head =
                (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
 
@@ -120,6 +146,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
                        sve = (struct sve_context *)head;
                        flags |= SVE_CTX;
                        break;
+               case ZA_MAGIC:
+                       if (flags & ZA_CTX)
+                               *err = "Multiple ZA_MAGIC";
+                       /* Size is validated in validate_za_context() */
+                       za = (struct za_context *)head;
+                       flags |= ZA_CTX;
+                       break;
                case EXTRA_MAGIC:
                        if (flags & EXTRA_CTX)
                                *err = "Multiple EXTRA_MAGIC";
@@ -165,6 +198,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
                if (flags & SVE_CTX)
                        if (!validate_sve_context(sve, err))
                                return false;
+               if (flags & ZA_CTX)
+                       if (!validate_za_context(za, err))
+                               return false;
 
                head = GET_RESV_NEXT_HEAD(head);
        }
index ad884c135314d76cb766d2022d615662ea27428d..49f1d5de7b5b72de8011120c8d6aca7b3fef5874 100644 (file)
@@ -16,7 +16,8 @@
 
 #define FPSIMD_CTX     (1 << 0)
 #define SVE_CTX                (1 << 1)
-#define EXTRA_CTX      (1 << 2)
+#define ZA_CTX         (1 << 2)
+#define EXTRA_CTX      (1 << 3)
 
 #define KSFT_BAD_MAGIC 0xdeadbeef
 
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
new file mode 100644 (file)
index 0000000..b94e4f9
--- /dev/null
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the ZA register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+       int vq, vl;
+
+       /*
+        * Enumerate up to SVE_VQ_MAX vector lengths
+        */
+       for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+               vl = prctl(PR_SME_SET_VL, vq * 16);
+               if (vl == -1)
+                       return false;
+
+               vl &= PR_SME_VL_LEN_MASK;
+
+               /* Skip missing VLs */
+               vq = sve_vq_from_vl(vl);
+
+               vls[nvls++] = vl;
+       }
+
+       /* We need at least one VL */
+       if (nvls < 1) {
+               fprintf(stderr, "Only %d VL supported\n", nvls);
+               return false;
+       }
+
+       return true;
+}
+
+static void setup_za_regs(void)
+{
+       /* smstart za; real data is TODO */
+       asm volatile(".inst 0xd503457f" : : : );
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+                        unsigned int vl)
+{
+       size_t resv_sz, offset;
+       struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+       struct za_context *za;
+
+       fprintf(stderr, "Testing VL %d\n", vl);
+
+       if (prctl(PR_SME_SET_VL, vl) != vl) {
+               fprintf(stderr, "Failed to set VL\n");
+               return 1;
+       }
+
+       /*
+        * Get a signal context which should have a ZA frame and registers
+        * in it.
+        */
+       setup_za_regs();
+       if (!get_current_context(td, &sf.uc))
+               return 1;
+
+       resv_sz = GET_SF_RESV_SIZE(sf);
+       head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+       if (!head) {
+               fprintf(stderr, "No ZA context\n");
+               return 1;
+       }
+
+       za = (struct za_context *)head;
+       if (za->vl != vl) {
+               fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl);
+               return 1;
+       }
+
+       /* The actual size validation is done in get_current_context() */
+       fprintf(stderr, "Got expected size %u and VL %d\n",
+               head->size, za->vl);
+
+       return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+       int i;
+
+       for (i = 0; i < nvls; i++) {
+               /*
+                * TODO: the signal test helpers can't currently cope
+                * with signal frames bigger than struct sigcontext,
+                * skip VLs that will trigger that.
+                */
+               if (vls[i] > 32) {
+                       printf("Skipping VL %u due to stack size\n", vls[i]);
+                       continue;
+               }
+
+               if (do_one_sme_vl(td, si, uc, vls[i]))
+                       return 1;
+       }
+
+       td->pass = 1;
+
+       return 0;
+}
+
+struct tdescr tde = {
+       .name = "ZA register",
+       .descr = "Check that we get the right ZA registers reported",
+       .feats_required = FEAT_SME,
+       .timeout = 3,
+       .init = sme_get_vls,
+       .run = sme_regs,
+};
index 0d06ffa95d9d4cdba158decf53bd17de479092f4..93d77574b255d863f9fc84a4702a10b6c7179b47 100644 (file)
@@ -208,7 +208,7 @@ static bool sanity_check_pmu(struct kvm_vm *vm)
        return success;
 }
 
-static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
+static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
 {
        struct kvm_pmu_event_filter *f;
        int size = sizeof(*f) + nevents * sizeof(f->events[0]);
@@ -220,19 +220,29 @@ static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
        return f;
 }
 
-static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+
+static struct kvm_pmu_event_filter *
+create_pmu_event_filter(const uint64_t event_list[],
+                       int nevents, uint32_t action)
 {
        struct kvm_pmu_event_filter *f;
        int i;
 
-       f = make_pmu_event_filter(ARRAY_SIZE(event_list));
+       f = alloc_pmu_event_filter(nevents);
        f->action = action;
-       for (i = 0; i < ARRAY_SIZE(event_list); i++)
+       for (i = 0; i < nevents; i++)
                f->events[i] = event_list[i];
 
        return f;
 }
 
+static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+{
+       return create_pmu_event_filter(event_list,
+                                      ARRAY_SIZE(event_list),
+                                      action);
+}
+
 /*
  * Remove the first occurrence of 'event' (if any) from the filter's
  * event list.
@@ -271,6 +281,22 @@ static uint64_t test_with_filter(struct kvm_vm *vm,
        return run_vm_to_sync(vm);
 }
 
+static void test_amd_deny_list(struct kvm_vm *vm)
+{
+       uint64_t event = EVENT(0x1C2, 0);
+       struct kvm_pmu_event_filter *f;
+       uint64_t count;
+
+       f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
+       count = test_with_filter(vm, f);
+
+       free(f);
+       if (count != NUM_BRANCHES)
+               pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+                       __func__, count, NUM_BRANCHES);
+       TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
 static void test_member_deny_list(struct kvm_vm *vm)
 {
        struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
@@ -453,6 +479,9 @@ int main(int argc, char *argv[])
                exit(KSFT_SKIP);
        }
 
+       if (use_amd_pmu())
+               test_amd_deny_list(vm);
+
        test_without_filter(vm);
        test_member_deny_list(vm);
        test_member_allow_list(vm);
index ca40abe9daa8607c971e08bfa916605128b21eda..da92908178667086c9583a435b8920b16f76a509 100644 (file)
 #include "common.h"
 
 #ifndef O_PATH
-#define O_PATH         010000000
+#define O_PATH 010000000
 #endif
 
-TEST(inconsistent_attr) {
+TEST(inconsistent_attr)
+{
        const long page_size = sysconf(_SC_PAGESIZE);
        char *const buf = malloc(page_size + 1);
        struct landlock_ruleset_attr *const ruleset_attr = (void *)buf;
@@ -34,20 +35,26 @@ TEST(inconsistent_attr) {
        ASSERT_EQ(EINVAL, errno);
        ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0));
        ASSERT_EQ(EINVAL, errno);
+       ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 7, 0));
+       ASSERT_EQ(EINVAL, errno);
 
        ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0));
        /* The size is less than sizeof(struct landlock_attr_enforce). */
        ASSERT_EQ(EFAULT, errno);
 
-       ASSERT_EQ(-1, landlock_create_ruleset(NULL,
-                               sizeof(struct landlock_ruleset_attr), 0));
+       ASSERT_EQ(-1, landlock_create_ruleset(
+                             NULL, sizeof(struct landlock_ruleset_attr), 0));
        ASSERT_EQ(EFAULT, errno);
 
        ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
        ASSERT_EQ(E2BIG, errno);
 
-       ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr,
-                               sizeof(struct landlock_ruleset_attr), 0));
+       /* Checks minimal valid attribute size. */
+       ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 8, 0));
+       ASSERT_EQ(ENOMSG, errno);
+       ASSERT_EQ(-1, landlock_create_ruleset(
+                             ruleset_attr,
+                             sizeof(struct landlock_ruleset_attr), 0));
        ASSERT_EQ(ENOMSG, errno);
        ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
        ASSERT_EQ(ENOMSG, errno);
@@ -63,38 +70,44 @@ TEST(inconsistent_attr) {
        free(buf);
 }
 
-TEST(abi_version) {
+TEST(abi_version)
+{
        const struct landlock_ruleset_attr ruleset_attr = {
                .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
        };
-       ASSERT_EQ(1, landlock_create_ruleset(NULL, 0,
-                               LANDLOCK_CREATE_RULESET_VERSION));
+       ASSERT_EQ(2, landlock_create_ruleset(NULL, 0,
+                                            LANDLOCK_CREATE_RULESET_VERSION));
 
        ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
-                               LANDLOCK_CREATE_RULESET_VERSION));
+                                             LANDLOCK_CREATE_RULESET_VERSION));
        ASSERT_EQ(EINVAL, errno);
 
        ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
-                               LANDLOCK_CREATE_RULESET_VERSION));
+                                             LANDLOCK_CREATE_RULESET_VERSION));
        ASSERT_EQ(EINVAL, errno);
 
-       ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
-                               sizeof(ruleset_attr),
-                               LANDLOCK_CREATE_RULESET_VERSION));
+       ASSERT_EQ(-1,
+                 landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+                                         LANDLOCK_CREATE_RULESET_VERSION));
        ASSERT_EQ(EINVAL, errno);
 
        ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0,
-                               LANDLOCK_CREATE_RULESET_VERSION | 1 << 31));
+                                             LANDLOCK_CREATE_RULESET_VERSION |
+                                                     1 << 31));
        ASSERT_EQ(EINVAL, errno);
 }
 
-TEST(inval_create_ruleset_flags) {
+/* Tests ordering of syscall argument checks. */
+TEST(create_ruleset_checks_ordering)
+{
        const int last_flag = LANDLOCK_CREATE_RULESET_VERSION;
        const int invalid_flag = last_flag << 1;
+       int ruleset_fd;
        const struct landlock_ruleset_attr ruleset_attr = {
                .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
        };
 
+       /* Checks priority for invalid flags. */
        ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, invalid_flag));
        ASSERT_EQ(EINVAL, errno);
 
@@ -102,44 +115,121 @@ TEST(inval_create_ruleset_flags) {
        ASSERT_EQ(EINVAL, errno);
 
        ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
-                               invalid_flag));
+                                             invalid_flag));
+       ASSERT_EQ(EINVAL, errno);
+
+       ASSERT_EQ(-1,
+                 landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+                                         invalid_flag));
        ASSERT_EQ(EINVAL, errno);
 
-       ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
-                               sizeof(ruleset_attr), invalid_flag));
+       /* Checks too big ruleset_attr size. */
+       ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, -1, 0));
+       ASSERT_EQ(E2BIG, errno);
+
+       /* Checks too small ruleset_attr size. */
+       ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, 0));
+       ASSERT_EQ(EINVAL, errno);
+       ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 1, 0));
        ASSERT_EQ(EINVAL, errno);
+
+       /* Checks valid call. */
+       ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+       ASSERT_LE(0, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
 }
 
-TEST(empty_path_beneath_attr) {
+/* Tests ordering of syscall argument checks. */
+TEST(add_rule_checks_ordering)
+{
        const struct landlock_ruleset_attr ruleset_attr = {
                .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
        };
-       const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
+       struct landlock_path_beneath_attr path_beneath_attr = {
+               .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+               .parent_fd = -1,
+       };
+       const int ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 
        ASSERT_LE(0, ruleset_fd);
 
-       /* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */
+       /* Checks invalid flags. */
+       ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 1));
+       ASSERT_EQ(EINVAL, errno);
+
+       /* Checks invalid ruleset FD. */
+       ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 0));
+       ASSERT_EQ(EBADF, errno);
+
+       /* Checks invalid rule type. */
+       ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, 0, NULL, 0));
+       ASSERT_EQ(EINVAL, errno);
+
+       /* Checks invalid rule attr. */
        ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               NULL, 0));
+                                       NULL, 0));
        ASSERT_EQ(EFAULT, errno);
+
+       /* Checks invalid path_beneath.parent_fd. */
+       ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+                                       &path_beneath_attr, 0));
+       ASSERT_EQ(EBADF, errno);
+
+       /* Checks valid call. */
+       path_beneath_attr.parent_fd =
+               open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+       ASSERT_LE(0, path_beneath_attr.parent_fd);
+       ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+                                      &path_beneath_attr, 0));
+       ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
        ASSERT_EQ(0, close(ruleset_fd));
 }
 
-TEST(inval_fd_enforce) {
+/* Tests ordering of syscall argument and permission checks. */
+TEST(restrict_self_checks_ordering)
+{
+       const struct landlock_ruleset_attr ruleset_attr = {
+               .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+       };
+       struct landlock_path_beneath_attr path_beneath_attr = {
+               .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+               .parent_fd = -1,
+       };
+       const int ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+       ASSERT_LE(0, ruleset_fd);
+       path_beneath_attr.parent_fd =
+               open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+       ASSERT_LE(0, path_beneath_attr.parent_fd);
+       ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+                                      &path_beneath_attr, 0));
+       ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+
+       /* Checks unprivileged enforcement without no_new_privs. */
+       drop_caps(_metadata);
+       ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+       ASSERT_EQ(EPERM, errno);
+       ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
+       ASSERT_EQ(EPERM, errno);
+       ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+       ASSERT_EQ(EPERM, errno);
+
        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
 
+       /* Checks invalid flags. */
+       ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+       ASSERT_EQ(EINVAL, errno);
+
+       /* Checks invalid ruleset FD. */
        ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
        ASSERT_EQ(EBADF, errno);
-}
-
-TEST(unpriv_enforce_without_no_new_privs) {
-       int err;
 
-       drop_caps(_metadata);
-       err = landlock_restrict_self(-1, 0);
-       ASSERT_EQ(EPERM, errno);
-       ASSERT_EQ(err, -1);
+       /* Checks valid call. */
+       ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+       ASSERT_EQ(0, close(ruleset_fd));
 }
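
Taken together, the three tests above walk the normal Landlock sequence:
create a ruleset, add a path rule, then enforce it. A minimal end-to-end
sketch, assuming the landlock_*() wrappers from common.h and
<linux/landlock.h> (sandbox_tmp_exec is hypothetical; error handling elided):

    static int sandbox_tmp_exec(void)
    {
            const struct landlock_ruleset_attr ruleset_attr = {
                    .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
            };
            struct landlock_path_beneath_attr path_beneath = {
                    .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
            };
            int ruleset_fd;

            ruleset_fd = landlock_create_ruleset(&ruleset_attr,
                                                 sizeof(ruleset_attr), 0);
            path_beneath.parent_fd = open("/tmp", O_PATH | O_CLOEXEC);
            landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
                              &path_beneath, 0);
            close(path_beneath.parent_fd);
            prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
            landlock_restrict_self(ruleset_fd, 0);
            close(ruleset_fd);
            return 0;
    }
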
 
 TEST(ruleset_fd_io)
@@ -151,8 +241,8 @@ TEST(ruleset_fd_io)
        char buf;
 
        drop_caps(_metadata);
-       ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
+       ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
        ASSERT_LE(0, ruleset_fd);
 
        ASSERT_EQ(-1, write(ruleset_fd, ".", 1));
@@ -197,14 +287,15 @@ TEST(ruleset_fd_transfer)
        drop_caps(_metadata);
 
        /* Creates a test ruleset with a simple rule. */
-       ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
+       ruleset_fd_tx =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
        ASSERT_LE(0, ruleset_fd_tx);
-       path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW |
-                       O_DIRECTORY | O_CLOEXEC);
+       path_beneath_attr.parent_fd =
+               open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
        ASSERT_LE(0, path_beneath_attr.parent_fd);
-       ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath_attr, 0));
+       ASSERT_EQ(0,
+                 landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
+                                   &path_beneath_attr, 0));
        ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
 
        cmsg = CMSG_FIRSTHDR(&msg);
@@ -215,7 +306,8 @@ TEST(ruleset_fd_transfer)
        memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx));
 
        /* Sends the ruleset FD over a socketpair and then close it. */
-       ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds));
+       ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
+                               socket_fds));
        ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0));
        ASSERT_EQ(0, close(socket_fds[0]));
        ASSERT_EQ(0, close(ruleset_fd_tx));
@@ -226,7 +318,8 @@ TEST(ruleset_fd_transfer)
                int ruleset_fd_rx;
 
                *(char *)msg.msg_iov->iov_base = '\0';
-               ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
+               ASSERT_EQ(sizeof(data_tx),
+                         recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
                ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base);
                ASSERT_EQ(0, close(socket_fds[1]));
                cmsg = CMSG_FIRSTHDR(&msg);
index 183b7e8e1b957caf815d44ae32a0e32566ac0a04..7ba18eb2378382a2b5b4fe928b0bd299348c4f8a 100644 (file)
@@ -25,6 +25,7 @@
  * this to be possible, we must not call abort() but instead exit smoothly
  * (hence the step print).
  */
+/* clang-format off */
 #define TEST_F_FORK(fixture_name, test_name) \
        static void fixture_name##_##test_name##_child( \
                struct __test_metadata *_metadata, \
                FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
                const FIXTURE_VARIANT(fixture_name) \
                        __attribute__((unused)) *variant)
+/* clang-format on */
 
 #ifndef landlock_create_ruleset
-static inline int landlock_create_ruleset(
-               const struct landlock_ruleset_attr *const attr,
-               const size_t size, const __u32 flags)
+static inline int
+landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
+                       const size_t size, const __u32 flags)
 {
        return syscall(__NR_landlock_create_ruleset, attr, size, flags);
 }
@@ -83,17 +85,18 @@ static inline int landlock_create_ruleset(
 
 #ifndef landlock_add_rule
 static inline int landlock_add_rule(const int ruleset_fd,
-               const enum landlock_rule_type rule_type,
-               const void *const rule_attr, const __u32 flags)
+                                   const enum landlock_rule_type rule_type,
+                                   const void *const rule_attr,
+                                   const __u32 flags)
 {
-       return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type,
-                       rule_attr, flags);
+       return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
+                      flags);
 }
 #endif
 
 #ifndef landlock_restrict_self
 static inline int landlock_restrict_self(const int ruleset_fd,
-               const __u32 flags)
+                                        const __u32 flags)
 {
        return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
 }
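
Taken together, these wrappers cover the whole Landlock enforcement sequence.
A minimal, hedged usage sketch (reusing the three wrappers above; "/var/data"
is a hypothetical directory chosen for illustration):

	/* Illustrative only: restrict this process to reading under /var/data. */
	#include <fcntl.h>
	#include <linux/landlock.h>
	#include <stdio.h>
	#include <sys/prctl.h>
	#include <unistd.h>

	int main(void)
	{
		struct landlock_ruleset_attr ruleset_attr = {
			.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
					     LANDLOCK_ACCESS_FS_WRITE_FILE,
		};
		struct landlock_path_beneath_attr path_beneath = {
			.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE,
		};
		const int ruleset_fd = landlock_create_ruleset(
			&ruleset_attr, sizeof(ruleset_attr), 0);

		if (ruleset_fd < 0) {
			perror("landlock_create_ruleset");
			return 1;
		}
		path_beneath.parent_fd = open("/var/data", O_PATH | O_CLOEXEC);
		if (path_beneath.parent_fd < 0 ||
		    landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
				      &path_beneath, 0)) {
			perror("landlock_add_rule");
			return 1;
		}
		close(path_beneath.parent_fd);
		/* no_new_privs is required for unprivileged enforcement. */
		if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
		    landlock_restrict_self(ruleset_fd, 0)) {
			perror("landlock_restrict_self");
			return 1;
		}
		close(ruleset_fd);
		/* File reads outside /var/data and all file writes are now denied. */
		return 0;
	}
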
@@ -111,69 +114,76 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
        };
 
        cap_p = cap_get_proc();
-       EXPECT_NE(NULL, cap_p) {
+       EXPECT_NE(NULL, cap_p)
+       {
                TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
        }
-       EXPECT_NE(-1, cap_clear(cap_p)) {
+       EXPECT_NE(-1, cap_clear(cap_p))
+       {
                TH_LOG("Failed to cap_clear: %s", strerror(errno));
        }
        if (!drop_all) {
                EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED,
-                                       ARRAY_SIZE(caps), caps, CAP_SET)) {
+                                          ARRAY_SIZE(caps), caps, CAP_SET))
+               {
                        TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
                }
        }
-       EXPECT_NE(-1, cap_set_proc(cap_p)) {
+       EXPECT_NE(-1, cap_set_proc(cap_p))
+       {
                TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
        }
-       EXPECT_NE(-1, cap_free(cap_p)) {
+       EXPECT_NE(-1, cap_free(cap_p))
+       {
                TH_LOG("Failed to cap_free: %s", strerror(errno));
        }
 }
 
 /* We cannot put such helpers in a library because of kselftest_harness.h. */
-__attribute__((__unused__))
-static void disable_caps(struct __test_metadata *const _metadata)
+__attribute__((__unused__)) static void
+disable_caps(struct __test_metadata *const _metadata)
 {
        _init_caps(_metadata, false);
 }
 
-__attribute__((__unused__))
-static void drop_caps(struct __test_metadata *const _metadata)
+__attribute__((__unused__)) static void
+drop_caps(struct __test_metadata *const _metadata)
 {
        _init_caps(_metadata, true);
 }
 
 static void _effective_cap(struct __test_metadata *const _metadata,
-               const cap_value_t caps, const cap_flag_value_t value)
+                          const cap_value_t caps, const cap_flag_value_t value)
 {
        cap_t cap_p;
 
        cap_p = cap_get_proc();
-       EXPECT_NE(NULL, cap_p) {
+       EXPECT_NE(NULL, cap_p)
+       {
                TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
        }
-       EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) {
+       EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value))
+       {
                TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
        }
-       EXPECT_NE(-1, cap_set_proc(cap_p)) {
+       EXPECT_NE(-1, cap_set_proc(cap_p))
+       {
                TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
        }
-       EXPECT_NE(-1, cap_free(cap_p)) {
+       EXPECT_NE(-1, cap_free(cap_p))
+       {
                TH_LOG("Failed to cap_free: %s", strerror(errno));
        }
 }
 
-__attribute__((__unused__))
-static void set_cap(struct __test_metadata *const _metadata,
-               const cap_value_t caps)
+__attribute__((__unused__)) static void
+set_cap(struct __test_metadata *const _metadata, const cap_value_t caps)
 {
        _effective_cap(_metadata, caps, CAP_SET);
 }
 
-__attribute__((__unused__))
-static void clear_cap(struct __test_metadata *const _metadata,
-               const cap_value_t caps)
+__attribute__((__unused__)) static void
+clear_cap(struct __test_metadata *const _metadata, const cap_value_t caps)
 {
        _effective_cap(_metadata, caps, CAP_CLEAR);
 }
index 10c9a1e4ebd9b7a4225b6a3f43d1423413743b5d..21a2ce8fa739d8a11d9a656fee2c6e816ed4e0ed 100644 (file)
 
 #include "common.h"
 
-#define TMP_DIR                "tmp"
-#define BINARY_PATH    "./true"
+#ifndef renameat2
+int renameat2(int olddirfd, const char *oldpath, int newdirfd,
+             const char *newpath, unsigned int flags)
+{
+       return syscall(__NR_renameat2, olddirfd, oldpath, newdirfd, newpath,
+                      flags);
+}
+#endif
+
+#ifndef RENAME_EXCHANGE
+#define RENAME_EXCHANGE (1 << 1)
+#endif
+
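
As a reminder for readers of the tests below, renameat2(2) with RENAME_EXCHANGE
atomically swaps two paths. A short hedged sketch using the wrapper above
(hypothetical paths):

	/* Illustrative only: atomically exchange two hypothetical entries. */
	if (renameat2(AT_FDCWD, "tmp/current", AT_FDCWD, "tmp/next",
		      RENAME_EXCHANGE))
		perror("renameat2");
	/* On success, "tmp/current" and "tmp/next" traded places atomically. */
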
+#define TMP_DIR "tmp"
+#define BINARY_PATH "./true"
 
 /* Paths (sibling number and depth) */
 static const char dir_s1d1[] = TMP_DIR "/s1d1";
@@ -75,7 +88,7 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
  */
 
 static void mkdir_parents(struct __test_metadata *const _metadata,
-               const char *const path)
+                         const char *const path)
 {
        char *walker;
        const char *parent;
@@ -90,9 +103,10 @@ static void mkdir_parents(struct __test_metadata *const _metadata,
                        continue;
                walker[i] = '\0';
                err = mkdir(parent, 0700);
-               ASSERT_FALSE(err && errno != EEXIST) {
-                       TH_LOG("Failed to create directory \"%s\": %s",
-                                       parent, strerror(errno));
+               ASSERT_FALSE(err && errno != EEXIST)
+               {
+                       TH_LOG("Failed to create directory \"%s\": %s", parent,
+                              strerror(errno));
                }
                walker[i] = '/';
        }
@@ -100,22 +114,24 @@ static void mkdir_parents(struct __test_metadata *const _metadata,
 }
 
 static void create_directory(struct __test_metadata *const _metadata,
-               const char *const path)
+                            const char *const path)
 {
        mkdir_parents(_metadata, path);
-       ASSERT_EQ(0, mkdir(path, 0700)) {
+       ASSERT_EQ(0, mkdir(path, 0700))
+       {
                TH_LOG("Failed to create directory \"%s\": %s", path,
-                               strerror(errno));
+                      strerror(errno));
        }
 }
 
 static void create_file(struct __test_metadata *const _metadata,
-               const char *const path)
+                       const char *const path)
 {
        mkdir_parents(_metadata, path);
-       ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) {
+       ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0))
+       {
                TH_LOG("Failed to create file \"%s\": %s", path,
-                               strerror(errno));
+                      strerror(errno));
        }
 }
 
@@ -130,7 +146,7 @@ static int remove_path(const char *const path)
                goto out;
        }
        if (unlink(path) && rmdir(path)) {
-               if (errno != ENOENT)
+               if (errno != ENOENT && errno != ENOTDIR)
                        err = errno;
                goto out;
        }
@@ -221,8 +237,9 @@ static void remove_layout1(struct __test_metadata *const _metadata)
        EXPECT_EQ(0, remove_path(dir_s3d2));
 }
 
-FIXTURE(layout1) {
-};
+/* clang-format off */
+FIXTURE(layout1) {};
+/* clang-format on */
 
 FIXTURE_SETUP(layout1)
 {
@@ -242,7 +259,8 @@ FIXTURE_TEARDOWN(layout1)
  * This helper makes it possible to use the ASSERT_* macros and to print
  * the line number pointing to the test caller.
  */
-static int test_open_rel(const int dirfd, const char *const path, const int flags)
+static int test_open_rel(const int dirfd, const char *const path,
+                        const int flags)
 {
        int fd;
 
@@ -291,23 +309,23 @@ TEST_F_FORK(layout1, inval)
 {
        struct landlock_path_beneath_attr path_beneath = {
                .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
-                       LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                .parent_fd = -1,
        };
        struct landlock_ruleset_attr ruleset_attr = {
                .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
-                       LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                    LANDLOCK_ACCESS_FS_WRITE_FILE,
        };
        int ruleset_fd;
 
-       path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
-                       O_CLOEXEC);
+       path_beneath.parent_fd =
+               open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
        ASSERT_LE(0, path_beneath.parent_fd);
 
        ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC);
        ASSERT_LE(0, ruleset_fd);
        ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0));
+                                       &path_beneath, 0));
        /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */
        ASSERT_EQ(EBADF, errno);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -315,55 +333,55 @@ TEST_F_FORK(layout1, inval)
        ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC);
        ASSERT_LE(0, ruleset_fd);
        ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0));
+                                       &path_beneath, 0));
        /* Returns EBADFD because ruleset_fd is not a valid ruleset. */
        ASSERT_EQ(EBADFD, errno);
        ASSERT_EQ(0, close(ruleset_fd));
 
        /* Gets a real ruleset. */
-       ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
+       ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
        ASSERT_LE(0, ruleset_fd);
        ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0));
+                                      &path_beneath, 0));
        ASSERT_EQ(0, close(path_beneath.parent_fd));
 
        /* Tests without O_PATH. */
        path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC);
        ASSERT_LE(0, path_beneath.parent_fd);
        ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0));
+                                      &path_beneath, 0));
        ASSERT_EQ(0, close(path_beneath.parent_fd));
 
        /* Tests with a ruleset FD. */
        path_beneath.parent_fd = ruleset_fd;
        ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0));
+                                       &path_beneath, 0));
        ASSERT_EQ(EBADFD, errno);
 
        /* Checks unhandled allowed_access. */
-       path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
-                       O_CLOEXEC);
+       path_beneath.parent_fd =
+               open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
        ASSERT_LE(0, path_beneath.parent_fd);
 
        /* Tests with legitimate values. */
        path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE;
        ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0));
+                                       &path_beneath, 0));
        ASSERT_EQ(EINVAL, errno);
        path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE;
 
        /* Tests with an unknown (64-bit) value. */
        path_beneath.allowed_access |= (1ULL << 60);
        ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0));
+                                       &path_beneath, 0));
        ASSERT_EQ(EINVAL, errno);
        path_beneath.allowed_access &= ~(1ULL << 60);
 
        /* Tests with no access. */
        path_beneath.allowed_access = 0;
        ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0));
+                                       &path_beneath, 0));
        ASSERT_EQ(ENOMSG, errno);
        path_beneath.allowed_access &= ~(1ULL << 60);
 
@@ -376,12 +394,14 @@ TEST_F_FORK(layout1, inval)
        ASSERT_EQ(0, close(ruleset_fd));
 }
 
+/* clang-format off */
+
 #define ACCESS_FILE ( \
        LANDLOCK_ACCESS_FS_EXECUTE | \
        LANDLOCK_ACCESS_FS_WRITE_FILE | \
        LANDLOCK_ACCESS_FS_READ_FILE)
 
-#define ACCESS_LAST LANDLOCK_ACCESS_FS_MAKE_SYM
+#define ACCESS_LAST LANDLOCK_ACCESS_FS_REFER
 
 #define ACCESS_ALL ( \
        ACCESS_FILE | \
@@ -394,55 +414,90 @@ TEST_F_FORK(layout1, inval)
        LANDLOCK_ACCESS_FS_MAKE_SOCK | \
        LANDLOCK_ACCESS_FS_MAKE_FIFO | \
        LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+       LANDLOCK_ACCESS_FS_MAKE_SYM | \
        ACCESS_LAST)
 
-TEST_F_FORK(layout1, file_access_rights)
+/* clang-format on */
+
+TEST_F_FORK(layout1, file_and_dir_access_rights)
 {
        __u64 access;
        int err;
-       struct landlock_path_beneath_attr path_beneath = {};
+       struct landlock_path_beneath_attr path_beneath_file = {},
+                                         path_beneath_dir = {};
        struct landlock_ruleset_attr ruleset_attr = {
                .handled_access_fs = ACCESS_ALL,
        };
-       const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
+       const int ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 
        ASSERT_LE(0, ruleset_fd);
 
        /* Tests access rights for files. */
-       path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
-       ASSERT_LE(0, path_beneath.parent_fd);
+       path_beneath_file.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
+       ASSERT_LE(0, path_beneath_file.parent_fd);
+
+       /* Tests access rights for directories. */
+       path_beneath_dir.parent_fd =
+               open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+       ASSERT_LE(0, path_beneath_dir.parent_fd);
+
        for (access = 1; access <= ACCESS_LAST; access <<= 1) {
-               path_beneath.allowed_access = access;
+               path_beneath_dir.allowed_access = access;
+               ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+                                              LANDLOCK_RULE_PATH_BENEATH,
+                                              &path_beneath_dir, 0));
+
+               path_beneath_file.allowed_access = access;
                err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0);
-               if ((access | ACCESS_FILE) == ACCESS_FILE) {
+                                       &path_beneath_file, 0);
+               if (access & ACCESS_FILE) {
                        ASSERT_EQ(0, err);
                } else {
                        ASSERT_EQ(-1, err);
                        ASSERT_EQ(EINVAL, errno);
                }
        }
-       ASSERT_EQ(0, close(path_beneath.parent_fd));
+       ASSERT_EQ(0, close(path_beneath_file.parent_fd));
+       ASSERT_EQ(0, close(path_beneath_dir.parent_fd));
+       ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, unknown_access_rights)
+{
+       __u64 access_mask;
+
+       for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
+            access_mask >>= 1) {
+               struct landlock_ruleset_attr ruleset_attr = {
+                       .handled_access_fs = access_mask,
+               };
+
+               ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+                                                     sizeof(ruleset_attr), 0));
+               ASSERT_EQ(EINVAL, errno);
+       }
 }
 
 static void add_path_beneath(struct __test_metadata *const _metadata,
-               const int ruleset_fd, const __u64 allowed_access,
-               const char *const path)
+                            const int ruleset_fd, const __u64 allowed_access,
+                            const char *const path)
 {
        struct landlock_path_beneath_attr path_beneath = {
                .allowed_access = allowed_access,
        };
 
        path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC);
-       ASSERT_LE(0, path_beneath.parent_fd) {
+       ASSERT_LE(0, path_beneath.parent_fd)
+       {
                TH_LOG("Failed to open directory \"%s\": %s", path,
-                               strerror(errno));
+                      strerror(errno));
        }
        ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0)) {
+                                      &path_beneath, 0))
+       {
                TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
-                               strerror(errno));
+                      strerror(errno));
        }
        ASSERT_EQ(0, close(path_beneath.parent_fd));
 }
@@ -452,6 +507,8 @@ struct rule {
        __u64 access;
 };
 
+/* clang-format off */
+
 #define ACCESS_RO ( \
        LANDLOCK_ACCESS_FS_READ_FILE | \
        LANDLOCK_ACCESS_FS_READ_DIR)
@@ -460,39 +517,46 @@ struct rule {
        ACCESS_RO | \
        LANDLOCK_ACCESS_FS_WRITE_FILE)
 
+/* clang-format on */
+
 static int create_ruleset(struct __test_metadata *const _metadata,
-               const __u64 handled_access_fs, const struct rule rules[])
+                         const __u64 handled_access_fs,
+                         const struct rule rules[])
 {
        int ruleset_fd, i;
        struct landlock_ruleset_attr ruleset_attr = {
                .handled_access_fs = handled_access_fs,
        };
 
-       ASSERT_NE(NULL, rules) {
+       ASSERT_NE(NULL, rules)
+       {
                TH_LOG("No rule list");
        }
-       ASSERT_NE(NULL, rules[0].path) {
+       ASSERT_NE(NULL, rules[0].path)
+       {
                TH_LOG("Empty rule list");
        }
 
-       ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
-       ASSERT_LE(0, ruleset_fd) {
+       ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+       ASSERT_LE(0, ruleset_fd)
+       {
                TH_LOG("Failed to create a ruleset: %s", strerror(errno));
        }
 
        for (i = 0; rules[i].path; i++) {
                add_path_beneath(_metadata, ruleset_fd, rules[i].access,
-                               rules[i].path);
+                                rules[i].path);
        }
        return ruleset_fd;
 }
 
 static void enforce_ruleset(struct __test_metadata *const _metadata,
-               const int ruleset_fd)
+                           const int ruleset_fd)
 {
        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
-       ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) {
+       ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
+       {
                TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
        }
 }
@@ -503,13 +567,14 @@ TEST_F_FORK(layout1, proc_nsfs)
                {
                        .path = "/dev/null",
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
-               {}
+               {},
        };
        struct landlock_path_beneath_attr path_beneath;
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access |
-                       LANDLOCK_ACCESS_FS_READ_DIR, rules);
+       const int ruleset_fd = create_ruleset(
+               _metadata, rules[0].access | LANDLOCK_ACCESS_FS_READ_DIR,
+               rules);
 
        ASSERT_LE(0, ruleset_fd);
        ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
@@ -536,22 +601,23 @@ TEST_F_FORK(layout1, proc_nsfs)
         * references to a ruleset.
         */
        path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
-               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                     LANDLOCK_ACCESS_FS_WRITE_FILE,
        path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC);
        ASSERT_LE(0, path_beneath.parent_fd);
        ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-                               &path_beneath, 0));
+                                       &path_beneath, 0));
        ASSERT_EQ(EBADFD, errno);
        ASSERT_EQ(0, close(path_beneath.parent_fd));
 }
 
-TEST_F_FORK(layout1, unpriv) {
+TEST_F_FORK(layout1, unpriv)
+{
        const struct rule rules[] = {
                {
                        .path = dir_s1d2,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        int ruleset_fd;
 
@@ -577,9 +643,9 @@ TEST_F_FORK(layout1, effective_access)
                {
                        .path = file1_s2d2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
        char buf;
@@ -589,17 +655,23 @@ TEST_F_FORK(layout1, effective_access)
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
 
-       /* Tests on a directory. */
+       /* Tests on a directory (with or without O_PATH). */
        ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+       ASSERT_EQ(0, test_open("/", O_RDONLY | O_PATH));
        ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+       ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_PATH));
        ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+       ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY | O_PATH));
+
        ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
        ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
        ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
        ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
 
-       /* Tests on a file. */
+       /* Tests on a file (with or without O_PATH). */
        ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY));
+       ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_PATH));
+
        ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
 
        /* Checks effective read and write actions. */
@@ -626,7 +698,7 @@ TEST_F_FORK(layout1, unhandled_access)
                        .path = dir_s1d2,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        /* Here, we only handle read accesses, not write accesses. */
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
@@ -653,14 +725,14 @@ TEST_F_FORK(layout1, ruleset_overlap)
                {
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
                {
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_READ_DIR,
+                                 LANDLOCK_ACCESS_FS_READ_DIR,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -687,6 +759,113 @@ TEST_F_FORK(layout1, ruleset_overlap)
        ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
 }
 
+TEST_F_FORK(layout1, layer_rule_unions)
+{
+       const struct rule layer1[] = {
+               {
+                       .path = dir_s1d2,
+                       .access = LANDLOCK_ACCESS_FS_READ_FILE,
+               },
+               /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+               {
+                       .path = dir_s1d3,
+                       .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+               },
+               {},
+       };
+       const struct rule layer2[] = {
+               /* Doesn't change anything from layer1. */
+               {
+                       .path = dir_s1d2,
+                       .access = LANDLOCK_ACCESS_FS_READ_FILE |
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
+               },
+               {},
+       };
+       const struct rule layer3[] = {
+               /* Only allows write (but not read) to dir_s1d3. */
+               {
+                       .path = dir_s1d2,
+                       .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+               },
+               {},
+       };
+       int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1);
+
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       /* Checks s1d1 hierarchy with layer1. */
+       ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+       ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+       /* Checks s1d2 hierarchy with layer1. */
+       ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+       ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+       /* Checks s1d3 hierarchy with layer1. */
+       ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+       ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+       /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+       ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+       ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+       /* Doesn't change anything from layer1. */
+       ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2);
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       /* Checks s1d1 hierarchy with layer2. */
+       ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+       ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+       /* Checks s1d2 hierarchy with layer2. */
+       ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+       ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+       /* Checks s1d3 hierarchy with layer2. */
+       ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+       ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+       /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+       ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+       ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+       /* Only allows write (but not read) to dir_s1d3. */
+       ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3);
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       /* Checks s1d1 hierarchy with layer3. */
+       ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+       ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+       /* Checks s1d2 hierarchy with layer3. */
+       ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+       ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+       ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+       /* Checks s1d3 hierarchy with layer3. */
+       ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
+       ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+       /* dir_s1d3 should now deny READ_FILE and WRITE_FILE (O_RDWR). */
+       ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDWR));
+       ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+}
+
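
The three layers above illustrate the stacking model that the remaining layout1
tests rely on: each enforcement adds a layer, and an access is granted only if
every layer allows it. A conceptual sketch of that intersection (a model of the
observed behavior, not kernel code):

	/* Illustrative model: effective rights are ANDed across layers. */
	static __u64 effective_access(const __u64 *const layer_access,
				      const size_t nr_layers)
	{
		__u64 allowed = ~(__u64)0;
		size_t i;

		for (i = 0; i < nr_layers; i++)
			allowed &= layer_access[i];
		return allowed;
	}
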
 TEST_F_FORK(layout1, non_overlapping_accesses)
 {
        const struct rule layer1[] = {
@@ -694,22 +873,22 @@ TEST_F_FORK(layout1, non_overlapping_accesses)
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_MAKE_REG,
                },
-               {}
+               {},
        };
        const struct rule layer2[] = {
                {
                        .path = dir_s1d3,
                        .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
                },
-               {}
+               {},
        };
        int ruleset_fd;
 
        ASSERT_EQ(0, unlink(file1_s1d1));
        ASSERT_EQ(0, unlink(file1_s1d2));
 
-       ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG,
-                       layer1);
+       ruleset_fd =
+               create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, layer1);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -720,7 +899,7 @@ TEST_F_FORK(layout1, non_overlapping_accesses)
        ASSERT_EQ(0, unlink(file1_s1d2));
 
        ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE,
-                       layer2);
+                                   layer2);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -758,7 +937,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
                        .path = file1_s1d3,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE,
                },
-               {}
+               {},
        };
        /* First rule with write restrictions. */
        const struct rule layer2_read_write[] = {
@@ -766,14 +945,14 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
                {
                        .path = dir_s1d3,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
                /* ...but also denies read access via its grandparent directory. */
                {
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
-               {}
+               {},
        };
        const struct rule layer3_read[] = {
                /* Allows read access via its great-grandparent directory. */
@@ -781,7 +960,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
                        .path = dir_s1d1,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE,
                },
-               {}
+               {},
        };
        const struct rule layer4_read_write[] = {
                /*
@@ -792,7 +971,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE,
                },
-               {}
+               {},
        };
        const struct rule layer5_read[] = {
                /*
@@ -803,7 +982,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE,
                },
-               {}
+               {},
        };
        const struct rule layer6_execute[] = {
                /*
@@ -814,7 +993,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
                        .path = dir_s2d1,
                        .access = LANDLOCK_ACCESS_FS_EXECUTE,
                },
-               {}
+               {},
        };
        const struct rule layer7_read_write[] = {
                /*
@@ -825,12 +1004,12 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
-               {}
+               {},
        };
        int ruleset_fd;
 
        ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
-                       layer1_read);
+                                   layer1_read);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -840,8 +1019,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
        ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
        ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
 
-       ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
-                       LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write);
+       ruleset_fd = create_ruleset(_metadata,
+                                   LANDLOCK_ACCESS_FS_READ_FILE |
+                                           LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                   layer2_read_write);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -852,7 +1033,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
        ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
 
        ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
-                       layer3_read);
+                                   layer3_read);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -863,8 +1044,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
        ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
 
        /* This time, denies write access for the file hierarchy. */
-       ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
-                       LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write);
+       ruleset_fd = create_ruleset(_metadata,
+                                   LANDLOCK_ACCESS_FS_READ_FILE |
+                                           LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                   layer4_read_write);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -879,7 +1062,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
        ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
 
        ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
-                       layer5_read);
+                                   layer5_read);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -891,7 +1074,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
        ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
 
        ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE,
-                       layer6_execute);
+                                   layer6_execute);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -902,8 +1085,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
        ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
        ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
 
-       ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
-                       LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write);
+       ruleset_fd = create_ruleset(_metadata,
+                                   LANDLOCK_ACCESS_FS_READ_FILE |
+                                           LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                   layer7_read_write);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
@@ -921,9 +1106,9 @@ TEST_F_FORK(layout1, inherit_subset)
                {
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_READ_DIR,
+                                 LANDLOCK_ACCESS_FS_READ_DIR,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -949,7 +1134,7 @@ TEST_F_FORK(layout1, inherit_subset)
         * ANDed with the previous ones.
         */
        add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
-                       dir_s1d2);
+                        dir_s1d2);
        /*
         * According to ruleset_fd, dir_s1d2 should now have the
         * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE
@@ -1004,7 +1189,7 @@ TEST_F_FORK(layout1, inherit_subset)
         * that there was no rule tied to it before.
         */
        add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
-                       dir_s1d3);
+                        dir_s1d3);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
 
@@ -1039,7 +1224,7 @@ TEST_F_FORK(layout1, inherit_superset)
                        .path = dir_s1d3,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1054,8 +1239,10 @@ TEST_F_FORK(layout1, inherit_superset)
        ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
 
        /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */
-       add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE |
-                       LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2);
+       add_path_beneath(_metadata, ruleset_fd,
+                        LANDLOCK_ACCESS_FS_READ_FILE |
+                                LANDLOCK_ACCESS_FS_READ_DIR,
+                        dir_s1d2);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
 
@@ -1075,12 +1262,12 @@ TEST_F_FORK(layout1, max_layers)
                        .path = dir_s1d2,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
        ASSERT_LE(0, ruleset_fd);
-       for (i = 0; i < 64; i++)
+       for (i = 0; i < 16; i++)
                enforce_ruleset(_metadata, ruleset_fd);
 
        for (i = 0; i < 2; i++) {
@@ -1097,15 +1284,15 @@ TEST_F_FORK(layout1, empty_or_same_ruleset)
        int ruleset_fd;
 
        /* Tests empty handled_access_fs. */
-       ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
+       ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
        ASSERT_LE(-1, ruleset_fd);
        ASSERT_EQ(ENOMSG, errno);
 
        /* Enforces a policy which denies read access to all files. */
        ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE;
-       ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
+       ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
@@ -1113,8 +1300,8 @@ TEST_F_FORK(layout1, empty_or_same_ruleset)
 
        /* Nests a policy which denies read access to all directories. */
        ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR;
-       ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
+       ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
@@ -1137,7 +1324,7 @@ TEST_F_FORK(layout1, rule_on_mountpoint)
                        .path = dir_s3d2,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1166,7 +1353,7 @@ TEST_F_FORK(layout1, rule_over_mountpoint)
                        .path = dir_s3d1,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1194,7 +1381,7 @@ TEST_F_FORK(layout1, rule_over_root_allow_then_deny)
                        .path = "/",
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1224,7 +1411,7 @@ TEST_F_FORK(layout1, rule_over_root_deny)
                        .path = "/",
                        .access = LANDLOCK_ACCESS_FS_READ_FILE,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1244,12 +1431,13 @@ TEST_F_FORK(layout1, rule_inside_mount_ns)
                        .path = "s3d3",
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        int ruleset_fd;
 
        set_cap(_metadata, CAP_SYS_ADMIN);
-       ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) {
+       ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3))
+       {
                TH_LOG("Failed to pivot root: %s", strerror(errno));
        };
        ASSERT_EQ(0, chdir("/"));
@@ -1271,7 +1459,7 @@ TEST_F_FORK(layout1, mount_and_pivot)
                        .path = dir_s3d2,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1282,7 +1470,7 @@ TEST_F_FORK(layout1, mount_and_pivot)
        set_cap(_metadata, CAP_SYS_ADMIN);
        ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
        ASSERT_EQ(EPERM, errno);
-       ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3));
+       ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
        ASSERT_EQ(EPERM, errno);
        clear_cap(_metadata, CAP_SYS_ADMIN);
 }
@@ -1294,28 +1482,29 @@ TEST_F_FORK(layout1, move_mount)
                        .path = dir_s3d2,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
        ASSERT_LE(0, ruleset_fd);
 
        set_cap(_metadata, CAP_SYS_ADMIN);
-       ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
-                               dir_s1d2, 0)) {
+       ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+                            dir_s1d2, 0))
+       {
                TH_LOG("Failed to move mount: %s", strerror(errno));
        }
 
-       ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
-                               dir_s3d2, 0));
+       ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+                            dir_s3d2, 0));
        clear_cap(_metadata, CAP_SYS_ADMIN);
 
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
 
        set_cap(_metadata, CAP_SYS_ADMIN);
-       ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
-                               dir_s1d2, 0));
+       ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+                             dir_s1d2, 0));
        ASSERT_EQ(EPERM, errno);
        clear_cap(_metadata, CAP_SYS_ADMIN);
 }
@@ -1335,7 +1524,7 @@ TEST_F_FORK(layout1, release_inodes)
                        .path = dir_s3d3,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1362,7 +1551,7 @@ enum relative_access {
 };
 
 static void test_relative_path(struct __test_metadata *const _metadata,
-               const enum relative_access rel)
+                              const enum relative_access rel)
 {
        /*
         * Common layer to check that chroot doesn't ignore it (i.e. a chroot
@@ -1373,7 +1562,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
                        .path = TMP_DIR,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        const struct rule layer2_subs[] = {
                {
@@ -1384,7 +1573,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
                        .path = dir_s2d2,
                        .access = ACCESS_RO,
                },
-               {}
+               {},
        };
        int dirfd, ruleset_fd;
 
@@ -1425,14 +1614,16 @@ static void test_relative_path(struct __test_metadata *const _metadata,
                break;
        case REL_CHROOT_ONLY:
                /* Do chroot into dir_s1d2 (relative to dir_s2d2). */
-               ASSERT_EQ(0, chroot("../../s1d1/s1d2")) {
+               ASSERT_EQ(0, chroot("../../s1d1/s1d2"))
+               {
                        TH_LOG("Failed to chroot: %s", strerror(errno));
                }
                dirfd = AT_FDCWD;
                break;
        case REL_CHROOT_CHDIR:
                /* Do chroot into dir_s1d2. */
-               ASSERT_EQ(0, chroot(".")) {
+               ASSERT_EQ(0, chroot("."))
+               {
                        TH_LOG("Failed to chroot: %s", strerror(errno));
                }
                dirfd = AT_FDCWD;
@@ -1440,7 +1631,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
        }
 
        ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES,
-                       test_open_rel(dirfd, "..", O_RDONLY));
+                 test_open_rel(dirfd, "..", O_RDONLY));
        ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY));
 
        if (rel == REL_CHROOT_ONLY) {
@@ -1462,11 +1653,13 @@ static void test_relative_path(struct __test_metadata *const _metadata,
        if (rel != REL_CHROOT_CHDIR) {
                ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY));
                ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY));
-               ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY));
+               ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3",
+                                          O_RDONLY));
 
                ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY));
                ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY));
-               ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY));
+               ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3",
+                                          O_RDONLY));
        }
 
        if (rel == REL_OPEN)
@@ -1495,40 +1688,42 @@ TEST_F_FORK(layout1, relative_chroot_chdir)
 }
 
 static void copy_binary(struct __test_metadata *const _metadata,
-               const char *const dst_path)
+                       const char *const dst_path)
 {
        int dst_fd, src_fd;
        struct stat statbuf;
 
        dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC);
-       ASSERT_LE(0, dst_fd) {
-               TH_LOG("Failed to open \"%s\": %s", dst_path,
-                               strerror(errno));
+       ASSERT_LE(0, dst_fd)
+       {
+               TH_LOG("Failed to open \"%s\": %s", dst_path, strerror(errno));
        }
        src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC);
-       ASSERT_LE(0, src_fd) {
+       ASSERT_LE(0, src_fd)
+       {
                TH_LOG("Failed to open \"" BINARY_PATH "\": %s",
-                               strerror(errno));
+                      strerror(errno));
        }
        ASSERT_EQ(0, fstat(src_fd, &statbuf));
-       ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0,
-                               statbuf.st_size));
+       ASSERT_EQ(statbuf.st_size,
+                 sendfile(dst_fd, src_fd, 0, statbuf.st_size));
        ASSERT_EQ(0, close(src_fd));
        ASSERT_EQ(0, close(dst_fd));
 }
 
-static void test_execute(struct __test_metadata *const _metadata,
-               const int err, const char *const path)
+static void test_execute(struct __test_metadata *const _metadata, const int err,
+                        const char *const path)
 {
        int status;
-       char *const argv[] = {(char *)path, NULL};
+       char *const argv[] = { (char *)path, NULL };
        const pid_t child = fork();
 
        ASSERT_LE(0, child);
        if (child == 0) {
-               ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) {
+               ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL))
+               {
                        TH_LOG("Failed to execute \"%s\": %s", path,
-                                       strerror(errno));
+                              strerror(errno));
                };
                ASSERT_EQ(err, errno);
                _exit(_metadata->passed ? 2 : 1);
@@ -1536,9 +1731,10 @@ static void test_execute(struct __test_metadata *const _metadata,
        }
        ASSERT_EQ(child, waitpid(child, &status, 0));
        ASSERT_EQ(1, WIFEXITED(status));
-       ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) {
+       ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status))
+       {
                TH_LOG("Unexpected return code for \"%s\": %s", path,
-                               strerror(errno));
+                      strerror(errno));
        };
 }
 
@@ -1549,10 +1745,10 @@ TEST_F_FORK(layout1, execute)
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_EXECUTE,
                },
-               {}
+               {},
        };
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-                       rules);
+       const int ruleset_fd =
+               create_ruleset(_metadata, rules[0].access, rules);
 
        ASSERT_LE(0, ruleset_fd);
        copy_binary(_metadata, file1_s1d1);
@@ -1577,15 +1773,21 @@ TEST_F_FORK(layout1, execute)
 
 TEST_F_FORK(layout1, link)
 {
-       const struct rule rules[] = {
+       const struct rule layer1[] = {
                {
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_MAKE_REG,
                },
-               {}
+               {},
+       };
+       const struct rule layer2[] = {
+               {
+                       .path = dir_s1d3,
+                       .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+               },
+               {},
        };
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-                       rules);
+       int ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1);
 
        ASSERT_LE(0, ruleset_fd);
 
@@ -1598,14 +1800,30 @@ TEST_F_FORK(layout1, link)
 
        ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
        ASSERT_EQ(EACCES, errno);
+
        /* Denies linking because of reparenting. */
        ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
        ASSERT_EQ(EXDEV, errno);
        ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
        ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+       ASSERT_EQ(EXDEV, errno);
 
        ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
        ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
+
+       /* Removes the destination files to prepare for the next links. */
+       ASSERT_EQ(0, unlink(file2_s1d2));
+       ASSERT_EQ(0, unlink(file2_s1d3));
+
+       ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2);
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       /* Checks that linking doesn't require the ability to delete a file. */
+       ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+       ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
 }
 
 TEST_F_FORK(layout1, rename_file)
@@ -1619,14 +1837,13 @@ TEST_F_FORK(layout1, rename_file)
                        .path = dir_s2d2,
                        .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
                },
-               {}
+               {},
        };
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-                       rules);
+       const int ruleset_fd =
+               create_ruleset(_metadata, rules[0].access, rules);
 
        ASSERT_LE(0, ruleset_fd);
 
-       ASSERT_EQ(0, unlink(file1_s1d1));
        ASSERT_EQ(0, unlink(file1_s1d2));
 
        enforce_ruleset(_metadata, ruleset_fd);
@@ -1662,9 +1879,15 @@ TEST_F_FORK(layout1, rename_file)
        ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1,
                                RENAME_EXCHANGE));
        ASSERT_EQ(EACCES, errno);
+       /* Checks that file1_s2d1 cannot be removed (instead of ENOTDIR). */
+       ASSERT_EQ(-1, rename(dir_s2d2, file1_s2d1));
+       ASSERT_EQ(EACCES, errno);
        ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2,
                                RENAME_EXCHANGE));
        ASSERT_EQ(EACCES, errno);
+       /* Checks that file1_s1d1 cannot be removed (instead of EISDIR). */
+       ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+       ASSERT_EQ(EACCES, errno);
 
        /* Renames files with different parents. */
        ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
@@ -1675,14 +1898,14 @@ TEST_F_FORK(layout1, rename_file)
 
        /* Exchanges and renames files with same parent. */
        ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3,
-                               RENAME_EXCHANGE));
+                              RENAME_EXCHANGE));
        ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3));
 
        /* Exchanges files and directories with same parent, twice. */
        ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
-                               RENAME_EXCHANGE));
+                              RENAME_EXCHANGE));
        ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
-                               RENAME_EXCHANGE));
+                              RENAME_EXCHANGE));
 }
 
 TEST_F_FORK(layout1, rename_dir)
@@ -1696,10 +1919,10 @@ TEST_F_FORK(layout1, rename_dir)
                        .path = dir_s2d1,
                        .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
                },
-               {}
+               {},
        };
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-                       rules);
+       const int ruleset_fd =
+               create_ruleset(_metadata, rules[0].access, rules);
 
        ASSERT_LE(0, ruleset_fd);
 
@@ -1727,140 +1950,864 @@ TEST_F_FORK(layout1, rename_dir)
        ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1,
                                RENAME_EXCHANGE));
        ASSERT_EQ(EACCES, errno);
+       /* Checks that dir_s1d2 cannot be removed (instead of ENOTDIR). */
+       ASSERT_EQ(-1, rename(dir_s1d2, file1_s1d1));
+       ASSERT_EQ(EACCES, errno);
        ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2,
                                RENAME_EXCHANGE));
        ASSERT_EQ(EACCES, errno);
+       /* Checks that dir_s1d2 cannot be removed (instead of EISDIR). */
+       ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+       ASSERT_EQ(EACCES, errno);
 
        /*
         * Exchanges and renames directory to the same parent, which allows
         * directory removal.
         */
        ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2,
-                               RENAME_EXCHANGE));
+                              RENAME_EXCHANGE));
        ASSERT_EQ(0, unlink(dir_s1d3));
        ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
        ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3));
        ASSERT_EQ(0, rmdir(dir_s1d3));
 }
 
-TEST_F_FORK(layout1, remove_dir)
+TEST_F_FORK(layout1, reparent_refer)
 {
-       const struct rule rules[] = {
+       const struct rule layer1[] = {
                {
                        .path = dir_s1d2,
-                       .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
+               },
+               {
+                       .path = dir_s2d2,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
                },
-               {}
+               {},
        };
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-                       rules);
+       int ruleset_fd =
+               create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1);
 
        ASSERT_LE(0, ruleset_fd);
-
-       ASSERT_EQ(0, unlink(file1_s1d1));
-       ASSERT_EQ(0, unlink(file1_s1d2));
-       ASSERT_EQ(0, unlink(file1_s1d3));
-       ASSERT_EQ(0, unlink(file2_s1d3));
-
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
 
-       ASSERT_EQ(0, rmdir(dir_s1d3));
-       ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
-       ASSERT_EQ(0, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR));
+       ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d1));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d2));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3));
+       ASSERT_EQ(EXDEV, errno);
 
-       /* dir_s1d2 itself cannot be removed. */
-       ASSERT_EQ(-1, rmdir(dir_s1d2));
-       ASSERT_EQ(EACCES, errno);
-       ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d2, AT_REMOVEDIR));
-       ASSERT_EQ(EACCES, errno);
-       ASSERT_EQ(-1, rmdir(dir_s1d1));
-       ASSERT_EQ(EACCES, errno);
-       ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d1, AT_REMOVEDIR));
-       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d1));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d2));
+       ASSERT_EQ(EXDEV, errno);
+       /*
+        * Moving should only be allowed when the source and the destination
+        * parent directories have REFER.
+        */
+       ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d3));
+       ASSERT_EQ(ENOTEMPTY, errno);
+       ASSERT_EQ(0, unlink(file1_s2d3));
+       ASSERT_EQ(0, unlink(file2_s2d3));
+       ASSERT_EQ(0, rename(dir_s1d3, dir_s2d3));
 }
 
-TEST_F_FORK(layout1, remove_file)
+TEST_F_FORK(layout1, reparent_link)
 {
-       const struct rule rules[] = {
+       const struct rule layer1[] = {
                {
                        .path = dir_s1d2,
-                       .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+                       .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+               },
+               {
+                       .path = dir_s1d3,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
+               },
+               {
+                       .path = dir_s2d2,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
+               },
+               {
+                       .path = dir_s2d3,
+                       .access = LANDLOCK_ACCESS_FS_MAKE_REG,
                },
-               {}
+               {},
        };
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-                       rules);
+       const int ruleset_fd = create_ruleset(
+               _metadata,
+               LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
 
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
        ASSERT_EQ(0, close(ruleset_fd));
 
-       ASSERT_EQ(-1, unlink(file1_s1d1));
+       ASSERT_EQ(0, unlink(file1_s1d1));
+       ASSERT_EQ(0, unlink(file1_s1d2));
+       ASSERT_EQ(0, unlink(file1_s1d3));
+
+       /* Denies linking because of missing MAKE_REG. */
+       ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
        ASSERT_EQ(EACCES, errno);
-       ASSERT_EQ(-1, unlinkat(AT_FDCWD, file1_s1d1, 0));
+       /* Denies linking because of missing source and destination REFER. */
+       ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
+       ASSERT_EQ(EXDEV, errno);
+       /* Denies linking because of missing source REFER. */
+       ASSERT_EQ(-1, link(file1_s2d1, file1_s1d3));
+       ASSERT_EQ(EXDEV, errno);
+
+       /* Denies linking because of missing MAKE_REG. */
+       ASSERT_EQ(-1, link(file1_s2d2, file1_s1d1));
        ASSERT_EQ(EACCES, errno);
-       ASSERT_EQ(0, unlink(file1_s1d2));
-       ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0));
+       /* Denies linking because of missing destination REFER. */
+       ASSERT_EQ(-1, link(file1_s2d2, file1_s1d2));
+       ASSERT_EQ(EXDEV, errno);
+
+       /* Allows linking because of REFER and MAKE_REG. */
+       ASSERT_EQ(0, link(file1_s2d2, file1_s1d3));
+       ASSERT_EQ(0, unlink(file1_s2d2));
+       /* Reverse linking denied because of missing MAKE_REG. */
+       ASSERT_EQ(-1, link(file1_s1d3, file1_s2d2));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(0, unlink(file1_s2d3));
+       /* Checks reverse linking. */
+       ASSERT_EQ(0, link(file1_s1d3, file1_s2d3));
+       ASSERT_EQ(0, unlink(file1_s1d3));
+
+       /*
+        * This is OK for a file link, but it should not be allowed for a
+        * directory rename (because of the superset of access rights).
+        */
+       ASSERT_EQ(0, link(file1_s2d3, file1_s1d3));
+       ASSERT_EQ(0, unlink(file1_s1d3));
+
+       ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+       ASSERT_EQ(EXDEV, errno);
+
+       ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
+       ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
 }
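
A note on reading the errno checks in these reparenting tests: EACCES
means an access right handled by some layer (e.g. MAKE_REG on the
destination directory) is missing, whereas EXDEV means the REFER right is
missing on the source or destination hierarchy.  EXDEV is what a
cross-filesystem rename or link already returns, so existing user-space
fallbacks keep working; an illustrative pattern (oldpath/newpath stand
for any pair used above):

    if (rename(oldpath, newpath) == -1) {
            if (errno == EXDEV) {
                    /* Fall back to copy-then-unlink, as for a
                     * cross-filesystem move. */
            } else if (errno == EACCES) {
                    /* Denied by a missing access right; no fallback
                     * can help. */
            }
    }
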
 
-static void test_make_file(struct __test_metadata *const _metadata,
-               const __u64 access, const mode_t mode, const dev_t dev)
+TEST_F_FORK(layout1, reparent_rename)
 {
-       const struct rule rules[] = {
+       /* Same rules as for reparent_link. */
+       const struct rule layer1[] = {
                {
                        .path = dir_s1d2,
-                       .access = access,
+                       .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+               },
+               {
+                       .path = dir_s1d3,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
+               },
+               {
+                       .path = dir_s2d2,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
+               },
+               {
+                       .path = dir_s2d3,
+                       .access = LANDLOCK_ACCESS_FS_MAKE_REG,
                },
-               {}
+               {},
        };
-       const int ruleset_fd = create_ruleset(_metadata, access, rules);
+       const int ruleset_fd = create_ruleset(
+               _metadata,
+               LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
 
        ASSERT_LE(0, ruleset_fd);
-
-       ASSERT_EQ(0, unlink(file1_s1d1));
-       ASSERT_EQ(0, unlink(file2_s1d1));
-       ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) {
-               TH_LOG("Failed to make file \"%s\": %s",
-                               file2_s1d1, strerror(errno));
-       };
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
 
        ASSERT_EQ(0, unlink(file1_s1d2));
-       ASSERT_EQ(0, unlink(file2_s1d2));
-
        ASSERT_EQ(0, unlink(file1_s1d3));
-       ASSERT_EQ(0, unlink(file2_s1d3));
 
-       enforce_ruleset(_metadata, ruleset_fd);
-       ASSERT_EQ(0, close(ruleset_fd));
-
-       ASSERT_EQ(-1, mknod(file1_s1d1, mode | 0400, dev));
+       /* Denies renaming because of missing MAKE_REG. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s1d1,
+                               RENAME_EXCHANGE));
        ASSERT_EQ(EACCES, errno);
-       ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file2_s1d1,
+                               RENAME_EXCHANGE));
        ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(0, unlink(file1_s1d1));
        ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
        ASSERT_EQ(EACCES, errno);
+       /* Even a same-file exchange is denied. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file2_s1d1,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
 
-       ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) {
-               TH_LOG("Failed to make file \"%s\": %s",
-                               file1_s1d2, strerror(errno));
-       };
-       ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
-       ASSERT_EQ(0, unlink(file2_s1d2));
-       ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
-
-       ASSERT_EQ(0, mknod(file1_s1d3, mode | 0400, dev));
-       ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
-       ASSERT_EQ(0, unlink(file2_s1d3));
-       ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
-}
+       /* Denies renaming because of missing source and destination REFER. */
+       ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d2));
+       ASSERT_EQ(EXDEV, errno);
+       /*
+        * Denies renaming because of missing MAKE_REG, source and destination
+        * REFER.
+        */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d1,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s2d1,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+
+       /* Denies renaming because of missing source REFER. */
+       ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
+       ASSERT_EQ(EXDEV, errno);
+       /* Denies renaming because of missing MAKE_REG. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+
+       /* Denies renaming because of missing MAKE_REG. */
+       ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d1));
+       ASSERT_EQ(EACCES, errno);
+       /* Denies renaming because of missing destination REFER. */
+       ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
+       ASSERT_EQ(EXDEV, errno);
+
+       /* Denies exchange because of one missing MAKE_REG. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, file2_s1d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+       /* Allows renaming because of REFER and MAKE_REG. */
+       ASSERT_EQ(0, rename(file1_s2d2, file1_s1d3));
+
+       /* Reverse renaming denied because of missing MAKE_REG. */
+       ASSERT_EQ(-1, rename(file1_s1d3, file1_s2d2));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(0, unlink(file1_s2d3));
+       ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+       /* Tests reverse renaming. */
+       ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+       ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s1d3,
+                              RENAME_EXCHANGE));
+       ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+       /*
+        * This is OK for a file rename, but it should not be allowed for a
+        * directory rename (because of the superset of access rights).
+        */
+       ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+       ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+       /*
+        * Tests superset restrictions applied to directories.  Not only the
+        * dir_s2d3's parent (dir_s2d2) should be taken into account but also
+        * access rights tied to dir_s2d3. dir_s2d2 is missing one access right
+        * compared to dir_s1d3/file1_s1d3 (MAKE_REG) but it is provided
+        * directly by the moved dir_s2d3.
+        */
+       ASSERT_EQ(0, rename(dir_s2d3, file1_s1d3));
+       ASSERT_EQ(0, rename(file1_s1d3, dir_s2d3));
+       /*
+        * The first rename is allowed but not the exchange because dir_s1d3's
+        * parent (dir_s1d2) doesn't have REFER.
+        */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s2d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, rename(file1_s2d3, dir_s1d3));
+       ASSERT_EQ(EXDEV, errno);
+
+       ASSERT_EQ(-1, rename(file2_s1d2, file1_s1d3));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, rename(file2_s1d3, file1_s1d2));
+       ASSERT_EQ(EXDEV, errno);
+
+       /* Renaming in the same directory is always allowed. */
+       ASSERT_EQ(0, rename(file2_s1d2, file1_s1d2));
+       ASSERT_EQ(0, rename(file2_s1d3, file1_s1d3));
+
+       ASSERT_EQ(0, unlink(file1_s1d2));
+       /* Denies because of missing source MAKE_REG and destination REFER. */
+       ASSERT_EQ(-1, rename(dir_s2d3, file1_s1d2));
+       ASSERT_EQ(EXDEV, errno);
+
+       ASSERT_EQ(0, unlink(file1_s1d3));
+       /* Denies because of missing source MAKE_REG and REFER. */
+       ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d3));
+       ASSERT_EQ(EXDEV, errno);
+}
+
+static void
+reparent_exdev_layers_enforce1(struct __test_metadata *const _metadata)
+{
+       const struct rule layer1[] = {
+               {
+                       .path = dir_s1d2,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
+               },
+               {
+                       /* Interesting for the layer2 tests. */
+                       .path = dir_s1d3,
+                       .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+               },
+               {
+                       .path = dir_s2d2,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
+               },
+               {
+                       .path = dir_s2d3,
+                       .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+               },
+               {},
+       };
+       const int ruleset_fd = create_ruleset(
+               _metadata,
+               LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+}
+
+static void
+reparent_exdev_layers_enforce2(struct __test_metadata *const _metadata)
+{
+       const struct rule layer2[] = {
+               {
+                       .path = dir_s2d3,
+                       .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
+               },
+               {},
+       };
+       /*
+        * Same checks as before but with a second layer and a new MAKE_DIR
+        * rule (and no explicit handling of REFER).
+        */
+       const int ruleset_fd =
+               create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_DIR, layer2);
+
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+}
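
For context on how these two helpers compose: Landlock rulesets stack,
one layer per landlock_restrict_self() call, and an action must be
permitted by every layer that handles the access right involved.  A
hypothetical helper (not in this patch) showing the pattern:

    static void enforce_layer(const __u64 handled_fs)
    {
            const struct landlock_ruleset_attr attr = {
                    .handled_access_fs = handled_fs,
            };
            const int fd = landlock_create_ruleset(&attr, sizeof(attr), 0);

            /* Assumes no_new_privs is already set; stacks one more layer. */
            landlock_restrict_self(fd, 0);
            close(fd);
    }
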
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename1)
+{
+       ASSERT_EQ(0, unlink(file1_s2d2));
+       ASSERT_EQ(0, unlink(file1_s2d3));
+
+       reparent_exdev_layers_enforce1(_metadata);
+
+       /*
+        * Moving the dir_s1d3 directory below dir_s2d2 is allowed by Landlock
+        * because it doesn't inherit new access rights.
+        */
+       ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+       ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+       /*
+        * Moving the dir_s1d3 directory below dir_s2d3 is allowed, even if it
+        * gets a new inherited access right (MAKE_REG), because MAKE_REG is
+        * already allowed for dir_s1d3.
+        */
+       ASSERT_EQ(0, rename(dir_s1d3, file1_s2d3));
+       ASSERT_EQ(0, rename(file1_s2d3, dir_s1d3));
+
+       /*
+        * However, moving the file1_s1d3 file below dir_s2d3 is allowed
+        * because it cannot inherit the MAKE_REG right (which is dedicated to
+        * directories).
+        */
+       ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+       reparent_exdev_layers_enforce2(_metadata);
+
+       /*
+        * Moving the dir_s1d3 directory below dir_s2d2 is now denied because
+        * MAKE_DIR is not tied to dir_s2d2.
+        */
+       ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d2));
+       ASSERT_EQ(EACCES, errno);
+
+       /*
+        * Moving the dir_s1d3 directory below dir_s2d3 is forbidden because it
+        * would grant MAKE_REG and MAKE_DIR rights to it.
+        */
+       ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+       ASSERT_EQ(EXDEV, errno);
+
+       /*
+        * However, moving the file2_s1d3 file below dir_s2d3 is allowed
+        * because it cannot inherit the MAKE_REG or MAKE_DIR rights (which are
+        * dedicated to directories).
+        */
+       ASSERT_EQ(0, rename(file2_s1d3, file1_s2d3));
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename2)
+{
+       reparent_exdev_layers_enforce1(_metadata);
+
+       /* Checks EACCES predominance over EXDEV. */
+       ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d2));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+       ASSERT_EQ(EXDEV, errno);
+       /* Modify layout! */
+       ASSERT_EQ(0, rename(file1_s1d2, file1_s2d3));
+
+       /* Without REFER source. */
+       ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+       ASSERT_EQ(EXDEV, errno);
+
+       reparent_exdev_layers_enforce2(_metadata);
+
+       /* Checks EACCES predominance over EXDEV. */
+       ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+       ASSERT_EQ(EACCES, errno);
+       /* Checks with actual file2_s1d2. */
+       ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d2));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+       ASSERT_EQ(EXDEV, errno);
+       /* Modify layout! */
+       ASSERT_EQ(0, rename(file2_s1d2, file1_s2d3));
+
+       /* Without REFER source, EACCES wins over EXDEV. */
+       ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+       ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange1)
+{
+       const char *const dir_file1_s1d2 = file1_s1d2, *const dir_file2_s2d3 =
+                                                              file2_s2d3;
+
+       ASSERT_EQ(0, unlink(file1_s1d2));
+       ASSERT_EQ(0, mkdir(file1_s1d2, 0700));
+       ASSERT_EQ(0, unlink(file2_s2d3));
+       ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+       reparent_exdev_layers_enforce1(_metadata);
+
+       /* Error predominance with file exchange: EACCES wins over EXDEV. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+
+       /*
+        * Checks with directories whose creation could be allowed, but whose
+        * exchange is denied because of the access rights that would be
+        * inherited.
+        */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+                               dir_file2_s2d3, RENAME_EXCHANGE));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+                               dir_file1_s1d2, RENAME_EXCHANGE));
+       ASSERT_EQ(EXDEV, errno);
+
+       /* Checks with same access rights. */
+       ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+                              RENAME_EXCHANGE));
+       ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+                              RENAME_EXCHANGE));
+
+       /* Checks with different (child-only) access rights. */
+       ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+                              RENAME_EXCHANGE));
+       ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+                              RENAME_EXCHANGE));
+
+       /*
+        * Checks that exchanges between files and directories are consistent.
+        *
+        * Moving a file (file1_s2d2) to a directory which only grants more
+        * directory-related access rights is allowed, and at the same time
+        * moving a directory (dir_file2_s2d3) to another directory which
+        * grants fewer access rights is allowed too.
+        *
+        * See layout1.reparent_exdev_layers_exchange3 for inverted arguments.
+        */
+       ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+                              RENAME_EXCHANGE));
+       /*
+        * However, moving the directory back is denied because it would get
+        * more access rights than it currently has, and because file creation
+        * is forbidden (in dir_s2d2).
+        */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+
+       reparent_exdev_layers_enforce2(_metadata);
+
+       /* Error predominance with file exchange: EACCES wins over EXDEV. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+
+       /* Checks with directories whose creation is now denied. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+                               dir_file2_s2d3, RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+                               dir_file1_s1d2, RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+
+       /* Checks with different (child-only) access rights. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+                               RENAME_EXCHANGE));
+       /* Denied because of MAKE_DIR. */
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+
+       /* Checks with different (child-only) access rights. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+                               RENAME_EXCHANGE));
+       /* Denied because of MAKE_DIR. */
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+
+       /* See layout1.reparent_exdev_layers_exchange2 for complement. */
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange2)
+{
+       const char *const dir_file2_s2d3 = file2_s2d3;
+
+       ASSERT_EQ(0, unlink(file2_s2d3));
+       ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+       reparent_exdev_layers_enforce1(_metadata);
+       reparent_exdev_layers_enforce2(_metadata);
+
+       /* Checks that exchanges between files and directories are consistent. */
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange3)
+{
+       const char *const dir_file2_s2d3 = file2_s2d3;
+
+       ASSERT_EQ(0, unlink(file2_s2d3));
+       ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+       reparent_exdev_layers_enforce1(_metadata);
+
+       /*
+        * Checks that exchanges between files and directories are consistent,
+        * including with inverted arguments (see
+        * layout1.reparent_exdev_layers_exchange1).
+        */
+       ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+                              RENAME_EXCHANGE));
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_remove)
+{
+       const struct rule layer1[] = {
+               {
+                       .path = dir_s1d1,
+                       .access = LANDLOCK_ACCESS_FS_REFER |
+                                 LANDLOCK_ACCESS_FS_REMOVE_DIR,
+               },
+               {
+                       .path = dir_s1d2,
+                       .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+               },
+               {
+                       .path = dir_s2d1,
+                       .access = LANDLOCK_ACCESS_FS_REFER |
+                                 LANDLOCK_ACCESS_FS_REMOVE_FILE,
+               },
+               {},
+       };
+       const int ruleset_fd = create_ruleset(
+               _metadata,
+               LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_REMOVE_DIR |
+                       LANDLOCK_ACCESS_FS_REMOVE_FILE,
+               layer1);
+
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       /* Access denied because of wrong or swapped remove file/dir rights. */
+       ASSERT_EQ(-1, rename(file1_s1d1, dir_s2d2));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d1));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d2,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+
+       /* Access allowed thanks to the matching rights. */
+       ASSERT_EQ(-1, rename(file1_s2d1, dir_s1d2));
+       ASSERT_EQ(EISDIR, errno);
+       ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d1));
+       ASSERT_EQ(ENOTDIR, errno);
+       ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+       ASSERT_EQ(ENOTDIR, errno);
+       ASSERT_EQ(0, unlink(file1_s2d1));
+       ASSERT_EQ(0, unlink(file1_s1d3));
+       ASSERT_EQ(0, unlink(file2_s1d3));
+       ASSERT_EQ(0, rename(dir_s1d3, file1_s2d1));
+
+       /* Effectively removes a file and a directory by exchanging them. */
+       ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+       ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+                              RENAME_EXCHANGE));
+       ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+                               RENAME_EXCHANGE));
+       ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_dom_superset)
+{
+       const struct rule layer1[] = {
+               {
+                       .path = dir_s1d2,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
+               },
+               {
+                       .path = file1_s1d2,
+                       .access = LANDLOCK_ACCESS_FS_EXECUTE,
+               },
+               {
+                       .path = dir_s1d3,
+                       .access = LANDLOCK_ACCESS_FS_MAKE_SOCK |
+                                 LANDLOCK_ACCESS_FS_EXECUTE,
+               },
+               {
+                       .path = dir_s2d2,
+                       .access = LANDLOCK_ACCESS_FS_REFER |
+                                 LANDLOCK_ACCESS_FS_EXECUTE |
+                                 LANDLOCK_ACCESS_FS_MAKE_SOCK,
+               },
+               {
+                       .path = dir_s2d3,
+                       .access = LANDLOCK_ACCESS_FS_READ_FILE |
+                                 LANDLOCK_ACCESS_FS_MAKE_FIFO,
+               },
+               {},
+       };
+       int ruleset_fd = create_ruleset(_metadata,
+                                       LANDLOCK_ACCESS_FS_REFER |
+                                               LANDLOCK_ACCESS_FS_EXECUTE |
+                                               LANDLOCK_ACCESS_FS_MAKE_SOCK |
+                                               LANDLOCK_ACCESS_FS_READ_FILE |
+                                               LANDLOCK_ACCESS_FS_MAKE_FIFO,
+                                       layer1);
+
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d1));
+       ASSERT_EQ(EXDEV, errno);
+       /*
+        * Moving file1_s1d2 beneath dir_s2d3 would grant it the READ_FILE
+        * access right.
+        */
+       ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d3));
+       ASSERT_EQ(EXDEV, errno);
+       /*
+        * Moving file1_s1d2 should be allowed even if dir_s2d2 grants a
+        * superset of access rights compared to dir_s1d2, because file1_s1d2
+        * already has these access rights anyway.
+        */
+       ASSERT_EQ(0, rename(file1_s1d2, file1_s2d2));
+       ASSERT_EQ(0, rename(file1_s2d2, file1_s1d2));
+
+       ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+       ASSERT_EQ(EXDEV, errno);
+       /*
+        * Moving dir_s1d3 beneath dir_s2d3 would grant it the MAKE_FIFO access
+        * right.
+        */
+       ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+       ASSERT_EQ(EXDEV, errno);
+       /*
+        * Moving dir_s1d3 should be allowed even if dir_s2d2 grants a superset
+        * of access rights compared to dir_s1d2, because dir_s1d3 already has
+        * these access rights anyway.
+        */
+       ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+       ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+       /*
+        * Moving file1_s2d3 beneath dir_s1d2 is allowed, but moving it back
+        * will be denied because the access rights newly inherited from
+        * dir_s1d2 would be fewer than those of the destination (original)
+        * dir_s2d3.  This is a
+        * sinkhole scenario where we cannot move back files or directories.
+        */
+       ASSERT_EQ(0, rename(file1_s2d3, file2_s1d2));
+       ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d3));
+       ASSERT_EQ(EXDEV, errno);
+       ASSERT_EQ(0, unlink(file2_s1d2));
+       ASSERT_EQ(0, unlink(file2_s2d3));
+       /*
+        * Checks similar directory one-way move: dir_s2d3 loses EXECUTE and
+        * MAKE_SOCK which were inherited from dir_s1d3.
+        */
+       ASSERT_EQ(0, rename(dir_s2d3, file2_s1d2));
+       ASSERT_EQ(-1, rename(file2_s1d2, dir_s2d3));
+       ASSERT_EQ(EXDEV, errno);
+}
+
+TEST_F_FORK(layout1, remove_dir)
+{
+       const struct rule rules[] = {
+               {
+                       .path = dir_s1d2,
+                       .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+               },
+               {},
+       };
+       const int ruleset_fd =
+               create_ruleset(_metadata, rules[0].access, rules);
+
+       ASSERT_LE(0, ruleset_fd);
+
+       ASSERT_EQ(0, unlink(file1_s1d1));
+       ASSERT_EQ(0, unlink(file1_s1d2));
+       ASSERT_EQ(0, unlink(file1_s1d3));
+       ASSERT_EQ(0, unlink(file2_s1d3));
+
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       ASSERT_EQ(0, rmdir(dir_s1d3));
+       ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+       ASSERT_EQ(0, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR));
+
+       /* dir_s1d2 itself cannot be removed. */
+       ASSERT_EQ(-1, rmdir(dir_s1d2));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d2, AT_REMOVEDIR));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, rmdir(dir_s1d1));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d1, AT_REMOVEDIR));
+       ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, remove_file)
+{
+       const struct rule rules[] = {
+               {
+                       .path = dir_s1d2,
+                       .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+               },
+               {},
+       };
+       const int ruleset_fd =
+               create_ruleset(_metadata, rules[0].access, rules);
+
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       ASSERT_EQ(-1, unlink(file1_s1d1));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, unlinkat(AT_FDCWD, file1_s1d1, 0));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(0, unlink(file1_s1d2));
+       ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0));
+}
+
+static void test_make_file(struct __test_metadata *const _metadata,
+                          const __u64 access, const mode_t mode,
+                          const dev_t dev)
+{
+       const struct rule rules[] = {
+               {
+                       .path = dir_s1d2,
+                       .access = access,
+               },
+               {},
+       };
+       const int ruleset_fd = create_ruleset(_metadata, access, rules);
+
+       ASSERT_LE(0, ruleset_fd);
+
+       ASSERT_EQ(0, unlink(file1_s1d1));
+       ASSERT_EQ(0, unlink(file2_s1d1));
+       ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev))
+       {
+               TH_LOG("Failed to make file \"%s\": %s", file2_s1d1,
+                      strerror(errno));
+       };
+
+       ASSERT_EQ(0, unlink(file1_s1d2));
+       ASSERT_EQ(0, unlink(file2_s1d2));
+
+       ASSERT_EQ(0, unlink(file1_s1d3));
+       ASSERT_EQ(0, unlink(file2_s1d3));
+
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       ASSERT_EQ(-1, mknod(file1_s1d1, mode | 0400, dev));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+       ASSERT_EQ(EACCES, errno);
+       ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+       ASSERT_EQ(EACCES, errno);
+
+       ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev))
+       {
+               TH_LOG("Failed to make file \"%s\": %s", file1_s1d2,
+                      strerror(errno));
+       };
+       ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+       ASSERT_EQ(0, unlink(file2_s1d2));
+       ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
+
+       ASSERT_EQ(0, mknod(file1_s1d3, mode | 0400, dev));
+       ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
+       ASSERT_EQ(0, unlink(file2_s1d3));
+       ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
+}
 
 TEST_F_FORK(layout1, make_char)
 {
        /* Creates a /dev/null device. */
        set_cap(_metadata, CAP_MKNOD);
        test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR,
-                       makedev(1, 3));
+                      makedev(1, 3));
 }
 
 TEST_F_FORK(layout1, make_block)
@@ -1868,7 +2815,7 @@ TEST_F_FORK(layout1, make_block)
        /* Creates a /dev/loop0 device. */
        set_cap(_metadata, CAP_MKNOD);
        test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK,
-                       makedev(7, 0));
+                      makedev(7, 0));
 }
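
As a reminder of the mknod() convention test_make_file() exercises: the
file type is encoded in the mode argument, and the dev argument only
matters for character and block devices.  Illustrative calls (paths are
hypothetical):

    mknod("/tmp/null", S_IFCHR | 0400, makedev(1, 3)); /* char 1:3 = /dev/null */
    mknod("/tmp/loop", S_IFBLK | 0400, makedev(7, 0)); /* block 7:0 = /dev/loop0 */
    mknod("/tmp/fifo", S_IFIFO | 0400, 0);             /* dev ignored for FIFOs */
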
 
 TEST_F_FORK(layout1, make_reg_1)
@@ -1898,10 +2845,10 @@ TEST_F_FORK(layout1, make_sym)
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_MAKE_SYM,
                },
-               {}
+               {},
        };
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-                       rules);
+       const int ruleset_fd =
+               create_ruleset(_metadata, rules[0].access, rules);
 
        ASSERT_LE(0, ruleset_fd);
 
@@ -1943,10 +2890,10 @@ TEST_F_FORK(layout1, make_dir)
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
                },
-               {}
+               {},
        };
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-                       rules);
+       const int ruleset_fd =
+               create_ruleset(_metadata, rules[0].access, rules);
 
        ASSERT_LE(0, ruleset_fd);
 
@@ -1965,12 +2912,12 @@ TEST_F_FORK(layout1, make_dir)
 }
 
 static int open_proc_fd(struct __test_metadata *const _metadata, const int fd,
-               const int open_flags)
+                       const int open_flags)
 {
        static const char path_template[] = "/proc/self/fd/%d";
        char procfd_path[sizeof(path_template) + 10];
-       const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path),
-                       path_template, fd);
+       const int procfd_path_size =
+               snprintf(procfd_path, sizeof(procfd_path), path_template, fd);
 
        ASSERT_LT(procfd_path_size, sizeof(procfd_path));
        return open(procfd_path, open_flags);
@@ -1983,12 +2930,13 @@ TEST_F_FORK(layout1, proc_unlinked_file)
                        .path = file1_s1d2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE,
                },
-               {}
+               {},
        };
        int reg_fd, proc_fd;
-       const int ruleset_fd = create_ruleset(_metadata,
-                       LANDLOCK_ACCESS_FS_READ_FILE |
-                       LANDLOCK_ACCESS_FS_WRITE_FILE, rules);
+       const int ruleset_fd = create_ruleset(
+               _metadata,
+               LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_WRITE_FILE,
+               rules);
 
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
@@ -2005,9 +2953,10 @@ TEST_F_FORK(layout1, proc_unlinked_file)
        ASSERT_EQ(0, close(proc_fd));
 
        proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC);
-       ASSERT_EQ(-1, proc_fd) {
-               TH_LOG("Successfully opened /proc/self/fd/%d: %s",
-                               reg_fd, strerror(errno));
+       ASSERT_EQ(-1, proc_fd)
+       {
+               TH_LOG("Successfully opened /proc/self/fd/%d: %s", reg_fd,
+                      strerror(errno));
        }
        ASSERT_EQ(EACCES, errno);
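
The point of open_proc_fd() here: opening /proc/self/fd/<fd> performs a
fresh Landlock access check against the underlying (even unlinked) inode,
so a read-only grant cannot be upgraded to read-write.  A sketch matching
the checks above (inside a test):

    int ro_fd = open(file1_s1d2, O_RDONLY | O_CLOEXEC); /* allowed: READ_FILE */
    int rw_fd = open_proc_fd(_metadata, ro_fd, O_RDWR | O_CLOEXEC);

    /* Under the ruleset above, rw_fd is -1 and errno is EACCES. */
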
 
@@ -2023,13 +2972,13 @@ TEST_F_FORK(layout1, proc_pipe)
                {
                        .path = dir_s1d2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
-               {}
+               {},
        };
        /* Limits read and write access to files tied to the filesystem. */
-       const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-                       rules);
+       const int ruleset_fd =
+               create_ruleset(_metadata, rules[0].access, rules);
 
        ASSERT_LE(0, ruleset_fd);
        enforce_ruleset(_metadata, ruleset_fd);
@@ -2041,7 +2990,8 @@ TEST_F_FORK(layout1, proc_pipe)
 
        /* Checks access to pipes through FD. */
        ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC));
-       ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) {
+       ASSERT_EQ(1, write(pipe_fds[1], ".", 1))
+       {
                TH_LOG("Failed to write in pipe: %s", strerror(errno));
        }
        ASSERT_EQ(1, read(pipe_fds[0], &buf, 1));
@@ -2050,9 +3000,10 @@ TEST_F_FORK(layout1, proc_pipe)
        /* Checks write access to pipe through /proc/self/fd . */
        proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC);
        ASSERT_LE(0, proc_fd);
-       ASSERT_EQ(1, write(proc_fd, ".", 1)) {
+       ASSERT_EQ(1, write(proc_fd, ".", 1))
+       {
                TH_LOG("Failed to write through /proc/self/fd/%d: %s",
-                               pipe_fds[1], strerror(errno));
+                      pipe_fds[1], strerror(errno));
        }
        ASSERT_EQ(0, close(proc_fd));
 
@@ -2060,9 +3011,10 @@ TEST_F_FORK(layout1, proc_pipe)
        proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC);
        ASSERT_LE(0, proc_fd);
        buf = '\0';
-       ASSERT_EQ(1, read(proc_fd, &buf, 1)) {
+       ASSERT_EQ(1, read(proc_fd, &buf, 1))
+       {
                TH_LOG("Failed to read through /proc/self/fd/%d: %s",
-                               pipe_fds[1], strerror(errno));
+                      pipe_fds[1], strerror(errno));
        }
        ASSERT_EQ(0, close(proc_fd));
 
@@ -2070,8 +3022,9 @@ TEST_F_FORK(layout1, proc_pipe)
        ASSERT_EQ(0, close(pipe_fds[1]));
 }
 
-FIXTURE(layout1_bind) {
-};
+/* clang-format off */
+FIXTURE(layout1_bind) {};
+/* clang-format on */
 
 FIXTURE_SETUP(layout1_bind)
 {
@@ -2161,7 +3114,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
                        .path = dir_s2d1,
                        .access = ACCESS_RW,
                },
-               {}
+               {},
        };
        /*
         * Sets access rights on the same bind-mounted directories.  The result
@@ -2177,7 +3130,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
                        .path = dir_s2d2,
                        .access = ACCESS_RW,
                },
-               {}
+               {},
        };
        /* Only allow read-access to the s1d3 hierarchies. */
        const struct rule layer3_source[] = {
@@ -2185,7 +3138,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
                        .path = dir_s1d3,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE,
                },
-               {}
+               {},
        };
        /* Removes all access rights. */
        const struct rule layer4_destination[] = {
@@ -2193,7 +3146,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
                        .path = bind_file1_s1d3,
                        .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
-               {}
+               {},
        };
        int ruleset_fd;
 
@@ -2282,8 +3235,46 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
        ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
 }
 
-#define LOWER_BASE     TMP_DIR "/lower"
-#define LOWER_DATA     LOWER_BASE "/data"
+TEST_F_FORK(layout1_bind, reparent_cross_mount)
+{
+       const struct rule layer1[] = {
+               {
+                       /* dir_s2d1 is beneath the dir_s2d2 mount point. */
+                       .path = dir_s2d1,
+                       .access = LANDLOCK_ACCESS_FS_REFER,
+               },
+               {
+                       .path = bind_dir_s1d3,
+                       .access = LANDLOCK_ACCESS_FS_EXECUTE,
+               },
+               {},
+       };
+       int ruleset_fd = create_ruleset(
+               _metadata,
+               LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_EXECUTE, layer1);
+
+       ASSERT_LE(0, ruleset_fd);
+       enforce_ruleset(_metadata, ruleset_fd);
+       ASSERT_EQ(0, close(ruleset_fd));
+
+       /* Checks basic denied move. */
+       ASSERT_EQ(-1, rename(file1_s1d1, file1_s1d2));
+       ASSERT_EQ(EXDEV, errno);
+
+       /* Checks real cross-mount move (Landlock is not involved). */
+       ASSERT_EQ(-1, rename(file1_s2d1, file1_s2d2));
+       ASSERT_EQ(EXDEV, errno);
+
+       /* Checks a move that would grant more access rights. */
+       ASSERT_EQ(-1, rename(file1_s2d2, bind_file1_s1d3));
+       ASSERT_EQ(EXDEV, errno);
+
+       /* Checks legitimate downgrade move. */
+       ASSERT_EQ(0, rename(bind_file1_s1d3, file1_s2d2));
+}
+
+#define LOWER_BASE TMP_DIR "/lower"
+#define LOWER_DATA LOWER_BASE "/data"
 static const char lower_fl1[] = LOWER_DATA "/fl1";
 static const char lower_dl1[] = LOWER_DATA "/dl1";
 static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2";
@@ -2295,23 +3286,23 @@ static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3";
 static const char (*lower_base_files[])[] = {
        &lower_fl1,
        &lower_fo1,
-       NULL
+       NULL,
 };
 static const char (*lower_base_directories[])[] = {
        &lower_dl1,
        &lower_do1,
-       NULL
+       NULL,
 };
 static const char (*lower_sub_files[])[] = {
        &lower_dl1_fl2,
        &lower_do1_fo2,
        &lower_do1_fl3,
-       NULL
+       NULL,
 };
 
-#define UPPER_BASE     TMP_DIR "/upper"
-#define UPPER_DATA     UPPER_BASE "/data"
-#define UPPER_WORK     UPPER_BASE "/work"
+#define UPPER_BASE TMP_DIR "/upper"
+#define UPPER_DATA UPPER_BASE "/data"
+#define UPPER_WORK UPPER_BASE "/work"
 static const char upper_fu1[] = UPPER_DATA "/fu1";
 static const char upper_du1[] = UPPER_DATA "/du1";
 static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2";
@@ -2323,22 +3314,22 @@ static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3";
 static const char (*upper_base_files[])[] = {
        &upper_fu1,
        &upper_fo1,
-       NULL
+       NULL,
 };
 static const char (*upper_base_directories[])[] = {
        &upper_du1,
        &upper_do1,
-       NULL
+       NULL,
 };
 static const char (*upper_sub_files[])[] = {
        &upper_du1_fu2,
        &upper_do1_fo2,
        &upper_do1_fu3,
-       NULL
+       NULL,
 };
 
-#define MERGE_BASE     TMP_DIR "/merge"
-#define MERGE_DATA     MERGE_BASE "/data"
+#define MERGE_BASE TMP_DIR "/merge"
+#define MERGE_DATA MERGE_BASE "/data"
 static const char merge_fl1[] = MERGE_DATA "/fl1";
 static const char merge_dl1[] = MERGE_DATA "/dl1";
 static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2";
@@ -2355,21 +3346,17 @@ static const char (*merge_base_files[])[] = {
        &merge_fl1,
        &merge_fu1,
        &merge_fo1,
-       NULL
+       NULL,
 };
 static const char (*merge_base_directories[])[] = {
        &merge_dl1,
        &merge_du1,
        &merge_do1,
-       NULL
+       NULL,
 };
 static const char (*merge_sub_files[])[] = {
-       &merge_dl1_fl2,
-       &merge_du1_fu2,
-       &merge_do1_fo2,
-       &merge_do1_fl3,
-       &merge_do1_fu3,
-       NULL
+       &merge_dl1_fl2, &merge_du1_fu2, &merge_do1_fo2,
+       &merge_do1_fl3, &merge_do1_fu3, NULL,
 };
 
 /*
@@ -2411,8 +3398,9 @@ static const char (*merge_sub_files[])[] = {
  *         └── work
  */
 
-FIXTURE(layout2_overlay) {
-};
+/* clang-format off */
+FIXTURE(layout2_overlay) {};
+/* clang-format on */
 
 FIXTURE_SETUP(layout2_overlay)
 {
@@ -2444,9 +3432,8 @@ FIXTURE_SETUP(layout2_overlay)
        set_cap(_metadata, CAP_SYS_ADMIN);
        set_cap(_metadata, CAP_DAC_OVERRIDE);
        ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0,
-                               "lowerdir=" LOWER_DATA
-                               ",upperdir=" UPPER_DATA
-                               ",workdir=" UPPER_WORK));
+                          "lowerdir=" LOWER_DATA ",upperdir=" UPPER_DATA
+                          ",workdir=" UPPER_WORK));
        clear_cap(_metadata, CAP_DAC_OVERRIDE);
        clear_cap(_metadata, CAP_SYS_ADMIN);
 }
@@ -2513,9 +3500,9 @@ TEST_F_FORK(layout2_overlay, no_restriction)
        ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY));
 }
 
-#define for_each_path(path_list, path_entry, i)                        \
-       for (i = 0, path_entry = *path_list[i]; path_list[i];   \
-                       path_entry = *path_list[++i])
+#define for_each_path(path_list, path_entry, i)               \
+       for (i = 0, path_entry = *path_list[i]; path_list[i]; \
+            path_entry = *path_list[++i])
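
The macro walks a NULL-terminated array of pointers to path strings,
assigning each entry in turn; typical use (mirroring the loops below):

    const char *path_entry;
    size_t i;

    for_each_path(lower_sub_files, path_entry, i) {
            ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
    }
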
 
 TEST_F_FORK(layout2_overlay, same_content_different_file)
 {
@@ -2533,7 +3520,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
                        .path = MERGE_BASE,
                        .access = ACCESS_RW,
                },
-               {}
+               {},
        };
        const struct rule layer2_data[] = {
                {
@@ -2548,7 +3535,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
                        .path = MERGE_DATA,
                        .access = ACCESS_RW,
                },
-               {}
+               {},
        };
        /* Sets access right on directories inside both layers. */
        const struct rule layer3_subdirs[] = {
@@ -2580,7 +3567,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
                        .path = merge_do1,
                        .access = ACCESS_RW,
                },
-               {}
+               {},
        };
        /* Tighten access rights to the files. */
        const struct rule layer4_files[] = {
@@ -2611,37 +3598,37 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
                {
                        .path = merge_dl1_fl2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
                {
                        .path = merge_du1_fu2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
                {
                        .path = merge_do1_fo2,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
                {
                        .path = merge_do1_fl3,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
                {
                        .path = merge_do1_fu3,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
-               {}
+               {},
        };
        const struct rule layer5_merge_only[] = {
                {
                        .path = MERGE_DATA,
                        .access = LANDLOCK_ACCESS_FS_READ_FILE |
-                               LANDLOCK_ACCESS_FS_WRITE_FILE,
+                                 LANDLOCK_ACCESS_FS_WRITE_FILE,
                },
-               {}
+               {},
        };
        int ruleset_fd;
        size_t i;
@@ -2659,7 +3646,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
                ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
        }
        for_each_path(lower_base_directories, path_entry, i) {
-               ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+               ASSERT_EQ(EACCES,
+                         test_open(path_entry, O_RDONLY | O_DIRECTORY));
        }
        for_each_path(lower_sub_files, path_entry, i) {
                ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
@@ -2671,7 +3659,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
                ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
        }
        for_each_path(upper_base_directories, path_entry, i) {
-               ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+               ASSERT_EQ(EACCES,
+                         test_open(path_entry, O_RDONLY | O_DIRECTORY));
        }
        for_each_path(upper_sub_files, path_entry, i) {
                ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
@@ -2756,7 +3745,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
                ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
        }
        for_each_path(merge_base_directories, path_entry, i) {
-               ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+               ASSERT_EQ(EACCES,
+                         test_open(path_entry, O_RDONLY | O_DIRECTORY));
        }
        for_each_path(merge_sub_files, path_entry, i) {
                ASSERT_EQ(0, test_open(path_entry, O_RDWR));
@@ -2781,7 +3771,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
                ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
        }
        for_each_path(merge_base_directories, path_entry, i) {
-               ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+               ASSERT_EQ(EACCES,
+                         test_open(path_entry, O_RDONLY | O_DIRECTORY));
        }
        for_each_path(merge_sub_files, path_entry, i) {
                ASSERT_EQ(0, test_open(path_entry, O_RDWR));
index 15fbef9cc84962038ede217ea1b06d25073dd41e..c28ef98ff3ac10f24770cecb62ec3d2528229f49 100644 (file)
@@ -26,9 +26,10 @@ static void create_domain(struct __test_metadata *const _metadata)
                .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK,
        };
 
-       ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-                       sizeof(ruleset_attr), 0);
-       EXPECT_LE(0, ruleset_fd) {
+       ruleset_fd =
+               landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+       EXPECT_LE(0, ruleset_fd)
+       {
                TH_LOG("Failed to create a ruleset: %s", strerror(errno));
        }
        EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
@@ -43,7 +44,7 @@ static int test_ptrace_read(const pid_t pid)
        int procenv_path_size, fd;
 
        procenv_path_size = snprintf(procenv_path, sizeof(procenv_path),
-                       path_template, pid);
+                                    path_template, pid);
        if (procenv_path_size >= sizeof(procenv_path))
                return E2BIG;
 
@@ -59,9 +60,12 @@ static int test_ptrace_read(const pid_t pid)
        return 0;
 }
 
-FIXTURE(hierarchy) { };
+/* clang-format off */
+FIXTURE(hierarchy) {};
+/* clang-format on */
 
-FIXTURE_VARIANT(hierarchy) {
+FIXTURE_VARIANT(hierarchy)
+{
        const bool domain_both;
        const bool domain_parent;
        const bool domain_child;
@@ -83,7 +87,9 @@ FIXTURE_VARIANT(hierarchy) {
  *       \              P2 -> P1 : allow
  *        'P2
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
+       /* clang-format on */
        .domain_both = false,
        .domain_parent = false,
        .domain_child = false,
@@ -98,7 +104,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
  *        |  P2  |
  *        '------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
+       /* clang-format on */
        .domain_both = false,
        .domain_parent = false,
        .domain_child = true,
@@ -112,7 +120,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
  *            '
  *            P2
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
+       /* clang-format on */
        .domain_both = false,
        .domain_parent = true,
        .domain_child = false,
@@ -127,7 +137,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
  *         |  P2  |
  *         '------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
+       /* clang-format on */
        .domain_both = false,
        .domain_parent = true,
        .domain_child = true,
@@ -142,7 +154,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
  * |         P2  |
  * '-------------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
+       /* clang-format on */
        .domain_both = true,
        .domain_parent = false,
        .domain_child = false,
@@ -158,7 +172,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
  * |        '------' |
  * '-----------------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
+       /* clang-format on */
        .domain_both = true,
        .domain_parent = false,
        .domain_child = true,
@@ -174,7 +190,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
  * |             P2  |
  * '-----------------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
+       /* clang-format on */
        .domain_both = true,
        .domain_parent = true,
        .domain_child = false,
@@ -192,17 +210,21 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
  * |        '------' |
  * '-----------------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
+       /* clang-format on */
        .domain_both = true,
        .domain_parent = true,
        .domain_child = true,
 };
 
 FIXTURE_SETUP(hierarchy)
-{ }
+{
+}
 
 FIXTURE_TEARDOWN(hierarchy)
-{ }
+{
+}
 
 /* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
 TEST_F(hierarchy, trace)
@@ -330,7 +352,7 @@ TEST_F(hierarchy, trace)
        ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
        ASSERT_EQ(child, waitpid(child, &status, 0));
        if (WIFSIGNALED(status) || !WIFEXITED(status) ||
-                       WEXITSTATUS(status) != EXIT_SUCCESS)
+           WEXITSTATUS(status) != EXIT_SUCCESS)
                _metadata->passed = 0;
 }
 
index 0f2ebc38d89347be95baf63ed7a6c73805083d31..e1f998defd10745a916a125ced0a405d5e0f074c 100644 (file)
@@ -25,6 +25,7 @@ TEST_PROGS += bareudp.sh
 TEST_PROGS += amt.sh
 TEST_PROGS += unicast_extensions.sh
 TEST_PROGS += udpgro_fwd.sh
+TEST_PROGS += udpgro_frglist.sh
 TEST_PROGS += veth.sh
 TEST_PROGS += ioam6.sh
 TEST_PROGS += gro.sh
@@ -61,6 +62,8 @@ TEST_FILES := settings
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
+include bpf/Makefile
+
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
new file mode 100644 (file)
index 0000000..f91bf14
--- /dev/null
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
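+# Builds the nat6to4.o eBPF helper used by udpgro_frglist.sh; this file is
+# pulled in from ../Makefile via 'include bpf/Makefile'.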
+CLANG ?= clang
+CCINCLUDE += -I../../bpf
+CCINCLUDE += -I../../../../../usr/include/
+
+TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
+all: $(TEST_CUSTOM_PROGS)
+
+$(OUTPUT)/%.o: %.c
+       $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@
+
+clean:
+       rm -f $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/bpf/nat6to4.c
new file mode 100644 (file)
index 0000000..ac54c36
--- /dev/null
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code is taken from the Android Open Source Project and the author
+ * (Maciej Żenczykowski) has given permission to relicense it under the
+ * GPLv2. Therefore this program is free software; you can redistribute it
+ * and/or modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * The original headers, including the original license headers, are
+ * included below for completeness.
+ *
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/swab.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IP_DF 0x4000  // Flag: "Don't Fragment"
+
+SEC("schedcls/ingress6/nat_6")
+int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
+{
+       const int l2_header_size = sizeof(struct ethhdr);
+       void *data = (void *)(long)skb->data;
+       const void *data_end = (void *)(long)skb->data_end;
+       const struct ethhdr *const eth = data;  // used iff is_ethernet
+       const struct ipv6hdr *const ip6 = (void *)(eth + 1);
+
+       // Require ethernet dst mac address to be our unicast address.
+       if (skb->pkt_type != PACKET_HOST)
+               return TC_ACT_OK;
+
+       // Must be meta-ethernet IPv6 frame
+       if (skb->protocol != bpf_htons(ETH_P_IPV6))
+               return TC_ACT_OK;
+
+       // Must have (ethernet and) ipv6 header
+       if (data + l2_header_size + sizeof(*ip6) > data_end)
+               return TC_ACT_OK;
+
+       // Ethertype - if present - must be IPv6
+       if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+               return TC_ACT_OK;
+
+       // IP version must be 6
+       if (ip6->version != 6)
+               return TC_ACT_OK;
+       // Maximum IPv6 payload length that can be translated to IPv4
+       if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
+               return TC_ACT_OK;
+       switch (ip6->nexthdr) {
+       case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+       case IPPROTO_UDP:  // address means there is no need to update their checksums.
+       case IPPROTO_GRE:  // We do not need to bother looking at GRE/ESP headers,
+       case IPPROTO_ESP:  // since there is never a checksum to update.
+               break;
+       default:  // do not know how to handle anything else
+               return TC_ACT_OK;
+       }
+
+       struct ethhdr eth2;  // used iff is_ethernet
+
+       eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+       eth2.h_proto = bpf_htons(ETH_P_IP);  // But replace the ethertype
+
+       struct iphdr ip = {
+               .version = 4,                                                      // u4
+               .ihl = sizeof(struct iphdr) / sizeof(__u32),                       // u4
+               .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4),             // u8
+               .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) +
+                                    sizeof(struct iphdr)),                        // u16
+               .id = 0,                                                           // u16
+               .frag_off = bpf_htons(IP_DF),                                      // u16
+               .ttl = ip6->hop_limit,                                             // u8
+               .protocol = ip6->nexthdr,                                          // u8
+               .check = 0,                                                        // u16
+               .saddr = 0x0201a8c0,                                               // u32
+               .daddr = 0x0101a8c0,                                               // u32
+       };
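+       // Assuming a little-endian build: .saddr/.daddr above are 192.168.1.2
+       // and 192.168.1.1 in network byte order, matching the veth addresses
+       // configured by udpgro_frglist.sh.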
+
+       // Calculate the IPv4 one's complement checksum of the IPv4 header.
+       __wsum sum4 = 0;
+
+       for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
+               sum4 += ((__u16 *)&ip)[i];
+
+       // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
+       sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+       sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+       ip.check = (__u16)~sum4;                // sum4 cannot be zero, so this is never 0xFFFF
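+       // Worked example (illustrative): if sum4 were 0x1ABCD, the first fold
+       // gives 0xABCD + 0x1 == 0xABCE, the second fold leaves it unchanged,
+       // and ip.check == (__u16)~0xABCE == 0x5431.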
+
+       // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
+       __wsum sum6 = 0;
+       // We'll end up with a non-zero sum: ip6->version == 6 has zero bits,
+       // so negating that halfword yields a non-zero contribution.
+       for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
+               sum6 += ~((__u16 *)ip6)[i];  // note the bitwise negation
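+       // The negated sum, passed to bpf_csum_update() below, effectively
+       // subtracts the soon-to-be-removed IPv6 header from skb->csum on
+       // CHECKSUM_COMPLETE packets (see the matching snat4 comments below).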
+
+       // Note that there is no L4 checksum update: we are relying on the checksum neutrality
+       // of the ipv6 address chosen by netd's ClatdController.
+
+       // Packet mutations begin - point of no return, but if this first modification fails
+       // the packet is probably still pristine, so let clatd handle it.
+       if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
+               return TC_ACT_OK;
+       bpf_csum_update(skb, sum6);
+
+       data = (void *)(long)skb->data;
+       data_end = (void *)(long)skb->data_end;
+       if (data + l2_header_size + sizeof(struct iphdr) > data_end)
+               return TC_ACT_SHOT;
+
+       struct ethhdr *new_eth = data;
+
+       // Copy over the updated ethernet header
+       *new_eth = eth2;
+
+       // Copy over the new ipv4 header.
+       *(struct iphdr *)(new_eth + 1) = ip;
+       return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+SEC("schedcls/egress4/snat4")
+int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
+{
+       const int l2_header_size = sizeof(struct ethhdr);
+       void *data = (void *)(long)skb->data;
+       const void *data_end = (void *)(long)skb->data_end;
+       const struct ethhdr *const eth = data;  // used iff is_ethernet
+       const struct iphdr *const ip4 = (void *)(eth + 1);
+
+       // Must be meta-ethernet IPv4 frame
+       if (skb->protocol != bpf_htons(ETH_P_IP))
+               return TC_ACT_OK;
+
+       // Must have ipv4 header
+       if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
+               return TC_ACT_OK;
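+       // Note: the bound checks sizeof(struct ipv6hdr) (40 bytes) rather
+       // than the IPv4 header size; the stricter bound also covers the
+       // later ip4 and udp header reads.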
+
+       // Ethertype - if present - must be IPv4
+       if (eth->h_proto != bpf_htons(ETH_P_IP))
+               return TC_ACT_OK;
+
+       // IP version must be 4
+       if (ip4->version != 4)
+               return TC_ACT_OK;
+
+       // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+       if (ip4->ihl != 5)
+               return TC_ACT_OK;
+
+       // Maximum IPv4 total length that can be translated to IPv6
+       if (bpf_ntohs(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
+               return TC_ACT_OK;
+
+       // Calculate the IPv4 one's complement checksum of the IPv4 header.
+       __wsum sum4 = 0;
+
+       for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
+               sum4 += ((__u16 *)ip4)[i];
+
+       // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+       sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+       sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+       // A correct checksum folds to one's complement zero; since sum4 is
+       // non-zero, that value is 0xFFFF.
+       if (sum4 != 0xFFFF)
+               return TC_ACT_OK;
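+       // (Summing a valid IPv4 header this way always folds to 0xFFFF;
+       // anything else means a corrupt header, so leave it untranslated.)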
+
+       // Minimum IPv4 total length is the size of the header
+       if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
+               return TC_ACT_OK;
+
+       // We are incapable of dealing with IPv4 fragments
+       if (ip4->frag_off & ~bpf_htons(IP_DF))
+               return TC_ACT_OK;
+
+       switch (ip4->protocol) {
+       case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+       case IPPROTO_GRE:  // address means there is no need to update their checksums.
+       case IPPROTO_ESP:  // We do not need to bother looking at GRE/ESP headers,
+               break;         // since there is never a checksum to update.
+
+       case IPPROTO_UDP:  // See above comment, but must also have UDP header...
+               if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
+                       return TC_ACT_OK;
+               const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
+               // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
+               // checksum.  Otherwise the network or more likely the NAT64 gateway might
+               // drop the packet because in most cases IPv6/UDP packets with a zero checksum
+               // are invalid. See RFC 6935.  TODO: calculate checksum via bpf_csum_diff()
+               if (!uh->check)
+                       return TC_ACT_OK;
+               break;
+
+       default:  // do not know how to handle anything else
+               return TC_ACT_OK;
+       }
+       struct ethhdr eth2;  // used iff is_ethernet
+
+       eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+       eth2.h_proto = bpf_htons(ETH_P_IPV6);  // But replace the ethertype
+
+       struct ipv6hdr ip6 = {
+               .version = 6,                                    // __u8:4
+               .priority = ip4->tos >> 4,                       // __u8:4
+               .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0},       // __u8[3]
+               .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20),  // __be16
+               .nexthdr = ip4->protocol,                        // __u8
+               .hop_limit = ip4->ttl,                           // __u8
+       };
+       ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+       ip6.saddr.in6_u.u6_addr32[1] = 0;
+       ip6.saddr.in6_u.u6_addr32[2] = 0;
+       ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
+       ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+       ip6.daddr.in6_u.u6_addr32[1] = 0;
+       ip6.daddr.in6_u.u6_addr32[2] = 0;
+       ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
+
+       // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
+       __wsum sum6 = 0;
+       // We'll end up with a non-zero sum due to ip6.version == 6
+       for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
+               sum6 += ((__u16 *)&ip6)[i];
+
+       // Packet mutations begin - point of no return, but if this first modification fails
+       // the packet is probably still pristine, so let clatd handle it.
+       if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
+               return TC_ACT_OK;
+
+       // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
+       // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
+       // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
+       // However, we've already verified the ipv4 checksum is correct and thus 0.
+       // Thus we only need to add the ipv6 header's sum.
+       //
+       // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
+       // (-ENOTSUPP) if it isn't.  So we just ignore the return code (see above for more details).
+       bpf_csum_update(skb, sum6);
+
+       // bpf_skb_change_proto() invalidates all pointers - reload them.
+       data = (void *)(long)skb->data;
+       data_end = (void *)(long)skb->data_end;
+
+       // I cannot think of any valid way for this error condition to trigger,
+       // but the explicit check is required to keep the in-kernel eBPF
+       // verifier happy.
+       if (data + l2_header_size + sizeof(ip6) > data_end)
+               return TC_ACT_SHOT;
+
+       struct ethhdr *new_eth = data;
+
+       // Copy over the updated ethernet header
+       *new_eth = eth2;
+       // Copy over the new ipv4 header.
+       *(struct ipv6hdr *)(new_eth + 1) = ip6;
+       return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = ("GPL");
index 47c4d4b4a44af6b549536a94328da30cdd2a4bb1..54701c8b0cd7062716317bd803f238e26d72fb21 100755 (executable)
@@ -810,10 +810,16 @@ ipv4_ping()
        setup
        set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
        ipv4_ping_novrf
+       setup
+       set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+       ipv4_ping_novrf
 
        log_subsection "With VRF"
        setup "yes"
        ipv4_ping_vrf
+       setup "yes"
+       set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+       ipv4_ping_vrf
 }
 
 ################################################################################
@@ -2348,10 +2354,16 @@ ipv6_ping()
        log_subsection "No VRF"
        setup
        ipv6_ping_novrf
+       setup
+       set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+       ipv6_ping_novrf
 
        log_subsection "With VRF"
        setup "yes"
        ipv6_ping_vrf
+       setup "yes"
+       set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+       ipv6_ping_vrf
 }
 
 ################################################################################
index c87e674b61b1d97f83001c4b7258a79e55bb0b9f..e811090f77483d1dd9370f28cd296e8c74b3db71 100644 (file)
@@ -86,7 +86,7 @@ TEST_PROGS = bridge_igmp.sh \
        vxlan_bridge_1d_port_8472.sh \
        vxlan_bridge_1d.sh \
        vxlan_bridge_1q_ipv6.sh \
-       vxlan_bridge_1q_port_8472_ipv6.sh
+       vxlan_bridge_1q_port_8472_ipv6.sh \
        vxlan_bridge_1q_port_8472.sh \
        vxlan_bridge_1q.sh \
        vxlan_symmetric_ipv6.sh \
index 7314257d248a73cb61035416d740cfeba1ef29a7..48ef112f42c2e7a92d8ed45f394b6dac481153c9 100755 (executable)
@@ -1444,6 +1444,33 @@ chk_prio_nr()
        [ "${dump_stats}" = 1 ] && dump_stats
 }
 
+chk_subflow_nr()
+{
+       local need_title="$1"
+       local msg="$2"
+       local subflow_nr=$3
+       local cnt1
+       local cnt2
+
+       if [ -n "${need_title}" ]; then
+               printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}"
+       else
+               printf "%-${nr_blank}s %s" " " "${msg}"
+       fi
+
+       cnt1=$(ss -N $ns1 -tOni | grep -c token)
+       cnt2=$(ss -N $ns2 -tOni | grep -c token)
+       if [ "$cnt1" != "$subflow_nr" -o "$cnt2" != "$subflow_nr" ]; then
+               echo "[fail] got $cnt1:$cnt2 subflows expected $subflow_nr"
+               fail_test
+               dump_stats=1
+       else
+               echo "[ ok ]"
+       fi
+
+       [ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint )
+}
+
 chk_link_usage()
 {
        local ns=$1
@@ -2556,7 +2583,7 @@ fastclose_tests()
        fi
 }
 
-implicit_tests()
+endpoint_tests()
 {
        # userspace pm type prevents add_addr
        if reset "implicit EP"; then
@@ -2578,6 +2605,23 @@ implicit_tests()
                        $ns2 10.0.2.2 id 1 flags signal
                wait
        fi
+
+       if reset "delete and re-add"; then
+               pm_nl_set_limits $ns1 1 1
+               pm_nl_set_limits $ns2 1 1
+               pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
+               run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow &
+
+               wait_mpj $ns2
+               pm_nl_del_endpoint $ns2 2 10.0.2.2
+               sleep 0.5
+               chk_subflow_nr needtitle "after delete" 1
+
+               pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+               wait_mpj $ns2
+               chk_subflow_nr "" "after re-add" 2
+               wait
+       fi
 }
 
 # [$1: error message]
@@ -2624,7 +2668,7 @@ all_tests_sorted=(
        d@deny_join_id0_tests
        m@fullmesh_tests
        z@fastclose_tests
-       I@implicit_tests
+       I@endpoint_tests
 )
 
 all_tests_args=""
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
new file mode 100755 (executable)
index 0000000..807b74c
--- /dev/null
@@ -0,0 +1,101 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgro benchmarks
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+cleanup() {
+       local -r jobs="$(jobs -p)"
+       local -r ns="$(ip netns list|grep $PEER_NS)"
+
+       [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null
+       [ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+trap cleanup EXIT
+
+run_one() {
+       # use 'rx' as separator between sender args and receiver args
+       local -r all="$@"
+       local -r tx_args=${all%rx*}
+       local rx_args=${all#*rx}
+
+       ip netns add "${PEER_NS}"
+       ip -netns "${PEER_NS}" link set lo up
+       ip link add type veth
+       ip link set dev veth0 up
+       ip addr add dev veth0 192.168.1.2/24
+       ip addr add dev veth0 2001:db8::2/64 nodad
+
+       ip link set dev veth1 netns "${PEER_NS}"
+       ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+       ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+       ip -netns "${PEER_NS}" link set dev veth1 up
+       ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on
+
+       ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+       tc -n "${PEER_NS}" qdisc add dev veth1 clsact
+       tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6  direct-action
+       tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
+       echo ${rx_args}
+       ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
+
+       # Hack: let bg programs complete the startup
+       sleep 0.1
+       ./udpgso_bench_tx ${tx_args}
+}
+
+run_in_netns() {
+       local -r args=$@
+       echo ${args}
+       ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+       local -r args=$@
+
+       echo "udp gso - over veth touching data"
+       run_in_netns ${args} -u -S 0 rx -4 -v
+
+       echo "udp gso and gro - over veth touching data"
+       run_in_netns ${args} -S 0 rx -4 -G
+}
+
+run_tcp() {
+       local -r args=$@
+
+       echo "tcp - over veth touching data"
+       run_in_netns ${args} -t rx -4 -t
+}
+
+run_all() {
+       local -r core_args="-l 4"
+       local -r ipv4_args="${core_args} -4  -D 192.168.1.1"
+       local -r ipv6_args="${core_args} -6  -D 2001:db8::1"
+
+       echo "ipv6"
+       run_tcp "${ipv6_args}"
+       run_udp "${ipv6_args}"
+}
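+# Note: ipv4_args is defined above but currently unused; only the IPv6
+# benchmarks are run.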
+
+if [ ! -f ../bpf/xdp_dummy.o ]; then
+       echo "Missing xdp_dummy helper. Build bpf selftest first"
+       exit -1
+fi
+
+if [ ! -f bpf/nat6to4.o ]; then
+       echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first"
+       exit -1
+fi
+
+if [[ $# -eq 0 ]]; then
+       run_all
+elif [[ $1 == "__subprocess" ]]; then
+       shift
+       run_one $@
+else
+       run_in_netns $@
+fi
index c35ba24f994c36b82b94e7536d8715fc677b599c..66d0414d8e4bc3e9fba3ddb394697e825cce9624 100644 (file)
@@ -301,7 +301,7 @@ specify_qemu_cpus () {
                        echo $2 -smp $3
                        ;;
                qemu-system-ppc64)
-                       nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`"
+                       nt="`lscpu | sed -n 's/^Thread(s) per core:\s*//p'`"
                        echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt
                        ;;
                esac
index 5f682fc892dd490570d5746e0aea4381be36aa36..88983cba795636cc41b9c872f1ee4f7ba7ad2505 100755 (executable)
@@ -36,7 +36,7 @@ do
        then
                egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
                files="$files $i.diags $i"
-       elif ! test -f ${scenariobasedir}/vmlinux
+       elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run"
        then
                echo No ${scenariobasedir}/vmlinux file > $i.diags
                files="$files $i.diags $i"
index 0a5419982ab3ead8b0986d787b356ddbd07ac169..0789c5606d2abb347e8f93efbeefc88f60c04b3c 100755 (executable)
@@ -33,7 +33,12 @@ do
                TORTURE_SUITE="`cat $i/../torture_suite`"
                configfile=`echo $i | sed -e 's,^.*/,,'`
                rm -f $i/console.log.*.diags
-               kvm-recheck-${TORTURE_SUITE}.sh $i
+               case "${TORTURE_SUITE}" in
+               X*)
+                       ;;
+               *)
+                       kvm-recheck-${TORTURE_SUITE}.sh $i
+               esac
                if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
                then
                        echo QEMU error, output:
index 8c4c1e4792d02c9714ad802aabe59d9dae971f37..0ff59bd8b640df201d4a3ffa8b031e61a3ffe53c 100755 (executable)
@@ -138,14 +138,14 @@ chmod +x $T/bin/kvm-remote-*.sh
 # Check first to avoid the need for cleanup for system-name typos
 for i in $systems
 do
-       ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
-       echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
+       ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
        ret=$?
        if test "$ret" -ne 0
        then
                echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
                exit 4
        fi
+       echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
 done
 
 # Download and expand the tarball on all systems.
@@ -153,14 +153,14 @@ echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
 for i in $systems
 do
        echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
-       cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+       cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
        ret=$?
        tries=0
        while test "$ret" -ne 0
        do
                echo Unable to download $T/binres.tgz to system $i, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
                sleep 60
-               cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+               cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
                ret=$?
                if test "$ret" -ne 0
                then
@@ -185,7 +185,7 @@ checkremotefile () {
 
        while :
        do
-               ssh $1 "test -f \"$2\""
+               ssh -o BatchMode=yes $1 "test -f \"$2\""
                ret=$?
                if test "$ret" -eq 255
                then
@@ -228,7 +228,7 @@ startbatches () {
                then
                        continue # System still running last test, skip.
                fi
-               ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
+               ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
                ret=$?
                if test "$ret" -ne 0
                then
@@ -267,7 +267,7 @@ do
                sleep 30
        done
        echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
-       ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+       ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
 done
 
 ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
index 55b2c153328274bf8a1d469ad978060ad7c621a0..263e16aeca0e4e7b745dda2020716c5460cbc517 100755 (executable)
@@ -44,6 +44,7 @@ TORTURE_KCONFIG_KASAN_ARG=""
 TORTURE_KCONFIG_KCSAN_ARG=""
 TORTURE_KMAKE_ARG=""
 TORTURE_QEMU_MEM=512
+torture_qemu_mem_default=1
 TORTURE_REMOTE=
 TORTURE_SHUTDOWN_GRACE=180
 TORTURE_SUITE=rcu
@@ -86,7 +87,7 @@ usage () {
        echo "       --remote"
        echo "       --results absolute-pathname"
        echo "       --shutdown-grace seconds"
-       echo "       --torture lock|rcu|rcuscale|refscale|scf"
+       echo "       --torture lock|rcu|rcuscale|refscale|scf|X*"
        echo "       --trust-make"
        exit 1
 }
@@ -180,6 +181,10 @@ do
                ;;
        --kasan)
                TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+               if test -n "$torture_qemu_mem_default"
+               then
+                       TORTURE_QEMU_MEM=2G
+               fi
                ;;
        --kconfig|--kconfigs)
                checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
@@ -202,6 +207,7 @@ do
        --memory)
                checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
                TORTURE_QEMU_MEM=$2
+               torture_qemu_mem_default=
                shift
                ;;
        --no-initrd)
@@ -231,7 +237,7 @@ do
                shift
                ;;
        --torture)
-               checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
+               checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\|X.*\)$' '^--'
                TORTURE_SUITE=$2
                TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`"
                shift
index bfe09e2829c8d0f723e2455eca6faa3f2b19c117..d477618e7261df5a5ad430e5fea7e4f3bd73c48d 100755 (executable)
@@ -54,6 +54,7 @@ do_kvfree=yes
 do_kasan=yes
 do_kcsan=no
 do_clocksourcewd=yes
+do_rt=yes
 
 # doyesno - Helper function for yes/no arguments
 function doyesno () {
@@ -82,6 +83,7 @@ usage () {
        echo "       --do-rcuscale / --do-no-rcuscale"
        echo "       --do-rcutorture / --do-no-rcutorture"
        echo "       --do-refscale / --do-no-refscale"
+       echo "       --do-rt / --do-no-rt"
        echo "       --do-scftorture / --do-no-scftorture"
        echo "       --duration [ <minutes> | <hours>h | <days>d ]"
        echo "       --kcsan-kmake-arg kernel-make-arguments"
@@ -118,6 +120,7 @@ do
                do_scftorture=yes
                do_rcuscale=yes
                do_refscale=yes
+               do_rt=yes
                do_kvfree=yes
                do_kasan=yes
                do_kcsan=yes
@@ -148,6 +151,7 @@ do
                do_scftorture=no
                do_rcuscale=no
                do_refscale=no
+               do_rt=no
                do_kvfree=no
                do_kasan=no
                do_kcsan=no
@@ -162,6 +166,9 @@ do
        --do-refscale|--do-no-refscale)
                do_refscale=`doyesno "$1" --do-refscale`
                ;;
+       --do-rt|--do-no-rt)
+               do_rt=`doyesno "$1" --do-rt`
+               ;;
        --do-scftorture|--do-no-scftorture)
                do_scftorture=`doyesno "$1" --do-scftorture`
                ;;
@@ -322,6 +329,7 @@ then
        echo " --- make clean" > "$amcdir/Make.out" 2>&1
        make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
        echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1
+       cp .config $amcdir
        make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
        echo " --- make " >> "$amcdir/Make.out" 2>&1
        make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
@@ -350,8 +358,19 @@ fi
 
 if test "$do_scftorture" = "yes"
 then
-       torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot"
-       torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+       torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
+       torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+fi
+
+if test "$do_rt" = "yes"
+then
+       # With all post-boot grace periods forced to normal.
+       torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1"
+       torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+
+       # With all post-boot grace periods forced to expedited.
+       torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1"
+       torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
 fi
 
 if test "$do_refscale" = yes
@@ -363,7 +382,7 @@ fi
 for prim in $primlist
 do
        torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
-       torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+       torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
 done
 
 if test "$do_rcuscale" = yes
@@ -375,13 +394,13 @@ fi
 for prim in $primlist
 do
        torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
-       torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+       torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
 done
 
 if test "$do_kvfree" = "yes"
 then
        torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
-       torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+       torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
 fi
 
 if test "$do_clocksourcewd" = "yes"
index 7093422050f66a43e160e59d9f6dfc147f6789a2..6fd6acb94518270503ea55f37526611ab6312d1c 100644 (file)
@@ -8,3 +8,5 @@ CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_RCU_EXPERT=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
index 2da8b49589a0330332d7e064e995e176dc765b2c..07f5e0a70ae705cdef00bcb97a9dc0e35077b17a 100644 (file)
@@ -6,3 +6,5 @@ CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
 #CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index 3ca112444ce7791caf82c50dd592b896e428583c..d84801b9a7aed77a0ee538868d429ecc61fc77cf 100644 (file)
@@ -7,4 +7,5 @@ CONFIG_PREEMPT=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_RCU=y
 CONFIG_RCU_EXPERT=y
index ad2be91e5ee7624e95df63885f70dbbc833afb64..2f9fcffff5ae3c4b48939bcad0bb7ed4e3090717 100644 (file)
@@ -2,3 +2,7 @@ CONFIG_SMP=n
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
index cd2a188eeb6d986034e40d91359341944a1d2716..b9b6d67cbc5f6af55b8a6758f686c406369a99b4 100644 (file)
@@ -1 +1,2 @@
 rcutorture.torture_type=tasks
+rcutorture.stat_interval=60
index dc02083803ce574a769e93d230e87b078e583f87..dea26c5686784953650b6a233358d8b1958c7e55 100644 (file)
@@ -7,3 +7,5 @@ CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 #CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
index e4d74e5fc1d09bae1622a4f966458f253d14c034..85b407467454a2e98072ff234d9396e450ec01d1 100644 (file)
@@ -4,8 +4,11 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
 #CHECK#CONFIG_PROVE_RCU=n
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
 CONFIG_TASKS_TRACE_RCU_READ_MB=y
 CONFIG_RCU_EXPERT=y
index 77541eeb4e9fa1c6f8a955609c73b6ef4068fd24..093ea6e8e65cdf924ecaea7abb69f12d510ec4e8 100644 (file)
@@ -7,5 +7,7 @@ CONFIG_PREEMPT=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
 CONFIG_TASKS_TRACE_RCU_READ_MB=n
 CONFIG_RCU_EXPERT=y
index 22ad0261728d0158ec3a4c5e18027ebe91fae4db..ae395981b5e5e7c03499bd3d9140c6de8b034690 100644 (file)
@@ -1,8 +1,9 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
-CONFIG_PREEMPT_NONE=y
-CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
index 2789b47e4ecd7da1b6f37d368595b693a5ad2773..d30922d8c88323ce6e4175dbc67c761a0af36238 100644 (file)
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=16
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
index 8523a7515cbf817659a2a023d9da9578e09cf82b..fc45645bb5f421c1c0ca122fa66af430e5d5c8e5 100644 (file)
@@ -13,3 +13,5 @@ CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 #CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index 4a00539bfdd713cacfc3c14769a640efe16254ec..a323d8948b7cf13bcf5fbf489995f308f0369256 100644 (file)
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=56
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
index effa415f9b9282880083d83a2ecadf996711b331..e2bc99c785e75a9da38e2a11511c0a99424e4372 100644 (file)
@@ -9,7 +9,7 @@
 
 # rcutorture_param_n_barrier_cbs bootparam-string
 #
-# Adds n_barrier_cbs rcutorture module parameter to kernels having it.
+# Adds n_barrier_cbs rcutorture module parameter if not already specified.
 rcutorture_param_n_barrier_cbs () {
        if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
        then
@@ -30,13 +30,25 @@ rcutorture_param_onoff () {
        fi
 }
 
+# rcutorture_param_stat_interval bootparam-string
+#
+# Adds stat_interval rcutorture module parameter if not already specified.
+rcutorture_param_stat_interval () {
+       if echo $1 | grep -q "rcutorture\.stat_interval"
+       then
+               :
+       else
+               echo rcutorture.stat_interval=15
+       fi
+}
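+# This lets a scenario's own boot parameters (such as a scenario specifying
+# rcutorture.stat_interval=60) take precedence over the default of 15.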
+
 # per_version_boot_params bootparam-string config-file seconds
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
        echo $1 `rcutorture_param_onoff "$1" "$2"` \
                `rcutorture_param_n_barrier_cbs "$1"` \
-               rcutorture.stat_interval=15 \
+               `rcutorture_param_stat_interval "$1"` \
                rcutorture.shutdown_secs=$3 \
                rcutorture.test_no_idle_hz=1 \
                rcutorture.verbose=1
index 90942bb5bebc505940461a38e32cfdb0122c295f..6a00157bee5b1755a39f7cf7a6d49c10b5114aa3 100644 (file)
@@ -1,5 +1,6 @@
 CONFIG_RCU_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
-CONFIG_TASKS_RCU_GENERIC=y
-CONFIG_TASKS_RCU=y
-CONFIG_TASKS_TRACE_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
index f110d9ffbe4cb1e41db98b25cca92fc1d336cf17..b10706fd03a45089993133eb3ec2aafc04128c28 100644 (file)
@@ -16,3 +16,5 @@ CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
 CONFIG_RCU_TRACE=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index a98b58b54bb11d5fe2297bb6e96fd6bd39a2eb28..fbea3b13baba414d11fbb4514cb20b397d42b523 100644 (file)
@@ -1,2 +1,6 @@
 CONFIG_RCU_REF_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
index 7f06838a91e6103acbb362dfa6d9b22a1df7b1d0..ef2b501a697101e03e8f70146cbc1492e31d1b6d 100644 (file)
@@ -15,3 +15,5 @@ CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index b8429d6c6ebc320c0428ced7e6115ef9f5d5ec01..3a59346b3de78f8d10caab213d10df38d672c5fb 100644 (file)
@@ -7,3 +7,5 @@ CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index ae4992b141b06d7fcc0fa9e116ca606d3b0797ce..cb37e08037d6e5121313a5323f4d240ef9e47093 100644 (file)
@@ -7,3 +7,4 @@ CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
+CONFIG_RCU_EXPERT=y
index d3d9e35d3d55a88460046fe3f7bee12f87cfeddd..2d949e58f5a5dfd2e7b267be5110686f2c869570 100644 (file)
@@ -25,6 +25,5 @@ per_version_boot_params () {
        echo $1 `scftorture_param_onoff "$1" "$2"` \
                scftorture.stat_interval=15 \
                scftorture.shutdown_secs=$3 \
-               scftorture.verbose=1 \
-               scf
+               scftorture.verbose=1
 }
index 585f7a0c10cbea67d418366c94b3415cf9e00bfe..f017c382c0369c898881cdbac78648e07240f734 100644 (file)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/
 LDFLAGS += -lpthread
+LDLIBS += -lcap
 
 TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark
 include ../lib.mk
index 313bb0cbfb1eb96479148b7fc94461c745a01299..136df5b76319d79725ee4bb3a4f1015b82628fbb 100644 (file)
@@ -46,6 +46,7 @@
 #include <sys/ioctl.h>
 #include <linux/kcmp.h>
 #include <sys/resource.h>
+#include <sys/capability.h>
 
 #include <unistd.h>
 #include <sys/syscall.h>
@@ -59,6 +60,8 @@
 #define SKIP(s, ...)   XFAIL(s, ##__VA_ARGS__)
 #endif
 
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+
 #ifndef PR_SET_PTRACER
 # define PR_SET_PTRACER 0x59616d61
 #endif
@@ -268,6 +271,10 @@ struct seccomp_notif_addfd_big {
 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
 #endif
 
+#ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -3742,7 +3749,10 @@ TEST(user_notification_fault_recv)
        struct seccomp_notif req = {};
        struct seccomp_notif_resp resp = {};
 
-       ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
+       ASSERT_EQ(unshare(CLONE_NEWUSER), 0) {
+               if (errno == EINVAL)
+                       SKIP(return, "kernel missing CLONE_NEWUSER support");
+       }
 
        listener = user_notif_syscall(__NR_getppid,
                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
@@ -4231,6 +4241,421 @@ TEST(user_notification_addfd_rlimit)
        close(memfd);
 }
 
+/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
+FIXTURE(O_SUSPEND_SECCOMP) {
+       pid_t pid;
+};
+
+FIXTURE_SETUP(O_SUSPEND_SECCOMP)
+{
+       ERRNO_FILTER(block_read, E2BIG);
+       cap_value_t cap_list[] = { CAP_SYS_ADMIN };
+       cap_t caps;
+
+       self->pid = 0;
+
+       /* make sure we don't have CAP_SYS_ADMIN */
+       caps = cap_get_proc();
+       ASSERT_NE(NULL, caps);
+       ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
+       ASSERT_EQ(0, cap_set_proc(caps));
+       cap_free(caps);
+
+       ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+       ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read));
+
+       self->pid = fork();
+       ASSERT_GE(self->pid, 0);
+
+       if (self->pid == 0) {
+               while (1)
+                       pause();
+               _exit(127);
+       }
+}
+
+FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
+{
+       if (self->pid)
+               kill(self->pid, SIGKILL);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, setoptions)
+{
+       int wstatus;
+
+       ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
+       ASSERT_EQ(self->pid, wait(&wstatus));
+       ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP));
+       if (errno == EINVAL)
+               SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+       ASSERT_EQ(EPERM, errno);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, seize)
+{
+       int ret;
+
+       ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
+       ASSERT_EQ(-1, ret);
+       if (errno == EINVAL)
+               SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+       ASSERT_EQ(EPERM, errno);
+}
+
+/*
+ * get_nth - Get the nth space-separated entry in a file.
+ *
+ * Returns the length of the read field.
+ * Fails the test if the field has zero length.
+ */
+static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
+                    const unsigned int position, char **entry)
+{
+       char *line = NULL;
+       unsigned int i;
+       ssize_t nread;
+       size_t len = 0;
+       FILE *f;
+
+       f = fopen(path, "r");
+       ASSERT_NE(f, NULL) {
+               TH_LOG("Could not open %s: %s", path, strerror(errno));
+       }
+
+       for (i = 0; i < position; i++) {
+               nread = getdelim(&line, &len, ' ', f);
+               ASSERT_GE(nread, 0) {
+                       TH_LOG("Failed to read %d entry in file %s", i, path);
+               }
+       }
+       fclose(f);
+
+       ASSERT_GT(nread, 0) {
+               TH_LOG("Entry in file %s had zero length", path);
+       }
+
+       *entry = line;
+       return nread - 1;
+}
+
+/* For a given PID, get the task state (D, R, etc...) */
+static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
+{
+       char proc_path[100] = {0};
+       char status;
+       char *line;
+
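+       /*
+        * The third space-separated field of /proc/<pid>/stat is the
+        * single-character task state (R, S, D, ...).
+        */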
+       snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
+       ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1);
+
+       status = *line;
+       free(line);
+
+       return status;
+}
+
+TEST(user_notification_fifo)
+{
+       struct seccomp_notif_resp resp = {};
+       struct seccomp_notif req = {};
+       int i, status, listener;
+       pid_t pid, pids[3];
+       __u64 baseid;
+       long ret;
+       /* 100 ms */
+       struct timespec delay = { .tv_nsec = 100000000 };
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret) {
+               TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+       }
+
+       /* Setup a listener */
+       listener = user_notif_syscall(__NR_getppid,
+                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
+       ASSERT_GE(listener, 0);
+
+       pid = fork();
+       ASSERT_GE(pid, 0);
+
+       if (pid == 0) {
+               ret = syscall(__NR_getppid);
+               exit(ret != USER_NOTIF_MAGIC);
+       }
+
+       EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+       baseid = req.id + 1;
+
+       resp.id = req.id;
+       resp.error = 0;
+       resp.val = USER_NOTIF_MAGIC;
+
+       /* check that we make sure flags == 0 */
+       EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+       EXPECT_EQ(waitpid(pid, &status, 0), pid);
+       EXPECT_EQ(true, WIFEXITED(status));
+       EXPECT_EQ(0, WEXITSTATUS(status));
+
+       /* Start children, and generate notifications */
+       for (i = 0; i < ARRAY_SIZE(pids); i++) {
+               pid = fork();
+               if (pid == 0) {
+                       ret = syscall(__NR_getppid);
+                       exit(ret != USER_NOTIF_MAGIC);
+               }
+               pids[i] = pid;
+       }
+
+       /* This spins until all of the children are sleeping */
+restart_wait:
+       for (i = 0; i < ARRAY_SIZE(pids); i++) {
+               if (get_proc_stat(_metadata, pids[i]) != 'S') {
+                       nanosleep(&delay, NULL);
+                       goto restart_wait;
+               }
+       }
+
+       /* Read the notifications in order (and respond) */
+       for (i = 0; i < ARRAY_SIZE(pids); i++) {
+               memset(&req, 0, sizeof(req));
+               EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+               EXPECT_EQ(req.id, baseid + i);
+               resp.id = req.id;
+               EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+       }
+
+       /* Make sure notifications were received */
+       for (i = 0; i < ARRAY_SIZE(pids); i++) {
+               EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]);
+               EXPECT_EQ(true, WIFEXITED(status));
+               EXPECT_EQ(0, WEXITSTATUS(status));
+       }
+}
+
+/* get_proc_syscall - Get the syscall in progress for a given pid.
+ *
+ * Returns the current syscall number for the given process, or -1 if it
+ * is not in a syscall (running or blocked).
+ */
+static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
+{
+       char proc_path[100] = {0};
+       long ret = -1;
+       ssize_t nread;
+       char *line;
+
+       snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid);
+       nread = get_nth(_metadata, proc_path, 1, &line);
+       ASSERT_GT(nread, 0);
+
+       if (!strncmp("running", line, MIN(7, nread)))
+               ret = strtol(line, NULL, 16);
+
+       free(line);
+       return ret;
+}
+
+/* Ensure non-fatal signals prior to receive are unmodified */
+TEST(user_notification_wait_killable_pre_notification)
+{
+       struct sigaction new_action = {
+               .sa_handler = signal_handler,
+       };
+       int listener, status, sk_pair[2];
+       pid_t pid;
+       long ret;
+       char c;
+       /* 100 ms */
+       struct timespec delay = { .tv_nsec = 100000000 };
+
+       ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret)
+       {
+               TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+       }
+
+       ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+       listener = user_notif_syscall(
+               __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+                                     SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+       ASSERT_GE(listener, 0);
+
+       /*
+        * Check that we can kill the process with SIGUSR1 prior to receiving
+        * the notification. SIGUSR1 is wired up to a custom signal handler,
+        * and make sure it gets called.
+        */
+       pid = fork();
+       ASSERT_GE(pid, 0);
+
+       if (pid == 0) {
+               close(sk_pair[0]);
+               handled = sk_pair[1];
+
+               /* Setup the non-fatal sigaction without SA_RESTART */
+               if (sigaction(SIGUSR1, &new_action, NULL)) {
+                       perror("sigaction");
+                       exit(1);
+               }
+
+               ret = syscall(__NR_getppid);
+               /* Make sure we got a return from a signal interruption */
+               exit(ret != -1 || errno != EINTR);
+       }
+
+       /*
+        * Make sure we've gotten to the seccomp user notification wait
+        * from getppid prior to sending any signals
+        */
+       while (get_proc_syscall(_metadata, pid) != __NR_getppid &&
+              get_proc_stat(_metadata, pid) != 'S')
+               nanosleep(&delay, NULL);
+
+       /* Send non-fatal kill signal */
+       EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+       /* wait for process to exit (exit checks for EINTR) */
+       EXPECT_EQ(waitpid(pid, &status, 0), pid);
+       EXPECT_EQ(true, WIFEXITED(status));
+       EXPECT_EQ(0, WEXITSTATUS(status));
+
+       EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+}
+
+/* Ensure non-fatal signals after receive are blocked */
+TEST(user_notification_wait_killable)
+{
+       struct sigaction new_action = {
+               .sa_handler = signal_handler,
+       };
+       struct seccomp_notif_resp resp = {};
+       struct seccomp_notif req = {};
+       int listener, status, sk_pair[2];
+       pid_t pid;
+       long ret;
+       char c;
+       /* 100 ms */
+       struct timespec delay = { .tv_nsec = 100000000 };
+
+       ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret)
+       {
+               TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+       }
+
+       ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+       listener = user_notif_syscall(
+               __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+                                     SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+       ASSERT_GE(listener, 0);
+
+       pid = fork();
+       ASSERT_GE(pid, 0);
+
+       if (pid == 0) {
+               close(sk_pair[0]);
+               handled = sk_pair[1];
+
+               /* Setup the sigaction without SA_RESTART */
+               if (sigaction(SIGUSR1, &new_action, NULL)) {
+                       perror("sigaction");
+                       exit(1);
+               }
+
+               /* Make sure that the syscall is completed (no EINTR) */
+               ret = syscall(__NR_getppid);
+               exit(ret != USER_NOTIF_MAGIC);
+       }
+
+       /*
+        * Get the notification, to move the notifying process into a
+        * non-preemptible (TASK_KILLABLE) state.
+        */
+       EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+       /* Send non-fatal kill signal */
+       EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+       /*
+        * Make sure the task moves to TASK_KILLABLE by waiting for the
+        * D (disk sleep) state after receiving the non-fatal signal.
+        */
+       while (get_proc_stat(_metadata, pid) != 'D')
+               nanosleep(&delay, NULL);
+
+       resp.id = req.id;
+       resp.val = USER_NOTIF_MAGIC;
+       /* Make sure the notification is found and able to be replied to */
+       EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+       /*
+        * Make sure that the signal handler does get called once we're back in
+        * userspace.
+        */
+       EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+       /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */
+       EXPECT_EQ(waitpid(pid, &status, 0), pid);
+       EXPECT_EQ(true, WIFEXITED(status));
+       EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+/* Ensure fatal signals after receive are not blocked */
+TEST(user_notification_wait_killable_fatal)
+{
+       struct seccomp_notif req = {};
+       int listener, status;
+       pid_t pid;
+       long ret;
+       /* 100 ms */
+       struct timespec delay = { .tv_nsec = 100000000 };
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret)
+       {
+               TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+       }
+
+       listener = user_notif_syscall(
+               __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+                                     SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+       ASSERT_GE(listener, 0);
+
+       pid = fork();
+       ASSERT_GE(pid, 0);
+
+       if (pid == 0) {
+               /* This should never complete as it should get a SIGTERM */
+               syscall(__NR_getppid);
+               exit(1);
+       }
+
+       while (get_proc_stat(_metadata, pid) != 'S')
+               nanosleep(&delay, NULL);
+
+       /*
+        * Get the notification, to move the notifying process into a
+        * non-preemptible (TASK_KILLABLE) state.
+        */
+       EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+       /* Kill the process with a fatal signal */
+       EXPECT_EQ(kill(pid, SIGTERM), 0);
+
+       /*
+        * Wait for the process to exit, and make sure the process terminated
+        * due to the SIGTERM signal.
+        */
+       EXPECT_EQ(waitpid(pid, &status, 0), pid);
+       EXPECT_EQ(true, WIFSIGNALED(status));
+       EXPECT_EQ(SIGTERM, WTERMSIG(status));
+}
+
 /*
  * TODO:
  * - expand NNP testing
index 04a49e876a46c346d01b4d1be0a78042551a8937..5b1ecd00695b362983e953495520051c8f8a7d65 100644 (file)
@@ -57,9 +57,9 @@ CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_prog
 CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
 CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
 
-TARGETS := protection_keys
-BINARIES_32 := $(TARGETS:%=%_32)
-BINARIES_64 := $(TARGETS:%=%_64)
+VMTARGETS := protection_keys
+BINARIES_32 := $(VMTARGETS:%=%_32)
+BINARIES_64 := $(VMTARGETS:%=%_64)
 
 ifeq ($(CAN_BUILD_WITH_NOPIE),1)
 CFLAGS += -no-pie
@@ -112,7 +112,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave
 $(BINARIES_32): LDLIBS += -lrt -ldl -lm
 $(BINARIES_32): $(OUTPUT)/%_32: %.c
        $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
 endif
 
 ifeq ($(CAN_BUILD_X86_64),1)
@@ -120,7 +120,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave
 $(BINARIES_64): LDLIBS += -lrt -ldl
 $(BINARIES_64): $(OUTPUT)/%_64: %.c
        $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
 endif
 
 # x86_64 users should be encouraged to install 32-bit libraries
diff --git a/tools/thermal/lib/Build b/tools/thermal/lib/Build
new file mode 100644 (file)
index 0000000..06f2276
--- /dev/null
@@ -0,0 +1,3 @@
+libthermal_tools-y += mainloop.o
+libthermal_tools-y += log.o
+libthermal_tools-y += uptimeofday.o
diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile
new file mode 100644 (file)
index 0000000..82db451
--- /dev/null
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Most of this file is copied from tools/lib/perf/Makefile
+
+LIBTHERMAL_TOOLS_VERSION = 0
+LIBTHERMAL_TOOLS_PATCHLEVEL = 0
+LIBTHERMAL_TOOLS_EXTRAVERSION = 1
+
+MAKEFLAGS += --no-print-directory
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+# $(info Determined 'srctree' to be $(srctree))
+endif
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it were the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+include $(srctree)/tools/scripts/Makefile.include
+include $(srctree)/tools/scripts/Makefile.arch
+
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+
+ifeq ("$(origin V)", "command line")
+  VERBOSE = $(V)
+endif
+ifndef VERBOSE
+  VERBOSE = 0
+endif
+
+ifeq ($(VERBOSE),1)
+  Q =
+else
+  Q = @
+endif
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+  CFLAGS := $(EXTRA_CFLAGS)
+else
+  CFLAGS := -g -Wall
+endif
+
+INCLUDES = \
+-I/usr/include/libnl3 \
+-I$(srctree)/tools/lib/thermal/include \
+-I$(srctree)/tools/lib/ \
+-I$(srctree)/tools/include \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/ \
+-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \
+-I$(srctree)/tools/include/uapi
+
+# Append required CFLAGS
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += -Werror -Wall
+override CFLAGS += -fPIC
+override CFLAGS += $(INCLUDES)
+override CFLAGS += -Wl,-L.
+override CFLAGS += -Wl,-lthermal
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+export DESTDIR DESTDIR_SQ
+
+include $(srctree)/tools/build/Makefile.include
+
+PATCHLEVEL    = $(LIBTHERMAL_TOOLS_PATCHLEVEL)
+EXTRAVERSION  = $(LIBTHERMAL_TOOLS_EXTRAVERSION)
+VERSION       = $(LIBTHERMAL_TOOLS_VERSION).$(LIBTHERMAL_TOOLS_PATCHLEVEL).$(LIBTHERMAL_TOOLS_EXTRAVERSION)
+
+LIBTHERMAL_TOOLS_SO := $(OUTPUT)libthermal_tools.so.$(VERSION)
+LIBTHERMAL_TOOLS_A  := $(OUTPUT)libthermal_tools.a
+LIBTHERMAL_TOOLS_IN := $(OUTPUT)libthermal_tools-in.o
+LIBTHERMAL_TOOLS_PC := $(OUTPUT)libthermal_tools.pc
+
+LIBTHERMAL_TOOLS_ALL := $(LIBTHERMAL_TOOLS_A) $(OUTPUT)libthermal_tools.so*
+
+$(LIBTHERMAL_TOOLS_IN): FORCE
+       $(Q)$(MAKE) $(build)=libthermal_tools
+
+$(LIBTHERMAL_TOOLS_A): $(LIBTHERMAL_TOOLS_IN)
+       $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_TOOLS_IN)
+
+$(LIBTHERMAL_TOOLS_SO): $(LIBTHERMAL_TOOLS_IN)
+       $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal_tools.so $^ -o $@
+       @ln -sf $(@F) $(OUTPUT)libthermal_tools.so
+       @ln -sf $(@F) $(OUTPUT)libthermal_tools.so.$(LIBTHERMAL_TOOLS_VERSION)
+
+
+libs: $(LIBTHERMAL_TOOLS_A) $(LIBTHERMAL_TOOLS_SO) $(LIBTHERMAL_TOOLS_PC)
+
+all: fixdep
+       $(Q)$(MAKE) libs
+
+clean:
+       $(call QUIET_CLEAN, libthermal_tools) $(RM) $(LIBTHERMAL_TOOLS_A) \
+                *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_TOOLS_VERSION) .*.d .*.cmd LIBTHERMAL_TOOLS-CFLAGS $(LIBTHERMAL_TOOLS_PC)
+
+$(LIBTHERMAL_TOOLS_PC):
+       $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+               -e "s|@LIBDIR@|$(libdir_SQ)|" \
+               -e "s|@VERSION@|$(VERSION)|" \
+               < libthermal_tools.pc.template > $@
+
+define do_install_mkdir
+       if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
+               $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+       fi
+endef
+
+define do_install
+       if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
+               $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+       fi;                                             \
+       $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: libs
+       $(call QUIET_INSTALL, $(LIBTHERMAL_TOOLS_ALL)) \
+               $(call do_install_mkdir,$(libdir_SQ)); \
+               cp -fpR $(LIBTHERMAL_TOOLS_ALL) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+       $(call QUIET_INSTALL, headers) \
+               $(call do_install,include/thermal.h,$(prefix)/include/thermal,644)
+
+install_pkgconfig: $(LIBTHERMAL_TOOLS_PC)
+       $(call QUIET_INSTALL, $(LIBTHERMAL_TOOLS_PC)) \
+               $(call do_install,$(LIBTHERMAL_TOOLS_PC),$(libdir_SQ)/pkgconfig,644)
+
+install_doc:
+       $(Q)$(MAKE) -C Documentation install-man install-html install-examples
+
+#install: install_lib install_headers install_pkgconfig install_doc
+install: install_lib install_headers install_pkgconfig
+
+FORCE:
+
+.PHONY: all install clean FORCE
diff --git a/tools/thermal/lib/libthermal_tools.pc.template b/tools/thermal/lib/libthermal_tools.pc.template
new file mode 100644 (file)
index 0000000..6f37697
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libthermal
+Description: thermal library
+Requires: libnl-3.0 libnl-genl-3.0
+Version: @VERSION@
+Libs: -L${libdir} -lnl-genl-3 -lnl-3
+Cflags: -I${includedir} -I${includedir}/libnl3
diff --git a/tools/thermal/lib/log.c b/tools/thermal/lib/log.c
new file mode 100644 (file)
index 0000000..597d6e7
--- /dev/null
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+#include "log.h"
+
+static const char *__ident = "unknown";
+static int __options;
+
+static const char * const loglvl[] = {
+       [LOG_DEBUG]     = "DEBUG",
+       [LOG_INFO]      = "INFO",
+       [LOG_NOTICE]    = "NOTICE",
+       [LOG_WARNING]   = "WARN",
+       [LOG_ERR]       = "ERROR",
+       [LOG_CRIT]      = "CRITICAL",
+       [LOG_ALERT]     = "ALERT",
+       [LOG_EMERG]     = "EMERG",
+};
+
+int log_str2level(const char *lvl)
+{
+       int i;
+
+       for (i = 0; i < sizeof(loglvl) / sizeof(loglvl[LOG_DEBUG]); i++)
+               if (!strcmp(lvl, loglvl[i]))
+                       return i;
+
+       return LOG_DEBUG;
+}
+
+extern void logit(int level, const char *format, ...)
+{
+       va_list args;
+
+       va_start(args, format);
+
+       if (__options & TO_SYSLOG)
+               vsyslog(level, format, args);
+
+       if (__options & TO_STDERR)
+               vfprintf(stderr, format, args);
+
+       if (__options & TO_STDOUT)
+               vfprintf(stdout, format, args);
+
+       va_end(args);
+}
+
+int log_init(int level, const char *ident, int options)
+{
+       if (!options)
+               return -1;
+
+       if (level > LOG_DEBUG)
+               return -1;
+
+       if (!ident)
+               return -1;
+
+       __ident = ident;
+       __options = options;
+
+       if (options & TO_SYSLOG) {
+               openlog(__ident, options | LOG_NDELAY, LOG_USER);
+               setlogmask(LOG_UPTO(level));
+       }
+
+       return 0;
+}
+
+void log_exit(void)
+{
+       closelog();
+}
diff --git a/tools/thermal/lib/log.h b/tools/thermal/lib/log.h
new file mode 100644 (file)
index 0000000..be8ab51
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_TOOLS_LOG_H
+#define __THERMAL_TOOLS_LOG_H
+
+#include <syslog.h>
+
+#ifndef __maybe_unused
+#define __maybe_unused         __attribute__((__unused__))
+#endif
+
+#define TO_SYSLOG 0x1
+#define TO_STDOUT 0x2
+#define TO_STDERR 0x4
+
+extern void logit(int level, const char *format, ...);
+
+#define DEBUG(fmt, ...)                logit(LOG_DEBUG, "%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#define INFO(fmt, ...)         logit(LOG_INFO, fmt, ##__VA_ARGS__)
+#define NOTICE(fmt, ...)       logit(LOG_NOTICE, fmt, ##__VA_ARGS__)
+#define WARN(fmt, ...)         logit(LOG_WARNING, fmt, ##__VA_ARGS__)
+#define ERROR(fmt, ...)                logit(LOG_ERR, fmt, ##__VA_ARGS__)
+#define CRITICAL(fmt, ...)     logit(LOG_CRIT, fmt, ##__VA_ARGS__)
+#define ALERT(fmt, ...)                logit(LOG_ALERT, fmt, ##__VA_ARGS__)
+#define EMERG(fmt, ...)                logit(LOG_EMERG, fmt, ##__VA_ARGS__)
+
+int log_init(int level, const char *ident, int options);
+int log_str2level(const char *lvl);
+void log_exit(void);
+
+#endif
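A hedged usage sketch of the logging API above (illustration only, not part of the patch; the "mytool" ident and the messages are made up):

#include "log.h"

int main(void)
{
	/* Route messages to stdout and syslog; syslog is masked up to LOG_INFO. */
	if (log_init(LOG_INFO, "mytool", TO_STDOUT | TO_SYSLOG))
		return 1;

	INFO("starting up\n");
	WARN("threshold reached: %d\n", 42);

	log_exit();

	return 0;
}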
diff --git a/tools/thermal/lib/mainloop.c b/tools/thermal/lib/mainloop.c
new file mode 100644 (file)
index 0000000..94cbbcb
--- /dev/null
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/epoll.h>
+#include "mainloop.h"
+#include "log.h"
+
+static int epfd = -1;
+static unsigned short nrhandler;
+static sig_atomic_t exit_mainloop;
+
+struct mainloop_data {
+       mainloop_callback_t cb;
+       void *data;
+       int fd;
+};
+
+static struct mainloop_data **mds;
+
+#define MAX_EVENTS 10
+
+int mainloop(unsigned int timeout)
+{
+       int i, nfds;
+       struct epoll_event events[MAX_EVENTS];
+       struct mainloop_data *md;
+
+       if (epfd < 0)
+               return -1;
+
+       for (;;) {
+
+               nfds = epoll_wait(epfd, events, MAX_EVENTS, timeout);
+
+               if (exit_mainloop || !nfds)
+                       return 0;
+
+               if (nfds < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       return -1;
+               }
+
+               for (i = 0; i < nfds; i++) {
+                       md = events[i].data.ptr;
+
+                       if (md->cb(md->fd, md->data) > 0)
+                               return 0;
+               }
+       }
+}
+
+int mainloop_add(int fd, mainloop_callback_t cb, void *data)
+{
+       struct epoll_event ev = {
+               .events = EPOLLIN,
+       };
+
+       struct mainloop_data *md;
+
+       if (fd >= nrhandler) {
+               mds = realloc(mds, sizeof(*mds) * (fd + 1));
+               if (!mds)
+                       return -1;
+               nrhandler = fd + 1;
+       }
+
+       md = malloc(sizeof(*md));
+       if (!md)
+               return -1;
+
+       md->data = data;
+       md->cb = cb;
+       md->fd = fd;
+
+       mds[fd] = md;
+       ev.data.ptr = md;
+
+       if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+               free(md);
+               return -1;
+       }
+
+       return 0;
+}
+
+int mainloop_del(int fd)
+{
+       if (fd >= nrhandler)
+               return -1;
+
+       if (epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL) < 0)
+               return -1;
+
+       free(mds[fd]);
+
+       return 0;
+}
+
+int mainloop_init(void)
+{
+       epfd = epoll_create(2);
+       if (epfd < 0)
+               return -1;
+
+       return 0;
+}
+
+void mainloop_exit(void)
+{
+       exit_mainloop = 1;
+}
+
+void mainloop_fini(void)
+{
+       close(epfd);
+}
diff --git a/tools/thermal/lib/mainloop.h b/tools/thermal/lib/mainloop.h
new file mode 100644 (file)
index 0000000..89b61e8
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_TOOLS_MAINLOOP_H
+#define __THERMAL_TOOLS_MAINLOOP_H
+
+typedef int (*mainloop_callback_t)(int fd, void *data);
+
+extern int mainloop(unsigned int timeout);
+extern int mainloop_add(int fd, mainloop_callback_t cb, void *data);
+extern int mainloop_del(int fd);
+extern void mainloop_exit(void);
+extern int mainloop_init(void);
+extern void mainloop_fini(void);
+
+#endif
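A hedged usage sketch of the epoll wrapper above (illustration only, not part of the patch); echo_cb is a made-up callback that echoes stdin until EOF:

#include <stdio.h>
#include <unistd.h>
#include "mainloop.h"

static int echo_cb(int fd, void *data)
{
	char buf[64];
	ssize_t n = read(fd, buf, sizeof(buf) - 1);

	if (n <= 0)
		return 1;	/* a positive return terminates mainloop() */

	buf[n] = '\0';
	printf("%s: %s", (const char *)data, buf);

	return 0;
}

int main(void)
{
	if (mainloop_init())
		return 1;

	mainloop_add(STDIN_FILENO, echo_cb, "stdin");
	mainloop(-1);	/* block indefinitely, as thermal-engine does */
	mainloop_fini();

	return 0;
}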
diff --git a/tools/thermal/lib/thermal-tools.h b/tools/thermal/lib/thermal-tools.h
new file mode 100644 (file)
index 0000000..f43939a
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_TOOLS
+#define __THERMAL_TOOLS
+
+#include "log.h"
+#include "mainloop.h"
+#include "uptimeofday.h"
+
+#endif
diff --git a/tools/thermal/lib/uptimeofday.c b/tools/thermal/lib/uptimeofday.c
new file mode 100644 (file)
index 0000000..dacb029
--- /dev/null
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: LGPL-2.1+
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#include <stdio.h>
+#include <sys/time.h>
+#include <linux/sysinfo.h>
+#include "thermal-tools.h"
+
+static unsigned long __offset;
+static struct timeval __tv;
+
+int uptimeofday_init(void)
+{
+       struct sysinfo info;
+
+       if (sysinfo(&info))
+               return -1;
+
+       gettimeofday(&__tv, NULL);
+
+       __offset = __tv.tv_sec - info.uptime;
+
+       return 0;
+}
+
+unsigned long getuptimeofday_ms(void)
+{
+       gettimeofday(&__tv, NULL);
+
+       return ((__tv.tv_sec - __offset) * 1000) + (__tv.tv_usec / 1000);
+}
+
+struct timespec msec_to_timespec(int msec)
+{
+       struct timespec tv = {
+               .tv_sec = (msec / 1000),
+               .tv_nsec = (msec % 1000) * 1000000,
+       };
+
+       return tv;
+}
diff --git a/tools/thermal/lib/uptimeofday.h b/tools/thermal/lib/uptimeofday.h
new file mode 100644 (file)
index 0000000..c0da5de
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */
+#ifndef __THERMAL_TOOLS_UPTIMEOFDAY_H
+#define __THERMAL_TOOLS_UPTIMEOFDAY_H
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+
+int uptimeofday_init(void);
+unsigned long getuptimeofday_ms(void);
+struct timespec msec_to_timespec(int msec);
+
+#endif
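A hedged usage sketch of the helpers above (illustration only, not part of the patch):

#include <stdio.h>
#include <time.h>
#include "uptimeofday.h"

int main(void)
{
	struct timespec ts;

	if (uptimeofday_init())
		return 1;

	/* Milliseconds elapsed since the system booted. */
	printf("uptime: %lu ms\n", getuptimeofday_ms());

	/* Convert a polling period for use with timerfd_settime(). */
	ts = msec_to_timespec(250);
	printf("250 ms = %ld s + %ld ns\n", (long)ts.tv_sec, ts.tv_nsec);

	return 0;
}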
diff --git a/tools/thermal/thermal-engine/Build b/tools/thermal/thermal-engine/Build
new file mode 100644 (file)
index 0000000..20c3c47
--- /dev/null
@@ -0,0 +1 @@
+thermal-engine-y += thermal-engine.o
diff --git a/tools/thermal/thermal-engine/Makefile b/tools/thermal/thermal-engine/Makefile
new file mode 100644 (file)
index 0000000..6bd05ff
--- /dev/null
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for thermal tools
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+# $(info Determined 'srctree' to be $(srctree))
+endif
+
+CFLAGS = -Wall -Wextra
+CFLAGS += -I$(srctree)/tools/thermal/lib
+CFLAGS += -I$(srctree)/tools/lib/thermal/include
+
+LDFLAGS = -L$(srctree)/tools/thermal/lib
+LDFLAGS += -L$(srctree)/tools/lib/thermal
+LDFLAGS += -lthermal_tools
+LDFLAGS += -lthermal
+LDFLAGS += -lconfig
+LDFLAGS += -lnl-genl-3 -lnl-3
+
+VERSION = 0.0.1
+
+all: thermal-engine
+%: %.c
+       $(CC) $(CFLAGS) -D VERSION=\"$(VERSION)\" -o $@ $^ $(LDFLAGS)
+clean:
+       $(RM) thermal-engine
diff --git a/tools/thermal/thermal-engine/thermal-engine.c b/tools/thermal/thermal-engine/thermal-engine.c
new file mode 100644 (file)
index 0000000..9b1476a
--- /dev/null
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Thermal monitoring tool based on the thermal netlink events.
+ *
+ * Copyright (C) 2022 Linaro Ltd.
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@kernel.org>
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <syslog.h>
+
+#include <sys/epoll.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <thermal.h>
+#include "thermal-tools.h"
+
+struct options {
+       int loglevel;
+       int logopt;
+       int interactive;
+       int daemonize;
+};
+
+struct thermal_data {
+       struct thermal_zone *tz;
+       struct thermal_handler *th;
+};
+
+static int show_trip(struct thermal_trip *tt, __maybe_unused void *arg)
+{
+       INFO("trip id=%d, type=%d, temp=%d, hyst=%d\n",
+            tt->id, tt->type, tt->temp, tt->hyst);
+
+       return 0;
+}
+
+static int show_temp(struct thermal_zone *tz, __maybe_unused void *arg)
+{
+       thermal_cmd_get_temp(arg, tz);
+
+       INFO("temperature: %d\n", tz->temp);
+
+       return 0;
+}
+
+static int show_governor(struct thermal_zone *tz, __maybe_unused void *arg)
+{
+       thermal_cmd_get_governor(arg, tz);
+
+       INFO("governor: '%s'\n", tz->governor);
+
+       return 0;
+}
+
+static int show_tz(struct thermal_zone *tz, __maybe_unused void *arg)
+{
+       INFO("thermal zone '%s', id=%d\n", tz->name, tz->id);
+
+       for_each_thermal_trip(tz->trip, show_trip, NULL);
+
+       show_temp(tz, arg);
+
+       show_governor(tz, arg);
+
+       return 0;
+}
+
+static int tz_create(const char *name, int tz_id, __maybe_unused void *arg)
+{
+       INFO("Thermal zone '%s'/%d created\n", name, tz_id);
+
+       return 0;
+}
+
+static int tz_delete(int tz_id, __maybe_unused void *arg)
+{
+       INFO("Thermal zone %d deleted\n", tz_id);
+
+       return 0;
+}
+
+static int tz_disable(int tz_id, void *arg)
+{
+       struct thermal_data *td = arg;
+       struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+       INFO("Thermal zone %d ('%s') disabled\n", tz_id, tz->name);
+
+       return 0;
+}
+
+static int tz_enable(int tz_id, void *arg)
+{
+       struct thermal_data *td = arg;
+       struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+       INFO("Thermal zone %d ('%s') enabled\n", tz_id, tz->name);
+
+       return 0;
+}
+
+static int trip_high(int tz_id, int trip_id, int temp, void *arg)
+{
+       struct thermal_data *td = arg;
+       struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+       INFO("Thermal zone %d ('%s'): trip point %d crossed on the way up with %d m°C\n",
+            tz_id, tz->name, trip_id, temp);
+
+       return 0;
+}
+
+static int trip_low(int tz_id, int trip_id, int temp, void *arg)
+{
+       struct thermal_data *td = arg;
+       struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+       INFO("Thermal zone %d ('%s'): trip point %d crossed on the way down with %d m°C\n",
+            tz_id, tz->name, trip_id, temp);
+
+       return 0;
+}
+
+static int trip_add(int tz_id, int trip_id, int type, int temp, int hyst, __maybe_unused void *arg)
+{
+       INFO("Trip point added %d: id=%d, type=%d, temp=%d, hyst=%d\n",
+            tz_id, trip_id, type, temp, hyst);
+
+       return 0;
+}
+
+static int trip_delete(int tz_id, int trip_id, __maybe_unused void *arg)
+{
+       INFO("Trip point deleted %d: id=%d\n", tz_id, trip_id);
+
+       return 0;
+}
+
+static int trip_change(int tz_id, int trip_id, int type, int temp,
+                      int hyst, void *arg)
+{
+       struct thermal_data *td = arg;
+       struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+       INFO("Trip point changed %d: id=%d, type=%d, temp=%d, hyst=%d\n",
+            tz_id, trip_id, type, temp, hyst);
+
+       tz->trip[trip_id].type = type;
+       tz->trip[trip_id].temp = temp;
+       tz->trip[trip_id].hyst = hyst;
+
+       return 0;
+}
+
+static int cdev_add(const char *name, int cdev_id, int max_state, __maybe_unused void *arg)
+{
+       INFO("Cooling device '%s'/%d (max state=%d) added\n", name, cdev_id, max_state);
+
+       return 0;
+}
+
+static int cdev_delete(int cdev_id, __maybe_unused void *arg)
+{
+       INFO("Cooling device %d deleted\n", cdev_id);
+
+       return 0;
+}
+
+static int cdev_update(int cdev_id, int cur_state, __maybe_unused void *arg)
+{
+       INFO("cdev:%d state:%d\n", cdev_id, cur_state);
+
+       return 0;
+}
+
+static int gov_change(int tz_id, const char *name, void *arg)
+{
+       struct thermal_data *td = arg;
+       struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id);
+
+       INFO("%s: governor changed %s -> %s\n", tz->name, tz->governor, name);
+
+       strcpy(tz->governor, name);
+
+       return 0;
+}
+
+static struct thermal_ops ops = {
+       .events.tz_create       = tz_create,
+       .events.tz_delete       = tz_delete,
+       .events.tz_disable      = tz_disable,
+       .events.tz_enable       = tz_enable,
+       .events.trip_high       = trip_high,
+       .events.trip_low        = trip_low,
+       .events.trip_add        = trip_add,
+       .events.trip_delete     = trip_delete,
+       .events.trip_change     = trip_change,
+       .events.cdev_add        = cdev_add,
+       .events.cdev_delete     = cdev_delete,
+       .events.cdev_update     = cdev_update,
+       .events.gov_change      = gov_change
+};
+
+static int thermal_event(__maybe_unused int fd, void *arg)
+{
+       struct thermal_data *td = arg;
+
+       return thermal_events_handle(td->th, td);
+}
+
+static void usage(const char *cmd)
+{
+       printf("%s: A thermal monitoring engine based on notifications\n", cmd);
+       printf("Usage: %s [options]\n", cmd);
+       printf("\t-h, --help\t\tthis help\n");
+       printf("\t-d, --daemonize\n");
+       printf("\t-l <level>, --loglevel <level>\tlog level: ");
+       printf("DEBUG, INFO, NOTICE, WARN, ERROR\n");
+       printf("\t-s, --syslog\t\toutput to syslog\n");
+       printf("\n");
+       exit(0);
+}
+
+static int options_init(int argc, char *argv[], struct options *options)
+{
+       int opt;
+
+       struct option long_options[] = {
+               { "help",       no_argument, NULL, 'h' },
+               { "daemonize",  no_argument, NULL, 'd' },
+               { "syslog",     no_argument, NULL, 's' },
+               { "loglevel",   required_argument, NULL, 'l' },
+               { 0, 0, 0, 0 }
+       };
+
+       while (1) {
+
+               int optindex = 0;
+
+               opt = getopt_long(argc, argv, "l:dhs", long_options, &optindex);
+               if (opt == -1)
+                       break;
+
+               switch (opt) {
+               case 'l':
+                       options->loglevel = log_str2level(optarg);
+                       break;
+               case 'd':
+                       options->daemonize = 1;
+                       break;
+               case 's':
+                       options->logopt = TO_SYSLOG;
+                       break;
+               case 'h':
+                       usage(basename(argv[0]));
+                       break;
+               default: /* '?' */
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+enum {
+       THERMAL_ENGINE_SUCCESS = 0,
+       THERMAL_ENGINE_OPTION_ERROR,
+       THERMAL_ENGINE_DAEMON_ERROR,
+       THERMAL_ENGINE_LOG_ERROR,
+       THERMAL_ENGINE_THERMAL_ERROR,
+       THERMAL_ENGINE_MAINLOOP_ERROR,
+};
+
+int main(int argc, char *argv[])
+{
+       struct thermal_data td;
+       struct options options = {
+               .loglevel = LOG_INFO,
+               .logopt = TO_STDOUT,
+       };
+
+       if (options_init(argc, argv, &options)) {
+               ERROR("Usage: %s --help\n", argv[0]);
+               return THERMAL_ENGINE_OPTION_ERROR;
+       }
+
+       if (options.daemonize && daemon(0, 0)) {
+               ERROR("Failed to daemonize: %m\n");
+               return THERMAL_ENGINE_DAEMON_ERROR;
+       }
+
+       if (log_init(options.loglevel, basename(argv[0]), options.logopt)) {
+               ERROR("Failed to initialize logging facility\n");
+               return THERMAL_ENGINE_LOG_ERROR;
+       }
+
+       td.th = thermal_init(&ops);
+       if (!td.th) {
+               ERROR("Failed to initialize the thermal library\n");
+               return THERMAL_ENGINE_THERMAL_ERROR;
+       }
+
+       td.tz = thermal_zone_discover(td.th);
+       if (!td.tz) {
+               ERROR("No thermal zone available\n");
+               return THERMAL_ENGINE_THERMAL_ERROR;
+       }
+
+       for_each_thermal_zone(td.tz, show_tz, td.th);
+
+       if (mainloop_init()) {
+               ERROR("Failed to initialize the mainloop\n");
+               return THERMAL_ENGINE_MAINLOOP_ERROR;
+       }
+
+       if (mainloop_add(thermal_events_fd(td.th), thermal_event, &td)) {
+               ERROR("Failed to setup the mainloop\n");
+               return THERMAL_ENGINE_MAINLOOP_ERROR;
+       }
+
+       INFO("Waiting for thermal events ...\n");
+
+       if (mainloop(-1)) {
+               ERROR("Mainloop failed\n");
+               return THERMAL_ENGINE_MAINLOOP_ERROR;
+       }
+
+       return THERMAL_ENGINE_SUCCESS;
+}
diff --git a/tools/thermal/thermometer/Build b/tools/thermal/thermometer/Build
new file mode 100644 (file)
index 0000000..1b96c15
--- /dev/null
@@ -0,0 +1 @@
+thermometer-y += thermometer.o
diff --git a/tools/thermal/thermometer/Makefile b/tools/thermal/thermometer/Makefile
new file mode 100644 (file)
index 0000000..d8f8bc8
--- /dev/null
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for cgroup tools
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+# $(info Determined 'srctree' to be $(srctree))
+endif
+
+CFLAGS = -Wall -Wextra
+CFLAGS += -I$(srctree)/tools/thermal/lib
+
+LDFLAGS = -L$(srctree)/tools/thermal/lib
+LDFLAGS += -lthermal_tools
+LDFLAGS += -lconfig
+
+VERSION = 0.0.1
+TARGET=thermometer
+
+all: $(TARGET)
+%: %.c
+       $(CC) $(CFLAGS) -D VERSION=\"$(VERSION)\" -o $@ $^ $(LDFLAGS)
+
+clean:
+       $(RM) $(TARGET)
diff --git a/tools/thermal/thermometer/thermometer.8 b/tools/thermal/thermometer/thermometer.8
new file mode 100644 (file)
index 0000000..d090fbc
--- /dev/null
@@ -0,0 +1,92 @@
+.TH THERMOMETER 8
+# SPDX-License-Identifier: GPL-2.0
+.SH NAME
+\fBthermometer\fP - A thermal profiling tool
+
+.SH SYNOPSIS
+.ft B
+.B thermometer
+.RB [ options ]
+.RB [ command ]
+.br
+.SH DESCRIPTION
+\fBthermometer \fP captures the temperature of the thermal zones at a
+specified sampling period. It is optimized to keep the overhead of the
+temperature acquisition as low as possible, in order to avoid
+disrupting the running application being profiled.
+
+This low overhead also allows high-rate temperature sampling, which
+can be necessary to spot overshoots and undershoots.
+
+If no configuration file is specified, all the thermal zones are
+monitored at 4Hz, that is every 250ms. A configuration file specifies
+the thermal zone names and the desired sampling periods. A thermal
+zone name can be a regular expression matching a group of thermal
+zones.
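As an illustration, using the libconfig syntax of the shipped thermometer.conf, the following made-up configuration samples the CPU thermal zones every 100ms and every other zone once per second (the first matching regular expression wins):

thermal-zones = (
	{ name = "cpu[0-9]-thermal";
	  polling = 100; },
	{ name = ".*";
	  polling = 1000; }
)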
+
+The samples of the different thermal zones are written into separate
+files named after the thermal zone. A postfix can be specified to
+identify them, for example for a specific scenario. The output
+directory can be specified in addition.
+
+Without any parameters, \fBthermometer \fP captures all the thermal
+zone temperatures every 250ms and writes the capture files, postfixed
+with the current date, to the current directory.
+
+If a running \fBduration\fP or a \fBcommand\fP is specified, the
+capture ends when the duration elapses, unless the command finishes
+first. The \fBduration\fP and the \fBcommand\fP can each be specified
+alone. If neither is specified, the capture continues indefinitely
+until interrupted by \fBSIGINT\fP or \fBSIGQUIT\fP.
+.PP
+
+.SS Options
+.PP
+The \fB-h, --help\fP option shows a short usage help
+.PP
+The \fB-o <dir>, --output <dir>\fP option defines the output directory to put the
+sampling files
+.PP
+The \fB-c <config>, --config <config>\fP option specifies the configuration file to use
+.PP
+The \fB-d <seconds>, --duration <seconds>\fP option specifies the duration of the capture
+.PP
+The \fB-l <loglevel>, --loglevel <loglevel>\fP option sets the loglevel [DEBUG,INFO,NOTICE,WARN,ERROR]
+.PP
+The \fB-p <string>, --postfix <string>\fP option appends \fBstring\fP at the end of the capture filenames
+.PP
+The \fB-s, --syslog\fP option sets the output to syslog, default is \fBstdout\fP
+.PP
+The \fB-w, --overwrite\fP option overwrites the output files if they exist
+.PP
+
+.PP
+
+.SS "Exit status:"
+.TP
+0
+if OK,
+.TP
+1
+Error with the options specified as parameters
+.TP
+2
+Error when configuring the logging facility
+.TP
+3
+Error when configuring the time
+.TP
+4
+Error in the initialization routine
+.TP
+5
+Error during the runtime
+
+.SH Capture file format
+
+Every capture file contains two columns. The first one is the uptime
+timestamp in milliseconds, which makes it possible to correlate a
+sample with a point in time since the system started up, should any
+thermal event occur. The second one is the temperature in millidegrees
+Celsius. The first line contains the labels of the columns.
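For instance, a capture file for a made-up "cpu0-thermal" zone sampled every 250ms could begin with:

timestamp(ms) cpu0-thermal(°mC)
1534267 49000
1534517 49500
1534767 51000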
+
+.SH AUTHOR
+Daniel Lezcano <daniel.lezcano@kernel.org>
diff --git a/tools/thermal/thermometer/thermometer.c b/tools/thermal/thermometer/thermometer.c
new file mode 100644 (file)
index 0000000..1a87a0a
--- /dev/null
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <regex.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/signalfd.h>
+#include <sys/timerfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/thermal.h>
+
+#include <libconfig.h>
+#include "thermal-tools.h"
+
+#define CLASS_THERMAL "/sys/class/thermal"
+
+enum {
+       THERMOMETER_SUCCESS = 0,
+       THERMOMETER_OPTION_ERROR,
+       THERMOMETER_LOG_ERROR,
+       THERMOMETER_CONFIG_ERROR,
+       THERMOMETER_TIME_ERROR,
+       THERMOMETER_INIT_ERROR,
+       THERMOMETER_RUNTIME_ERROR
+};
+
+struct options {
+       int loglvl;
+       int logopt;
+       int overwrite;
+       int duration;
+       const char *config;
+       char postfix[PATH_MAX];
+       char output[PATH_MAX];
+};
+
+struct tz_regex {
+       regex_t regex;
+       int polling;
+};
+
+struct configuration {
+       struct tz_regex *tz_regex;
+       int nr_tz_regex;
+
+};
+
+struct tz {
+       FILE *file_out;
+       int fd_temp;
+       int fd_timer;
+       int polling;
+       const char *name;
+};
+
+struct thermometer {
+       struct tz *tz;
+       int nr_tz;
+};
+
+static struct tz_regex *configuration_tz_match(const char *expr,
+                                              struct configuration *config)
+{
+       int i;
+
+       for (i = 0; i < config->nr_tz_regex; i++) {
+
+               if (!regexec(&config->tz_regex[i].regex, expr, 0, NULL, 0))
+                       return &config->tz_regex[i];
+       }
+
+       return NULL;
+}
+
+static int configuration_default_init(struct configuration *config)
+{
+       config->tz_regex = realloc(config->tz_regex, sizeof(*config->tz_regex) *
+                                  (config->nr_tz_regex + 1));
+
+       if (regcomp(&config->tz_regex[config->nr_tz_regex].regex, ".*",
+                   REG_NOSUB | REG_EXTENDED)) {
+               ERROR("Invalid regular expression\n");
+               return -1;
+       }
+
+       config->tz_regex[config->nr_tz_regex].polling = 250;
+       config->nr_tz_regex = 1;
+
+       return 0;
+}
+
+static int configuration_init(const char *path, struct configuration *config)
+{
+       config_t cfg;
+
+       config_setting_t *tz;
+       int i, length;
+
+       if (path && access(path, F_OK)) {
+               ERROR("'%s' is not accessible\n", path);
+               return -1;
+       }
+
+       if (!path && !config->nr_tz_regex) {
+               INFO("No thermal zones configured, using wildcard for all of them\n");
+               return configuration_default_init(config);
+       }
+
+       config_init(&cfg);
+
+       if (!config_read_file(&cfg, path)) {
+               ERROR("Failed to parse %s:%d - %s\n", config_error_file(&cfg),
+                     config_error_line(&cfg), config_error_text(&cfg));
+
+               return -1;
+       }
+
+       tz = config_lookup(&cfg, "thermal-zones");
+       if (!tz) {
+               ERROR("No thermal zone configured to be monitored\n");
+               return -1;
+       }
+
+       length = config_setting_length(tz);
+
+       INFO("Found %d thermal zone regular expression(s)\n", length);
+
+       for (i = 0; i < length; i++) {
+
+               config_setting_t *node;
+               const char *name;
+               int polling;
+
+               node = config_setting_get_elem(tz, i);
+               if (!node) {
+                       ERROR("Missing node name '%d'\n", i);
+                       return -1;
+               }
+
+               if (!config_setting_lookup_string(node, "name", &name)) {
+                       ERROR("Thermal zone name not found\n");
+                       return -1;
+               }
+
+               if (!config_setting_lookup_int(node, "polling", &polling)) {
+                       ERROR("Polling value not found\n");
+                       return -1;
+               }
+
+               config->tz_regex = realloc(config->tz_regex, sizeof(*config->tz_regex) *
+                                       (config->nr_tz_regex + 1));
+
+               if (regcomp(&config->tz_regex[config->nr_tz_regex].regex, name,
+                           REG_NOSUB | REG_EXTENDED)) {
+                       ERROR("Invalid regular expression '%s'\n", name);
+                       continue;
+               }
+
+               config->tz_regex[config->nr_tz_regex].polling = polling;
+               config->nr_tz_regex++;
+
+               INFO("Thermal zone regular expression '%s' with polling %d\n",
+                    name, polling);
+       }
+
+       return 0;
+}
+
+static void usage(const char *cmd)
+{
+       printf("%s Version: %s\n", cmd, VERSION);
+       printf("Usage: %s [options]\n", cmd);
+       printf("\t-h, --help\t\tthis help\n");
+       printf("\t-o, --output <dir>\toutput directory for temperature capture\n");
+       printf("\t-c, --config <file>\tconfiguration file\n");
+       printf("\t-d, --duration <seconds>\tcapture duration\n");
+       printf("\t-l, --loglevel <level>\tlog level: ");
+       printf("DEBUG, INFO, NOTICE, WARN, ERROR\n");
+       printf("\t-p, --postfix <string>\tpostfix to be appended at the end of the files\n");
+       printf("\t-s, --syslog\t\toutput to syslog\n");
+       printf("\t-w, --overwrite\t\toverwrite the temperature capture files if they exist\n");
+       printf("\n");
+       exit(0);
+}
+
+static int options_init(int argc, char *argv[], struct options *options)
+{
+       int opt;
+       time_t now = time(NULL);
+
+       struct option long_options[] = {
+               { "help",       no_argument, NULL, 'h' },
+               { "config",     required_argument, NULL, 'c' },
+               { "duration",   required_argument, NULL, 'd' },
+               { "loglevel",   required_argument, NULL, 'l' },
+               { "postfix",    required_argument, NULL, 'p' },
+               { "output",     required_argument, NULL, 'o' },
+               { "syslog",     required_argument, NULL, 's' },
+               { "overwrite",  no_argument, NULL, 'w' },
+               { 0, 0, 0, 0 }
+       };
+
+       strftime(options->postfix, sizeof(options->postfix),
+                "-%Y-%m-%d_%H:%M:%S", gmtime(&now));
+
+       while (1) {
+
+               int optindex = 0;
+
+               opt = getopt_long(argc, argv, "ho:c:d:l:p:sw", long_options, &optindex);
+               if (opt == -1)
+                       break;
+
+               switch (opt) {
+               case 'c':
+                       options->config = optarg;
+                       break;
+               case 'd':
+                       options->duration = atoi(optarg) * 1000;
+                       break;
+               case 'l':
+                       options->loglvl = log_str2level(optarg);
+                       break;
+               case 'h':
+                       usage(basename(argv[0]));
+                       break;
+               case 'p':
+                       strcpy(options->postfix, optarg);
+                       break;
+               case 'o':
+                       strcpy(options->output, optarg);
+                       break;
+               case 's':
+                       options->logopt = TO_SYSLOG;
+                       break;
+               case 'w':
+                       options->overwrite = 1;
+                       break;
+               default: /* '?' */
+                       ERROR("Usage: %s --help\n", argv[0]);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int thermometer_add_tz(const char *path, const char *name, int polling,
+                             struct thermometer *thermometer)
+{
+       int fd;
+       char tz_path[PATH_MAX];
+
+       sprintf(tz_path, CLASS_THERMAL"/%s/temp", path);
+
+       fd = open(tz_path, O_RDONLY);
+       if (fd < 0) {
+               ERROR("Failed to open '%s': %m\n", tz_path);
+               return -1;
+       }
+
+       thermometer->tz = realloc(thermometer->tz,
+                                 sizeof(*thermometer->tz) * (thermometer->nr_tz + 1));
+       if (!thermometer->tz) {
+               ERROR("Failed to allocate thermometer->tz\n");
+               return -1;
+       }
+
+       thermometer->tz[thermometer->nr_tz].fd_temp = fd;
+       thermometer->tz[thermometer->nr_tz].name = strdup(name);
+       thermometer->tz[thermometer->nr_tz].polling = polling;
+       thermometer->nr_tz++;
+
+       INFO("Added thermal zone '%s->%s (polling:%d)'\n", path, name, polling);
+
+       return 0;
+}
+
+static int thermometer_init(struct configuration *config,
+                           struct thermometer *thermometer)
+{
+       DIR *dir;
+       struct dirent *dirent;
+       struct tz_regex *tz_regex;
+       const char *tz_dirname = "thermal_zone";
+
+       if (mainloop_init()) {
+               ERROR("Failed to start mainloop\n");
+               return -1;
+       }
+
+       dir = opendir(CLASS_THERMAL);
+       if (!dir) {
+               ERROR("Failed to open '%s'\n", CLASS_THERMAL);
+               return -1;
+       }
+
+       while ((dirent = readdir(dir))) {
+               char tz_type[THERMAL_NAME_LENGTH];
+               char tz_path[PATH_MAX];
+               FILE *tz_file;
+
+               if (strncmp(dirent->d_name, tz_dirname, strlen(tz_dirname)))
+                       continue;
+
+               sprintf(tz_path, CLASS_THERMAL"/%s/type", dirent->d_name);
+
+               tz_file = fopen(tz_path, "r");
+               if (!tz_file) {
+                       ERROR("Failed to open '%s': %m\n", tz_path);
+                       continue;
+               }
+
+               fscanf(tz_file, "%s", tz_type);
+
+               fclose(tz_file);
+
+               tz_regex = configuration_tz_match(tz_type, config);
+               if (!tz_regex)
+                       continue;
+
+               if (thermometer_add_tz(dirent->d_name, tz_type,
+                                      tz_regex->polling, thermometer))
+                       continue;
+       }
+
+       closedir(dir);
+
+       return 0;
+}
+
+static int timer_temperature_callback(int fd, void *arg)
+{
+       struct tz *tz = arg;
+       char buf[16] = { 0 };
+
+       pread(tz->fd_temp, buf, sizeof(buf), 0);
+
+       fprintf(tz->file_out, "%lu %s", getuptimeofday_ms(), buf);
+
+       read(fd, buf, sizeof(buf));
+
+       return 0;
+}
+
+static int thermometer_start(struct thermometer *thermometer,
+                            struct options *options)
+{
+       struct itimerspec timer_it = { 0 };
+       char *path;
+       FILE *f;
+       int i;
+
+       INFO("Capturing %d thermal zone(s) temperature...\n", thermometer->nr_tz);
+
+       if (access(options->output, F_OK) && mkdir(options->output, 0700)) {
+               ERROR("Failed to create directory '%s'\n", options->output);
+               return -1;
+       }
+
+       for (i = 0; i < thermometer->nr_tz; i++) {
+
+               asprintf(&path, "%s/%s%s", options->output,
+                        thermometer->tz[i].name, options->postfix);
+
+               if (!options->overwrite && !access(path, F_OK)) {
+                       ERROR("'%s' already exists\n", path);
+                       return -1;
+               }
+
+               f = fopen(path, "w");
+               if (!f) {
+                       ERROR("Failed to create '%s': %m\n", path);
+                       return -1;
+               }
+
+               fprintf(f, "timestamp(ms) %s(°mC)\n", thermometer->tz[i].name);
+
+               thermometer->tz[i].file_out = f;
+
+               DEBUG("Created '%s' file for thermal zone '%s'\n", path, thermometer->tz[i].name);
+
+               /*
+                * Create polling timer
+                */
+               thermometer->tz[i].fd_timer = timerfd_create(CLOCK_MONOTONIC, 0);
+               if (thermometer->tz[i].fd_timer < 0) {
+                       ERROR("Failed to create timer for '%s': %m\n",
+                             thermometer->tz[i].name);
+                       return -1;
+               }
+
+               DEBUG("Watching '%s' every %d ms\n",
+                     thermometer->tz[i].name, thermometer->tz[i].polling);
+
+               timer_it.it_interval = timer_it.it_value =
+                       msec_to_timespec(thermometer->tz[i].polling);
+
+               if (timerfd_settime(thermometer->tz[i].fd_timer, 0,
+                                   &timer_it, NULL) < 0)
+                       return -1;
+
+               if (mainloop_add(thermometer->tz[i].fd_timer,
+                                timer_temperature_callback,
+                                &thermometer->tz[i]))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int thermometer_execute(int argc, char *argv[], char *const envp[], pid_t *pid)
+{
+       if (!argc)
+               return 0;
+
+       *pid = fork();
+       if (*pid < 0) {
+               ERROR("Failed to fork process: %m");
+               return -1;
+       }
+
+       if (!(*pid)) {
+               execvpe(argv[0], argv, envp);
+               exit(1);
+       }
+
+       return 0;
+}
+
+static int kill_process(__maybe_unused int fd, void *arg)
+{
+       pid_t pid = *(pid_t *)arg;
+
+       if (kill(pid, SIGTERM))
+               ERROR("Failed to send SIGTERM signal to '%d': %m\n", pid);
+       else if (waitpid(pid, NULL, 0) < 0)
+               ERROR("Failed to wait pid '%d': %m\n", pid);
+
+       mainloop_exit();
+
+       return 0;
+}
+
+static int exit_mainloop(__maybe_unused int fd, __maybe_unused void *arg)
+{
+       mainloop_exit();
+       return 0;
+}
+
+static int thermometer_wait(struct options *options, pid_t pid)
+{
+       int fd;
+       sigset_t mask;
+
+       /*
+        * If there is a duration specified, we will exit the mainloop
+        * and gracefully close all the files which will flush the
+        * file system cache
+        */
+       if (options->duration) {
+               struct itimerspec timer_it = { 0 };
+
+               timer_it.it_value = msec_to_timespec(options->duration);
+
+               fd = timerfd_create(CLOCK_MONOTONIC, 0);
+               if (fd < 0) {
+                       ERROR("Failed to create duration timer: %m\n");
+                       return -1;
+               }
+
+               if (timerfd_settime(fd, 0, &timer_it, NULL)) {
+                       ERROR("Failed to set timer time: %m\n");
+                       return -1;
+               }
+
+               if (mainloop_add(fd, pid < 0 ? exit_mainloop : kill_process, &pid)) {
+                       ERROR("Failed to set timer exit mainloop callback\n");
+                       return -1;
+               }
+       }
+
+       /*
+        * We want to catch any keyboard interrupt, as well as child
+        * signals if any in order to exit properly
+        */
+       sigemptyset(&mask);
+       sigaddset(&mask, SIGINT);
+       sigaddset(&mask, SIGQUIT);
+       sigaddset(&mask, SIGCHLD);
+
+       if (sigprocmask(SIG_BLOCK, &mask, NULL)) {
+               ERROR("Failed to set sigprocmask: %m\n");
+               return -1;
+       }
+
+       fd = signalfd(-1, &mask, 0);
+       if (fd < 0) {
+               ERROR("Failed to set the signalfd: %m\n");
+               return -1;
+       }
+
+       if (mainloop_add(fd, exit_mainloop, NULL)) {
+               ERROR("Failed to set the signal exit mainloop callback\n");
+               return -1;
+       }
+
+       return mainloop(-1);
+}
+
+static int thermometer_stop(struct thermometer *thermometer)
+{
+       int i;
+
+       INFO("Closing/flushing output files\n");
+
+       for (i = 0; i < thermometer->nr_tz; i++)
+               fclose(thermometer->tz[i].file_out);
+
+       return 0;
+}
+
+int main(int argc, char *argv[], char *const envp[])
+{
+       struct options options = {
+               .loglvl = LOG_DEBUG,
+               .logopt = TO_STDOUT,
+               .output = ".",
+       };
+       struct configuration config = { 0 };
+       struct thermometer thermometer = { 0 };
+
+       pid_t pid = -1;
+
+       if (options_init(argc, argv, &options))
+               return THERMOMETER_OPTION_ERROR;
+
+       if (log_init(options.loglvl, argv[0], options.logopt))
+               return THERMOMETER_LOG_ERROR;
+
+       if (configuration_init(options.config, &config))
+               return THERMOMETER_CONFIG_ERROR;
+
+       if (uptimeofday_init())
+               return THERMOMETER_TIME_ERROR;
+
+       if (thermometer_init(&config, &thermometer))
+               return THERMOMETER_INIT_ERROR;
+
+       if (thermometer_start(&thermometer, &options))
+               return THERMOMETER_RUNTIME_ERROR;
+
+       if (thermometer_execute(argc - optind, &argv[optind], envp, &pid))
+               return THERMOMETER_RUNTIME_ERROR;
+
+       if (thermometer_wait(&options, pid))
+               return THERMOMETER_RUNTIME_ERROR;
+
+       if (thermometer_stop(&thermometer))
+               return THERMOMETER_RUNTIME_ERROR;
+
+       return THERMOMETER_SUCCESS;
+}
diff --git a/tools/thermal/thermometer/thermometer.conf b/tools/thermal/thermometer/thermometer.conf
new file mode 100644 (file)
index 0000000..02c6dab
--- /dev/null
@@ -0,0 +1,5 @@
+
+thermal-zones = (
+             { name = "cpu[0-9]-thermal";
+               polling = 100; }
+      )
index 59b1dd4a549ee041767628b4a3c1f2d7992e7abb..2a3ed401ce4653377d55de9b02ccfa9e6086f3f3 100644 (file)
@@ -77,7 +77,8 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 
        idx = srcu_read_lock(&kvm->irq_srcu);
 
-       list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
+       list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
+           srcu_read_lock_held(&kvm->irq_srcu))
                eventfd_signal(irqfd->resamplefd, 1);
 
        srcu_read_unlock(&kvm->irq_srcu, idx);
index 6d971fb1b08d8ed196e139e54004b0ac9f5fab0c..5ab12214e18dd86869f66b1c5af89f3d3891fa5d 100644 (file)
@@ -1560,7 +1560,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
        r = kvm_arch_prepare_memory_region(kvm, old, new, change);
 
        /* Free the bitmap on failure if it was allocated above. */
-       if (r && new && new->dirty_bitmap && old && !old->dirty_bitmap)
+       if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap))
                kvm_destroy_dirty_bitmap(new);
 
        return r;